package com.rhhz.pubplatformspider.parser;

import com.rhhz.pubplatformspider.utils.DateUtils;
import com.rhhz.pubplatformspider.utils.StringUtils;
import com.rhhz.pubplatformspider.vo.AffiliVo;
import com.rhhz.pubplatformspider.vo.ArticleVo;
import com.rhhz.pubplatformspider.vo.AuthorVo;
import com.rhhz.pubplatformspider.vo.KeywordVo;
import com.rhhz.pubplatformspider.vo.ReferVo;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/* loaded from: input_file:com/rhhz/pubplatformspider/parser/SpringerParser.class */
/**
 * Parses Springer HTML pages (issue listings and article landing pages) into
 * the project's value objects using jsoup.
 *
 * <p>All extraction is best-effort: a page element that is missing is skipped
 * instead of aborting the whole parse with a {@link NullPointerException} or
 * {@link StringIndexOutOfBoundsException}.
 *
 * <p>Private helpers declare {@code ArrayList} rather than {@code List} for the
 * collections handed to the VO setters because the setter parameter types are
 * declared outside this file and the original code passed {@code ArrayList}
 * instances.
 */
public class SpringerParser {
    /** One MathJax span, from its opening tag through the closing script/span pair. */
    private static final Pattern MATHJAX_SPAN = Pattern.compile("<span class=\"mathjax-tex\">(.*?)<\\/script><\\/span>");

    /** A complete anchor element including its content. */
    private static final Pattern ANCHOR_ELEMENT = Pattern.compile("<a.*?>.*?</a>");

    /** A string that ends with an anchor element; group 1 is the anchor content. */
    private static final Pattern TRAILING_ANCHOR = Pattern.compile(".*?<a.*?>(.*?)</a>");

    /** An opening anchor tag. */
    private static final Pattern ANCHOR_OPEN_TAG = Pattern.compile("<a.*?>");

    /**
     * Collects the article links of an issue page.
     *
     * @param str HTML of the issue page
     * @return the {@code href} of the {@code article>div>h3>a} anchor of every
     *         {@code ol>li} entry, in document order; entries without such an
     *         anchor are skipped
     * @throws Exception kept for interface compatibility
     */
    public static List<String> parseIssuePage(String str) throws Exception {
        Elements items = Jsoup.parse(str).select("ol>li");
        System.out.println("Sprigner上抓取的文章数量:" + items.size());
        List<String> links = new ArrayList<>();
        for (Element item : items) {
            Element anchor = item.selectFirst("article>div>h3>a");
            // List items that are not article entries have no such anchor.
            if (anchor != null) {
                links.add(anchor.attr("href"));
            }
        }
        return links;
    }

    /**
     * Extracts the metadata of one article page: column, title, funding,
     * abstract, year/volume/pages, DOI, PDF URL, authors, affiliations,
     * history dates, keywords and references.
     *
     * @param str HTML of the article page
     * @return a populated {@link ArticleVo}; fields whose source element is
     *         absent from the page are left unset or empty
     * @throws Exception if {@code DateUtils.strConvert2Date} fails on a date
     */
    public static ArticleVo parseArticleMeta(String str) throws Exception {
        ArticleVo articleVo = new ArticleVo();
        // Pretty printing is disabled so html() does not inject formatting whitespace.
        Document.OutputSettings outputSettings = new Document.OutputSettings();
        outputSettings.prettyPrint(false);
        Document doc = Jsoup.parse(str);
        doc.outputSettings(outputSettings);

        Element category = doc.selectFirst("li[data-test=article-category]");
        if (category != null) {
            articleVo.setColumnEn(category.text());
        }
        Element title = doc.selectFirst("h1.c-article-title");
        if (title != null) {
            articleVo.setTitleEn(processCommonPara(title.html()));
        }

        articleVo.setFundEn(extractFunding(doc));
        articleVo.setAbsEn(extractAbstract(doc));
        applyIssueInfo(doc, articleVo);
        applyDoi(doc, articleVo);

        ArrayList<AuthorVo> authors = extractAuthors(doc);
        // Must run before the corresponding-author pass, which reads the e-mail.
        attachEmails(doc, authors);
        articleVo.setAuthorVos(authors);
        articleVo.setAffiliVos(extractAffiliations(doc));
        describeCorrespondingAuthors(doc, authors);

        applyHistoryDates(doc, articleVo);
        applySubjects(doc, articleVo);
        articleVo.setReferVos(extractReferences(doc));
        return articleVo;
    }

    /**
     * Returns the funding statement HTML: the element following the "Funding"
     * sub-heading inside {@code #Ack1-content}, or otherwise the first
     * {@code div>div>p} of the last section whose {@code data-title} starts with
     * "funding", "acknowledgments" or "acknowledgements". Empty when neither exists.
     */
    private static String extractFunding(Document doc) {
        String funding = "";
        Element ack = doc.selectFirst("#Ack1-content");
        if (ack != null) {
            for (Element heading : ack.select("h3.c-article__sub-heading")) {
                if (heading.text().equals("Funding")) {
                    Element body = heading.nextElementSibling();
                    if (body != null) {
                        funding = body.html();
                    }
                    break;
                }
            }
        }
        if (StringUtils.isEmpty(funding)) {
            for (Element section : doc.select("section")) {
                String sectionTitle = section.attr("data-title").toLowerCase();
                if (sectionTitle.startsWith("funding")
                        || sectionTitle.startsWith("acknowledgments")
                        || sectionTitle.startsWith("acknowledgements")) {
                    Element para = section.selectFirst("div>div>p");
                    if (para != null) {
                        // No break: as before, the last matching section wins.
                        funding = para.html();
                    }
                }
            }
        }
        return funding;
    }

    /**
     * Builds the abstract markup from the {@code h3} and {@code p} elements of
     * {@code #Abs1-content}. Each heading opens a {@code <sec><title>..</title><p>}
     * and the first paragraph after it closes the {@code <p>}. Elements whose
     * processed content is exactly "Graphic Abstract" are ignored.
     */
    private static String extractAbstract(Document doc) {
        String abs = "";
        boolean paragraphOpen = false;
        for (Element node : doc.select("#Abs1-content").select("h3,p")) {
            String content = processCommonPara(node.html().replace("&nbsp;", " "));
            if ("Graphic Abstract".equals(content)) {
                continue;
            }
            if (node.nodeName().equals("h3")) {
                if (StringUtils.isNotEmpty(abs)) {
                    abs = abs + "</sec>";
                }
                abs = abs + "<sec><title>" + content + "</title><p>";
                paragraphOpen = true;
            }
            if (node.nodeName().equals("p")) {
                if (paragraphOpen) {
                    abs = abs + content + "</p>";
                    paragraphOpen = false;
                } else {
                    abs = abs + content;
                }
            }
        }
        // NOTE(review): the last <sec> is never closed here; left unchanged because
        // consumers of absEn are not visible from this file - confirm before changing.
        return abs;
    }

    /**
     * Reads volume, page range (or article number) and year from the text of
     * {@code p.c-article-info-details} and stores them on {@code articleVo}.
     * A part whose delimiters are missing is stored as an empty string.
     */
    private static void applyIssueInfo(Document doc, ArticleVo articleVo) {
        String year = "";
        String volume = "";
        String pages = "";
        Element info = doc.selectFirst("p.c-article-info-details");
        if (info != null) {
            String text = info.text();
            // Parentheses are searched after the last recognised marker so that one
            // appearing earlier in the text is not mistaken for the year.
            int searchFrom = 0;
            int volumeAt = text.indexOf("volume");
            if (volumeAt != -1) {
                volume = slice(text, volumeAt + 6, text.indexOf(",", volumeAt));
                searchFrom = volumeAt;
            }
            // Later markers take precedence over earlier ones, as in the original.
            int pagesStart = -1;
            int at = text.indexOf("pages ");
            if (at != -1) {
                pagesStart = at + 6;
            }
            at = text.indexOf("page ");
            if (at != -1) {
                pagesStart = at + 5;
            }
            at = text.indexOf("number:");
            if (at != -1) {
                pagesStart = at + 7;
            }
            if (pagesStart != -1) {
                pages = slice(text, pagesStart, text.indexOf("(", pagesStart));
                searchFrom = Math.max(searchFrom, pagesStart);
            }
            int open = text.indexOf("(", searchFrom);
            if (open != -1) {
                year = slice(text, open + 1, text.indexOf(")", open));
            }
        }
        String firstPage = pages;
        String lastPage = pages;
        int dash = pages.indexOf("–");
        if (dash != -1) {
            firstPage = pages.substring(0, dash);
            lastPage = pages.substring(dash + 1);
        }
        articleVo.setYear(year);
        articleVo.setVolume(volume);
        articleVo.setFpage(firstPage);
        articleVo.setLpage(lastPage);
    }

    /**
     * Stores the DOI found in {@code p.c-bibliographic-information__citation}
     * (everything after the slash preceding "10.") and the PDF download URL
     * built from it.
     */
    private static void applyDoi(Document doc, ArticleVo articleVo) {
        String doi = "";
        Element citation = doc.selectFirst("p.c-bibliographic-information__citation");
        if (citation != null) {
            String text = citation.text();
            int slashAt = text.indexOf("/10.");
            // Require the "/10." marker; without it the whole citation used to be stored as the DOI.
            if (text.indexOf("doi") != -1 && slashAt != -1) {
                doi = text.substring(slashAt + 1).trim();
                articleVo.setDoi(processCommonPara(doi));
            }
        }
        // NOTE(review): the URL is still set when no DOI was found, as before - confirm
        // whether callers expect it to be absent in that case.
        articleVo.setPdfDownUrl("https://link.springer.com/content/pdf/" + doi + ".pdf");
    }

    /**
     * Builds one {@link AuthorVo} per {@code li.c-article-author-list__item}
     * that contains an anchor. The last space-separated word of the name becomes
     * the surname and the rest the given name; single-word names only get the
     * full name. An {@code a>svg} child marks the author as "corresp".
     */
    private static ArrayList<AuthorVo> extractAuthors(Document doc) {
        ArrayList<AuthorVo> authors = new ArrayList<>();
        for (Element item : doc.select("li.c-article-author-list__item")) {
            Element nameLink = item.selectFirst("a");
            if (nameLink == null) {
                // Without the anchor there is no name to read.
                continue;
            }
            String fullName = nameLink.text();
            Element sup = item.selectFirst("sup");
            String label = sup != null ? sup.text() : "";

            AuthorVo author = new AuthorVo();
            if (fullName.indexOf(" ") != -1) {
                String[] parts = fullName.split(" ");
                if (parts.length >= 2) {
                    author.setSurNameEn(parts[parts.length - 1]);
                    StringBuilder given = new StringBuilder();
                    for (int i = 0; i < parts.length - 1; i++) {
                        given.append(parts[i]).append(" ");
                    }
                    author.setGivenNameEn(given.toString().trim());
                }
            }
            Element orcid = item.selectFirst("a.js-orcid");
            if (orcid != null) {
                author.setOrcid(orcid.attr("href"));
            }
            if (item.selectFirst("a>svg") != null) {
                author.setAuthorType("corresp");
            }
            author.setAuthorNameEn(fullName);
            author.setAddressLabel(label);
            authors.add(author);
        }
        return authors;
    }

    /**
     * For each {@code citation_author} meta tag directly followed by a
     * {@code citation_author_email} tag, assigns that e-mail to the first
     * "corresp" author whose lower-cased name contains the lower-cased meta
     * name (only the part after the comma when the meta name has one).
     */
    private static void attachEmails(Document doc, List<AuthorVo> authors) {
        for (Element meta : doc.select("meta[name=citation_author]")) {
            Element sibling = meta.nextElementSibling();
            if (sibling == null || !sibling.attr("name").equals("citation_author_email")) {
                continue;
            }
            String needle = meta.attr("content").toLowerCase();
            int comma = needle.indexOf(",");
            if (comma != -1) {
                needle = needle.substring(comma + 1).trim();
            }
            String email = sibling.attr("content");
            for (AuthorVo author : authors) {
                if (!"corresp".equals(author.getAuthorType())) {
                    continue;
                }
                String candidate = StringUtils.isNotEmpty(author.getSurNameEn())
                        ? author.getGivenNameEn().toLowerCase() + " " + author.getSurNameEn().toLowerCase()
                        : author.getAuthorNameEn().toLowerCase();
                if (StringUtils.isNotEmpty(candidate) && candidate.contains(needle)) {
                    author.setEmail(email);
                    break;
                }
            }
        }
    }

    /**
     * Builds one {@link AffiliVo} per affiliation list item. The label is the
     * lower-cased element id with "aff" removed; the address is set only when
     * the address paragraph exists.
     */
    private static ArrayList<AffiliVo> extractAffiliations(Document doc) {
        ArrayList<AffiliVo> affiliations = new ArrayList<>();
        for (Element item : doc.select("ol.c-article-author-affiliation__list>li")) {
            AffiliVo affiliVo = new AffiliVo();
            affiliVo.setLabel(item.attr("id").toLowerCase().replace("aff", ""));
            Element address = item.selectFirst("p.c-article-author-affiliation__address");
            if (address != null) {
                affiliVo.setAddressStrEn(processCommonPara(address.html()));
            }
            affiliations.add(affiliVo);
        }
        return affiliations;
    }

    /**
     * Marks every author named in {@code #corresponding-author-list} as
     * "corresp" and sets the description to "given surname", followed by an
     * {@code <email>} element when an e-mail is known.
     */
    private static void describeCorrespondingAuthors(Document doc, List<AuthorVo> authors) {
        for (Element block : doc.select("#corresponding-author-list")) {
            String text = block.text();
            for (AuthorVo author : authors) {
                if (!text.contains(author.getAuthorNameEn())) {
                    continue;
                }
                author.setAuthorType("corresp");
                // Single-word names have no given/surname; fall back to the full
                // name instead of producing "null null".
                String display = StringUtils.isNotEmpty(author.getSurNameEn())
                        ? author.getGivenNameEn() + " " + author.getSurNameEn()
                        : author.getAuthorNameEn();
                if (StringUtils.isNotEmpty(author.getEmail())) {
                    author.setAuthorDescEn(display + " <email>" + author.getEmail() + "</email>");
                } else {
                    author.setAuthorDescEn(display);
                }
            }
        }
    }

    /**
     * Copies the received/revised/accepted/published/issue dates from the
     * bibliographic information list, both as parsed dates and as the raw
     * {@code datetime} attribute strings.
     */
    private static void applyHistoryDates(Document doc, ArticleVo articleVo) throws Exception {
        for (Element item : doc.select("li.c-bibliographic-information__list-item")) {
            Element time = item.selectFirst("span.c-bibliographic-information__value>time");
            if (time == null) {
                continue;
            }
            String raw = time.attr("datetime");
            String label = item.text();
            if (label.indexOf("Received") != -1) {
                articleVo.setReceivedDate(DateUtils.strConvert2Date("yyyy-MM-dd", raw));
                articleVo.setReceivedDateStr(raw);
            } else if (label.indexOf("Revised") != -1) {
                articleVo.setRevisedDate(DateUtils.strConvert2Date("yyyy-MM-dd", raw));
                articleVo.setRevisedDateStr(raw);
            } else if (label.indexOf("Accepted") != -1) {
                articleVo.setAcceptedDate(DateUtils.strConvert2Date("yyyy-MM-dd", raw));
                articleVo.setAcceptedDateStr(raw);
            } else if (label.indexOf("Published") != -1) {
                articleVo.setPreferredDate(DateUtils.strConvert2Date("yyyy-MM-dd", raw));
                articleVo.setPreferredDateStr(raw);
            } else if (label.indexOf("Issue Date") != -1) {
                articleVo.setPubDate(DateUtils.strConvert2Date("yyyy-MM", raw));
                articleVo.setPubDateStr(raw);
            }
        }
    }

    /**
     * Reads the subject lists that follow the "Key words"/"Keywords",
     * "CLC numbers" and "Document code" sub-headings.
     */
    private static void applySubjects(Document doc, ArticleVo articleVo) {
        for (Element heading : doc.select("h3.c-article__sub-heading")) {
            String label = heading.text();
            Element listHolder = heading.nextElementSibling();
            if ("Key words".equals(label) || "Keywords".equals(label)) {
                ArrayList<KeywordVo> keywords = new ArrayList<>();
                if (listHolder != null) {
                    for (Element subject : listHolder.select("li.c-article-subject-list__subject")) {
                        Element span = subject.selectFirst("span");
                        if (span == null) {
                            continue;
                        }
                        KeywordVo keywordVo = new KeywordVo();
                        keywordVo.setKeywordEn(processCommonPara(span.html()));
                        keywords.add(keywordVo);
                    }
                }
                articleVo.setKeywordVos(keywords);
            } else if ("CLC numbers".equals(label)) {
                String clc = firstSubjectText(listHolder);
                if (clc != null) {
                    articleVo.setClcNos(clc);
                }
            } else if ("Document code".equals(label)) {
                String code = firstSubjectText(listHolder);
                if (code != null) {
                    articleVo.setManuscript(code);
                }
            }
        }
    }

    /** Returns the text of the first subject list item under {@code holder}, or {@code null} if there is none. */
    private static String firstSubjectText(Element holder) {
        if (holder == null) {
            return null;
        }
        Element first = holder.select("li.c-article-subject-list__subject").first();
        return first == null ? null : first.text();
    }

    /** Builds one {@link ReferVo} per {@code li.c-article-references__item}. */
    private static ArrayList<ReferVo> extractReferences(Document doc) {
        ArrayList<ReferVo> references = new ArrayList<>();
        for (Element item : doc.select("li.c-article-references__item")) {
            references.add(parseReference(item));
        }
        return references;
    }

    /**
     * Converts one reference list item. Anchor markup is stripped from the
     * reference text; for references with a "https://doi" link the link text is
     * appended after the reference text, and the DOI is taken from the first
     * "10." onwards.
     */
    private static ReferVo parseReference(Element item) {
        ReferVo referVo = new ReferVo();
        Elements textNodes = item.select("p.c-article-references__text");
        String html = processInlineformula(
                textNodes.html().replace(" <i>", " <i> ").replace(" </i>", " </i> "));
        String body;
        String link = "";
        if (html.indexOf("https://doi") != -1) {
            body = ANCHOR_ELEMENT.matcher(html).replaceAll("").trim();
            int urlAt = html.indexOf(">https:");
            if (urlAt != -1) {
                // Look for the closing tag after the URL start; an earlier anchor's
                // closing tag used to make this slice throw.
                link = slice(html, urlAt + 1, html.indexOf("</a>", urlAt));
            }
            if (StringUtils.isEmpty(link)) {
                Matcher trailing = TRAILING_ANCHOR.matcher(html);
                if (trailing.matches()) {
                    link = TRAILING_ANCHOR.matcher(html).replaceAll("$1").trim();
                }
            }
            String doi = doiFrom(link);
            if (doi != null) {
                referVo.setDoi(doi);
            }
        } else if (html.indexOf("https://") == -1 && html.indexOf("http://") == -1) {
            body = html;
        } else {
            body = ANCHOR_OPEN_TAG.matcher(html).replaceAll("").replace("</a>", "").trim();
            String doi = doiFrom(body);
            if (doi != null) {
                referVo.setDoi(doi);
            }
        }
        // Type and volume are not assigned in this method; this check only has an
        // effect if ReferVo supplies values of its own.
        if (StringUtils.isNotEmpty(referVo.getType()) && referVo.getType().equals("journal")
                && StringUtils.isNotEmpty(referVo.getVolume()) && !referVo.getVolume().matches("\\d+–?\\d*")) {
            referVo.setType("");
        }
        String refId = textNodes.attr("id").replace("ref-CR", "");
        if (StringUtils.isNotEmpty(link)) {
            body = body + " " + link;
        }
        referVo.setRefId(refId);
        String referEn = processCommonPara(body);
        referVo.setReferEn(referEn);
        referVo.setRefHtml("[" + refId + "] " + referEn);
        return referVo;
    }

    /**
     * Returns the processed text from the first "10." onwards with one trailing
     * full stop removed, or {@code null} when {@code text} contains no "10.".
     */
    private static String doiFrom(String text) {
        int at = text.indexOf("10.");
        if (at == -1) {
            return null;
        }
        String doi = processCommonPara(text.substring(at));
        if (doi.endsWith(".")) {
            doi = doi.substring(0, doi.length() - 1).trim();
        }
        return doi;
    }

    /**
     * Returns the trimmed substring {@code [start, end)} of {@code text}, or an
     * empty string when the bounds are not a valid range (for example because
     * an {@code indexOf} lookup returned -1).
     */
    private static String slice(String text, int start, int end) {
        if (start < 0 || end < start || end > text.length()) {
            return "";
        }
        return text.substring(start, end).trim();
    }

    /**
     * Removes tabs, carriage returns and line feeds from {@code str} and
     * replaces every MathJax span with an {@code <inline-formula><tex-math>}
     * element that carries the content of the span's {@code script} element.
     * A span in which jsoup finds no {@code script} element is kept unchanged.
     */
    private static String processInlineformula(String str) {
        // The original character class also contained '|' and therefore deleted
        // literal pipe characters from the text.
        String flat = str.replaceAll("[\\t\\r\\n]", "");
        StringBuilder out = new StringBuilder();
        Matcher matcher = MATHJAX_SPAN.matcher(flat);
        int copiedUpTo = 0;
        while (matcher.find()) {
            out.append(flat, copiedUpTo, matcher.start());
            Element script = Jsoup.parse(matcher.group()).selectFirst("script");
            if (script == null) {
                out.append(matcher.group());
            } else {
                out.append("<inline-formula><tex-math id=\"E")
                        // Keep only the trailing digits of the script id.
                        .append(script.attr("id").replaceAll(".*?(\\d+)", "$1"))
                        .append("\">\\begin{document}$ ")
                        .append(script.html().replace("&", "&amp; ").replace("<", " \\lt ").replace(">", " \\gt "))
                        .append(" $\\end{document}</tex-math></inline-formula>");
            }
            copiedUpTo = matcher.end();
        }
        out.append(flat, copiedUpTo, flat.length());
        return out.toString();
    }

    /**
     * Normalises an HTML fragment for storage: thin spaces become
     * {@code &thinsp;}, every angle bracket is escaped, and then the
     * {@code sub}, {@code sup}, {@code b}, {@code i} and {@code p} tags are
     * restored ({@code b} as {@code bold}, {@code i} as {@code italic}). The
     * result is trimmed.
     *
     * @param str fragment to process; may be null or empty
     * @return the processed fragment, or {@code str} itself when
     *         {@code StringUtils.isNotEmpty(str)} is false
     */
    public static String processCommonPara(String str) {
        if (!StringUtils.isNotEmpty(str)) {
            return str;
        }
        return str.replace("\u2009", "&thinsp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;")
                .replace("&lt;sub&gt;", "<sub>")
                .replace("&lt;/sub&gt;", "</sub>")
                .replace("&lt;sup&gt;", "<sup>")
                .replace("&lt;/sup&gt;", "</sup>")
                .replace("&lt;b&gt;", "<bold>")
                .replace("&lt;/b&gt;", "</bold>")
                .replace("&lt;i&gt;", "<italic>")
                .replace("&lt;/i&gt;", "</italic>")
                .replace("&lt;p&gt;", "<p>")
                .replace("&lt;/p&gt;", "</p>")
                .trim();
    }
}
