package com.rhhz.pubplatformspider.parser;

import com.rhhz.pubplatformspider.utils.StringUtils;
import com.rhhz.pubplatformspider.vo.AffiliVo;
import com.rhhz.pubplatformspider.vo.ArticleVo;
import com.rhhz.pubplatformspider.vo.AuthorVo;
import com.rhhz.pubplatformspider.vo.KeywordVo;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/* loaded from: input_file:com/rhhz/pubplatformspider/parser/CnkiParser.class */
/**
 * Static helpers that turn CNKI (知网) HTML pages into the project's value objects.
 *
 * <p>Three kinds of pages are handled: an issue listing ({@link #parseIssuePage}), the
 * article detail page ({@link #parseArticleKnsMeta}) and the "oversea" article detail page
 * ({@link #parseArticleOverseaMeta}) whose English fields are merged into an already
 * populated {@link ArticleVo}. All parsing is best-effort: a page fragment that is missing
 * is skipped instead of aborting the whole parse.
 */
public class CnkiParser {
    public static final String[] SURNAMES = {"欧阳", "太史", "端木", "上官", "司马", "东方", "独孤", "南宫", "万俟", "闻人", "夏侯", "诸葛", "尉迟", "公羊", "赫连", "澹台", "皇甫", "宗政", "濮阳", "公冶", "太叔", "申屠", "公孙", "慕容", "仲孙", "钟离", "长孙", "宇文", "城池", "司徒", "鲜于", "司空", "汝嫣", "闾丘", "子车", "亓官", "司寇", "巫马", "公西", "颛孙", "壤驷", "公良", "漆雕", "乐正", "宰父", "谷梁", "拓跋", "夹谷", "轩辕", "令狐", "段干", "百里", "呼延", "东郭", "南门", "羊舌", "微生", "公户", "公玉", "公仪", "梁丘", "公仲", "公上", "公门", "公山", "公坚", "左丘", "公伯", "西门", "公祖", "第五", "公乘", "贯丘", "公皙", "南荣", "东里", "东宫", "仲长", "子书", "子桑", "即墨", "达奚", "褚师"};

    /** List view over {@link #SURNAMES}, created once instead of on every name split. */
    private static final List<String> COMPOUND_SURNAMES = Arrays.asList(SURNAMES);

    /**
     * Full-match test for an affiliation that starts with a one-digit index ("1.xxx", "2 xxx").
     * Inside the character class the {@code |} is a literal, so "1|xxx" is matched as well.
     */
    private static final String NUMBERED_AFFILIATION = "^[1-9][\\.| ].*?";

    /** The index prefix itself, removed once {@link #NUMBERED_AFFILIATION} has matched. */
    private static final String AFFILIATION_NUMBER = "[1-9][\\.| ]";

    private static final String NBSP_ENTITY = "&nbsp;";

    /**
     * Extracts the article file names from an issue listing page.
     *
     * <p>For every {@code dd.row} the "view" link is looked up and the value of its
     * {@code filename=} query parameter is collected. Rows without such a link or parameter
     * are skipped.
     *
     * @param str HTML of the issue page
     * @return file names in document order; never {@code null}
     * @throws Exception declared for backward compatibility with existing callers
     */
    public static List<String> parseIssuePage(String str) throws Exception {
        List<String> fileNames = new ArrayList<String>();
        Elements rows = Jsoup.parse(str).select("dd.row");
        System.out.println("知网上抓取的文章数量:" + rows.size());
        for (Element row : rows) {
            Element viewButton = row.selectFirst("ul.opts>li.btn-view");
            Element link = viewButton == null ? null : viewButton.selectFirst("a");
            if (link == null) {
                continue;
            }
            String href = link.attr("href");
            int start = href.indexOf("filename=");
            if (start == -1) {
                continue;
            }
            start += "filename=".length();
            // Normally the value is terminated by "&tablename"; fall back to the next
            // parameter separator or the end of the URL when it is not.
            int end = href.indexOf("&tablename", start);
            if (end == -1) {
                end = href.indexOf("&", start);
            }
            if (end == -1) {
                end = href.length();
            }
            fileNames.add(href.substring(start, end));
        }
        return fileNames;
    }

    /**
     * Fills {@code articleVo} from an article detail page: title, abstract, funds, keywords,
     * DOI, classification numbers, page range, year/volume/issue, affiliations and authors.
     *
     * <p>A value is stored in the Chinese field; title and affiliation address go to the
     * English field instead when they contain no Chinese characters, while abstract and fund
     * are stored in both fields in that case.
     *
     * @param str       HTML of the article page
     * @param articleVo object to populate
     * @return the same {@code articleVo} instance
     * @throws IOException declared for backward compatibility with existing callers
     */
    public static ArticleVo parseArticleKnsMeta(String str, ArticleVo articleVo) throws IOException {
        Document doc = Jsoup.parse(str);

        Element title = doc.selectFirst("div.wx-tit>h1");
        if (title != null) {
            String titleHtml = title.html();
            if (StringUtils.containsChinese(titleHtml)) {
                articleVo.setTitleCn(titleHtml);
            } else {
                articleVo.setTitleEn(titleHtml);
            }
        }

        for (Element row : doc.select("div.row")) {
            Element rowTitle = row.selectFirst("span.rowtit");
            if (rowTitle == null) {
                continue;
            }
            String label = rowTitle.text();
            if (label.equals("摘要：")) {
                Element summary = row.selectFirst("#ChDivSummary");
                if (summary != null) {
                    String abstractHtml = summary.html().trim();
                    articleVo.setAbsCn(abstractHtml);
                    if (!StringUtils.containsChinese(abstractHtml)) {
                        articleVo.setAbsEn(abstractHtml);
                    }
                }
            } else if (label.equals("基金资助：")) {
                Element funds = row.selectFirst("p.funds");
                if (funds != null) {
                    String fund = cleanFund(funds.text());
                    articleVo.setFundCn(fund);
                    if (!StringUtils.containsChinese(fund)) {
                        articleVo.setFundEn(fund);
                    }
                }
            } else if (label.equals("关键词：")) {
                List<KeywordVo> keywords = new ArrayList<KeywordVo>();
                for (Element keywordLink : row.select("p.keywords>a")) {
                    KeywordVo keywordVo = new KeywordVo();
                    keywordVo.setKeywordCn(stripTrailingSemicolon(StringUtils.convertSingleByte(keywordLink.html())));
                    keywords.add(keywordVo);
                }
                articleVo.setKeywordVos(keywords);
            }
        }

        for (Element item : doc.select("li.top-space")) {
            Element name = item.selectFirst("span");
            Element value = item.selectFirst("p");
            if (name == null || value == null) {
                continue;
            }
            String nameText = name.text();
            String valueText = value.text();
            if (nameText.indexOf("DOI") != -1) {
                articleVo.setDoi(valueText);
            } else if (nameText.indexOf("分类号") != -1) {
                articleVo.setClcNos(valueText);
            }
        }

        for (Element info : doc.select("p.total-inform").select("span")) {
            String infoText = info.text();
            if (infoText.indexOf("页码") == -1) {
                continue;
            }
            String pageRange = infoText.replace("页码：", "").trim();
            articleVo.setPageRange(pageRange);
            int dash = pageRange.indexOf("-");
            if (dash != -1) {
                articleVo.setFpage(pageRange.substring(0, dash));
                articleVo.setLpage(pageRange.substring(dash + 1));
            } else {
                // A single page number: it is the first page and there is no last page.
                articleVo.setFpage(pageRange);
                articleVo.setLpage("");
            }
        }

        parseYearVolumeIssue(doc, articleVo);

        List<AffiliVo> affiliations = new ArrayList<AffiliVo>();
        List<AuthorVo> authors = new ArrayList<AuthorVo>();
        Element brief = doc.selectFirst("div.brief");
        if (brief != null) {
            collectListedAffiliations(brief, affiliations);
            collectAuthors(brief, affiliations, authors);
        }
        articleVo.setAffiliVos(affiliations);
        articleVo.setAuthorVos(authors);
        return articleVo;
    }

    /**
     * Merges the English metadata of the "oversea" article page into {@code articleVo}:
     * English title, abstract, fund, keywords and affiliation addresses.
     *
     * <p>English keywords are merged by position into existing keywords, or appended as new
     * keywords when the article has none yet. English affiliation addresses are merged by
     * position into the affiliations already present on {@code articleVo}; in the
     * parenthesised and anchor layouts this only happens when the counts match.
     *
     * @param str       HTML of the oversea article page
     * @param articleVo article to enrich, typically already filled by {@link #parseArticleKnsMeta}
     * @return the same {@code articleVo} instance
     * @throws IOException declared for backward compatibility with existing callers
     */
    public static ArticleVo parseArticleOverseaMeta(String str, ArticleVo articleVo) throws IOException {
        Document doc = Jsoup.parse(str);

        Element title = doc.selectFirst("div.wx-tit>h1");
        if (title != null) {
            articleVo.setTitleEn(title.html().replaceAll("<a class=.*?>.*?</a>", "").trim());
        }

        for (Element row : doc.select("div.row")) {
            Element rowTitle = row.selectFirst("span.rowtit");
            if (rowTitle == null) {
                continue;
            }
            String label = rowTitle.text();
            if (label.equals("Abstract：")) {
                Element summary = row.selectFirst("#ChDivSummary");
                if (summary != null) {
                    String abstractHtml = summary.html().trim();
                    // Drop only trailing entities; cutting at the first "&nbsp;" would
                    // truncate an abstract that also contains one in the middle.
                    while (abstractHtml.endsWith(NBSP_ENTITY)) {
                        abstractHtml = abstractHtml.substring(0, abstractHtml.length() - NBSP_ENTITY.length());
                    }
                    if (!StringUtils.containsChinese(abstractHtml)) {
                        articleVo.setAbsEn(abstractHtml);
                    }
                }
            } else if (label.equals("Fund：")) {
                Element funds = row.selectFirst("p.funds");
                if (funds != null) {
                    String fund = cleanFund(funds.text());
                    if (!StringUtils.containsChinese(fund)) {
                        articleVo.setFundEn(fund);
                    }
                }
            } else if (label.equals("Keywords：")) {
                mergeEnglishKeywords(row.select("p.keywords>a"), articleVo);
            }
        }

        Element brief = doc.selectFirst("div.brief");
        List<AffiliVo> affiliVos = articleVo.getAffiliVos();
        if (brief == null || affiliVos == null) {
            return articleVo;
        }
        Elements headings = brief.select("div.wx-tit>h3");
        if (headings.isEmpty()) {
            return articleVo;
        }

        if (headings.size() <= 1) {
            // Authors and affiliations share a single heading.
            Element span = headings.get(0).selectFirst("span");
            if (span == null) {
                return articleVo;
            }
            String raw = StringUtils.convertSingleByte(span.html());
            if (raw.indexOf("(") != -1) {
                String inner = insideParentheses(raw);
                if (inner != null) {
                    String[] parts = inner.split(";|\\)\\(");
                    if (affiliVos.size() == parts.length) {
                        for (int i = 0; i < parts.length; i++) {
                            setEnglishAddress(affiliVos.get(i), stripAffiliationNumber(parts[i]));
                        }
                    }
                }
            } else if (articleVo.getAuthorVos() != null) {
                // "&amp;" is masked as "&&" so that splitting on ';' does not cut the entity.
                String[] parts = raw.replace("&amp;", "&&").split(";");
                int authorCount = articleVo.getAuthorVos().size();
                // The leading entries are the authors; what follows are the affiliations.
                for (int i = authorCount; i < parts.length && i - authorCount < affiliVos.size(); i++) {
                    String address = stripAffiliationNumber(parts[i]);
                    if (!StringUtils.containsChinese(address)) {
                        affiliVos.get(i - authorCount).setAddressStrEn(address.replace("&&", "&amp;"));
                    }
                }
            }
        } else {
            Elements anchors = headings.get(1).select("a");
            // Merge by position only when the page lists as many affiliations as the article
            // already has (the count must be that of the anchors, not of the headings).
            if (StringUtils.isNotEmpty(anchors.text()) && affiliVos.size() == anchors.size()) {
                for (int i = 0; i < anchors.size(); i++) {
                    setEnglishAddress(affiliVos.get(i), stripAffiliationNumber(anchors.get(i).text()));
                }
            }
        }
        return articleVo;
    }

    /**
     * Splits an author name into surname and given name and stores the parts on
     * {@code authorVo}.
     *
     * <p>A name without Chinese characters is stored as the English name; when it contains a
     * space the first token becomes the given name and the second the surname. A Chinese name
     * is split after the first character, after a two-character compound surname from
     * {@link #SURNAMES}, or around a "·" separator, and the pinyin forms are stored as the
     * English name parts.
     *
     * @param authorVo author to update
     * @param str      raw author name; {@code null} is ignored
     */
    public static void splitAuthorName(AuthorVo authorVo, String str) {
        if (str == null) {
            return;
        }
        String name = str.trim();
        if (!StringUtils.containsChinese(name)) {
            System.out.println(name);
            if (StringUtils.isNotEmpty(name)) {
                if (name.indexOf(" ") != -1) {
                    String[] parts = name.split(" ");
                    authorVo.setSurNameEn(parts[1]);
                    authorVo.setGivenNameEn(parts[0]);
                }
                authorVo.setAuthorNameEn(name);
            }
            return;
        }

        String compact = name.replace(" ", "").trim();
        authorVo.setAuthorNameCn(compact);

        String surname = compact.substring(0, 1);
        String givenName = compact.substring(1);
        int separator = compact.indexOf("·");
        if (separator != -1) {
            // With a "·" the part before it is taken as given name, the rest as surname.
            givenName = compact.substring(0, separator);
            surname = compact.substring(separator + 1);
        }
        if (compact.length() > 2) {
            String firstTwo = compact.substring(0, 2);
            if (COMPOUND_SURNAMES.contains(firstTwo)) {
                surname = firstTwo;
                givenName = compact.substring(2);
            }
        }
        authorVo.setSurNameCn(surname);
        authorVo.setGivenNameCn(givenName);

        String surnameEn = "";
        if (StringUtils.isNotEmpty(surname)) {
            surnameEn = StringUtils.convertPinyin(surname, "").toUpperCase(java.util.Locale.ROOT);
        }
        String givenNameEn = "";
        if (StringUtils.isNotEmpty(givenName)) {
            String pinyin = StringUtils.convertPinyin(givenName, "-");
            // Capitalise only when the conversion actually produced something.
            if (pinyin != null && pinyin.length() > 0) {
                givenNameEn = pinyin.substring(0, 1).toUpperCase(java.util.Locale.ROOT) + pinyin.substring(1);
            }
        }
        authorVo.setSurNameEn(surnameEn);
        authorVo.setGivenNameEn(givenNameEn);
        authorVo.setAuthorNameEn(surnameEn + " " + givenNameEn);
    }

    /** Reads "year,volume(issue)" from the second breadcrumb link; missing parts stay empty. */
    private static void parseYearVolumeIssue(Document doc, ArticleVo articleVo) {
        String year = "";
        String volume = "";
        String issue = "";
        Elements crumbs = doc.select("div.top-tip>span>a");
        if (crumbs.size() > 1) {
            String text = crumbs.get(1).text();
            int comma = text.indexOf(",");
            if (comma != -1) {
                year = text.substring(0, comma);
                int open = text.indexOf("(");
                if (open > comma) {
                    volume = text.substring(comma + 1, open);
                    int close = text.indexOf(")", open + 1);
                    if (close != -1) {
                        issue = text.substring(open + 1, close);
                    }
                }
            }
        }
        articleVo.setYear(year);
        articleVo.setVolume(volume);
        articleVo.setIssue(issue);
    }

    /** Collects the affiliations listed in the second heading of the brief section. */
    private static void collectListedAffiliations(Element brief, List<AffiliVo> affiliations) {
        Elements headings = brief.select("div.wx-tit>h3");
        if (headings.size() <= 1) {
            return;
        }
        Element affiliationHeading = headings.get(1);
        Elements nodes = affiliationHeading.select("a");
        if (nodes.isEmpty()) {
            nodes = affiliationHeading.select("span");
        }
        int index = 1;
        for (Element node : nodes) {
            AffiliVo affiliVo = new AffiliVo();
            String address = stripAffiliationNumber(node.text());
            if (StringUtils.containsChinese(address)) {
                affiliVo.setAddressStrCn(address);
            } else {
                affiliVo.setAddressStrEn(address);
            }
            affiliVo.setLabel(String.valueOf(index));
            affiliVo.setAffiTagId("aff" + index);
            affiliations.add(affiliVo);
            index++;
        }
    }

    /**
     * Builds one author per span under {@code #authorpart}; may add affiliations written
     * inline in parentheses when none have been collected so far.
     */
    private static void collectAuthors(Element brief, List<AffiliVo> affiliations, List<AuthorVo> authors) {
        Element authorPart = brief.selectFirst("#authorpart");
        if (authorPart == null) {
            return;
        }
        for (Element span : authorPart.select("span")) {
            AuthorVo authorVo = new AuthorVo();
            Element anchor = span.selectFirst("a");
            String rawHtml = StringUtils.convertSingleByte(anchor != null ? anchor.html() : span.html());
            if (rawHtml.indexOf("icon-email") != -1) {
                authorVo.setAuthorType("corresp");
            }

            // Remove icon markup, superscript labels and the private-use icon glyph.
            String name = rawHtml.replaceAll("<i .*?></i>", "").replaceAll("<sup>.*?</sup>", "").replace("\ue76f", "");
            if (name.endsWith(")")) {
                int open = name.indexOf("(");
                if (open != -1) {
                    name = name.substring(0, open);
                }
            }
            authorVo.setAuthorNameCn(name);

            if (affiliations.isEmpty() && rawHtml.indexOf("(") != -1) {
                addInlineAffiliations(rawHtml, affiliations);
            }

            Element sup = rawHtml.indexOf("<sup>") != -1 ? span.selectFirst("sup") : null;
            if (sup != null) {
                String labels = sup.text();
                authorVo.setAddressLabel(labels);
                authorVo.setAddressTagIds(buildAffiliationTagIds(labels));
            } else {
                // No superscript label: the author is attached to the first affiliation.
                authorVo.setAddressLabel("1");
                authorVo.setAddressTagIds("aff1");
            }
            // Element type of the setter parameter is not visible here, hence the raw list.
            authorVo.setAddressTags(new ArrayList());
            splitAuthorName(authorVo, name);
            authors.add(authorVo);
        }
    }

    /** Adds the ';'-separated affiliations found between the first '(' and the last character. */
    private static void addInlineAffiliations(String rawHtml, List<AffiliVo> affiliations) {
        String inner = insideParentheses(rawHtml);
        if (inner == null) {
            return;
        }
        int index = 1;
        for (String part : inner.split(";")) {
            AffiliVo affiliVo = new AffiliVo();
            affiliVo.setAddressStrCn(stripAffiliationNumber(part.replace("(", "").replace(")", "")));
            affiliVo.setLabel(String.valueOf(index));
            affiliVo.setAffiTagId("aff" + index);
            affiliations.add(affiliVo);
            index++;
        }
    }

    /** Merges English keywords by position, or appends them when the article has none yet. */
    private static void mergeEnglishKeywords(Elements keywordLinks, ArticleVo articleVo) {
        List<KeywordVo> keywords = articleVo.getKeywordVos();
        if (keywords == null) {
            keywords = new ArrayList<KeywordVo>();
        }
        // Decide before the loop: appending grows the list.
        boolean mergeIntoExisting = !keywords.isEmpty();
        for (int i = 0; i < keywordLinks.size(); i++) {
            String keyword = stripTrailingSemicolon(keywordLinks.get(i).html());
            if (!mergeIntoExisting) {
                KeywordVo keywordVo = new KeywordVo();
                keywordVo.setKeywordEn(keyword);
                keywords.add(keywordVo);
            } else if (i < keywords.size()) {
                keywords.get(i).setKeywordEn(keyword);
            } else {
                System.err.println("oversea合并关键词数量不对");
            }
        }
        if (!mergeIntoExisting) {
            articleVo.setKeywordVos(keywords);
        }
    }

    /** Stores {@code address} as the English address unless it contains Chinese characters. */
    private static void setEnglishAddress(AffiliVo affiliVo, String address) {
        if (!StringUtils.containsChinese(address)) {
            affiliVo.setAddressStrEn(address);
        }
    }

    /** Turns a superscript label such as "1,2" into "aff1,aff2". */
    private static String buildAffiliationTagIds(String labels) {
        StringBuilder ids = new StringBuilder();
        for (String label : labels.split(",")) {
            if (ids.length() > 0) {
                ids.append(',');
            }
            // Trimmed so that "1, 2" yields "aff2", matching the ids given to affiliations.
            ids.append("aff").append(label.trim());
        }
        return ids.toString();
    }

    /**
     * Returns the text between the first '(' and the last character of {@code value}, or
     * {@code null} when there is no '(' or nothing can follow it.
     */
    private static String insideParentheses(String value) {
        int open = value.indexOf("(");
        if (open == -1 || open + 1 > value.length() - 1) {
            return null;
        }
        return value.substring(open + 1, value.length() - 1);
    }

    /** Trims {@code text} and removes a leading one-digit affiliation index, if present. */
    private static String stripAffiliationNumber(String text) {
        String value = text.trim();
        if (value.matches(NUMBERED_AFFILIATION)) {
            value = value.replaceFirst(AFFILIATION_NUMBER, "").trim();
        }
        return value;
    }

    /** Removes one trailing ';' from {@code value}, if present. */
    private static String stripTrailingSemicolon(String value) {
        return value.endsWith(";") ? value.substring(0, value.length() - 1) : value;
    }

    /**
     * Normalises a fund statement: single-byte conversion, removal of one trailing ';', of
     * surrounding square brackets, and escaping of '&amp;' as an entity.
     */
    private static String cleanFund(String rawText) {
        String fund = stripTrailingSemicolon(StringUtils.convertSingleByte(rawText.trim()));
        if (fund.endsWith("]")) {
            fund = fund.substring(0, fund.length() - 1);
        }
        if (fund.startsWith("[")) {
            fund = fund.substring(1);
        }
        return fund.replace("&", "&amp;");
    }
}
