package com.hankcs.hanlp.tokenizer.lexical;

import com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie;
import com.hankcs.hanlp.collection.trie.DoubleArrayTrie;
import com.hankcs.hanlp.collection.trie.bintrie.BaseNode;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.dictionary.other.CharTable;
import com.hankcs.hanlp.dictionary.other.CharType;
import com.hankcs.hanlp.model.perceptron.tagset.NERTagSet;
import com.hankcs.hanlp.seg.CharacterBasedSegment;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.utility.Predefine;
import java.util.LinkedList;
import java.util.List;
import kotlin.text.Typography;

/* loaded from: classes3.dex */
public class AbstractLexicalAnalyzer extends CharacterBasedSegment implements LexicalAnalyzer {
    /**
     * Character-type lookup table indexed by {@code char} value. It is sized like
     * {@link CharType#type}, filled from it, and then patched by the static initializer.
     */
    protected static byte[] typeTable = new byte[CharType.type.length];
    /** Toggle for rule-based pre-segmentation; set through {@code enableRuleBasedSegment(boolean)}. */
    protected boolean enableRuleBasedSegment;
    /** Named-entity recognizer that {@code getNERTagSet} and {@code recognize} delegate to. */
    protected NERecognizer neRecognizer;
    /** Part-of-speech tagger that both {@code tag} overloads delegate to. */
    protected POSTagger posTagger;
    /** Word segmenter that receives the text pieces produced by this class. */
    protected Segmenter segmenter;

    static {
        // Fill the private table from the shared one; the overrides below only touch the copy.
        System.arraycopy(CharType.type, 0, typeTable, 0, typeTable.length);
        // Re-tag every Chinese numeral with type code 7
        // (NOTE(review): presumably CharType.CT_CHINESE - confirm against CharType).
        for (char numeral : Predefine.CHINESE_NUMBERS.toCharArray()) {
            typeTable[numeral] = 7;
        }
        // The normalized form of the middle dot receives the same type code.
        typeTable[CharTable.convert(Typography.middleDot)] = 7;
    }

    /**
     * Creates an analyzer with no components attached: rule-based pre-segmentation is off and
     * both translated-name and Japanese-name recognition are switched off in the configuration.
     *
     * <p>JADX INFO: access modifier was changed from {@code protected} during decompilation.
     */
    public AbstractLexicalAnalyzer() {
        config.japaneseNameRecognize = false;
        config.translatedNameRecognize = false;
        enableRuleBasedSegment = false;
    }

    /**
     * Creates an analyzer that only has a word segmenter; tagger and recognizer stay unset.
     *
     * @param segmenter the segmenter stored in {@link #segmenter}
     */
    public AbstractLexicalAnalyzer(Segmenter segmenter) {
        // Apply the default configuration first, then attach the component.
        this();
        this.segmenter = segmenter;
    }

    /**
     * Creates an analyzer with a segmenter and a part-of-speech tagger. Unlike the
     * three-argument constructor, this one does not touch any configuration flag beyond the
     * defaults applied by the no-argument constructor.
     *
     * @param segmenter the segmenter stored in {@link #segmenter}
     * @param pOSTagger the tagger stored in {@link #posTagger}
     */
    public AbstractLexicalAnalyzer(Segmenter segmenter, POSTagger pOSTagger) {
        // The single-argument constructor applies the defaults and stores the segmenter.
        this(segmenter);
        this.posTagger = pOSTagger;
    }

    /**
     * Creates a fully equipped analyzer and enables the matching configuration flags.
     * Part-of-speech tagging is enabled when a tagger is supplied; named-entity recognition is
     * enabled only when both a tagger and a recognizer are supplied.
     *
     * @param segmenter    the segmenter stored in {@link #segmenter}
     * @param pOSTagger    the tagger stored in {@link #posTagger}; may be {@code null}
     * @param nERecognizer the recognizer stored in {@link #neRecognizer}; may be {@code null}
     */
    public AbstractLexicalAnalyzer(Segmenter segmenter, POSTagger pOSTagger, NERecognizer nERecognizer) {
        // The two-argument constructor applies the defaults and stores segmenter and tagger.
        this(segmenter, pOSTagger);
        this.neRecognizer = nERecognizer;
        if (pOSTagger == null) {
            // Without a tagger neither flag is switched on, even if a recognizer was given.
            return;
        }
        config.speechTagging = true;
        if (nERecognizer != null) {
            config.ner = true;
        }
    }

    /**
     * Merges runs of adjacent words that together form an entry of the custom dictionary.
     *
     * <p>Two passes are made over the words: first against {@code CustomDictionary.dat}, then,
     * if it is non-null, against {@code CustomDictionary.trie}. In each pass the longest run
     * starting at a word that ends on a dictionary value is merged into a single word.
     *
     * <p>The given list is modified in place: it is cleared and refilled with the merged words.
     *
     * @param list the segmented words; rewritten with the merged result
     * @return a list aligned index-by-index with the rewritten {@code list}, holding the
     *         dictionary attribute of each word, or {@code null} for words without a match
     */
    protected static List<CoreDictionary.Attribute> combineWithCustomDictionary(List<String> list) {
        final int size = list.size();
        final String[] words = list.toArray(new String[size]);
        final CoreDictionary.Attribute[] attributes = new CoreDictionary.Attribute[size];

        // Pass 1: double-array trie. Transitions start from state 1.
        final DoubleArrayTrie<CoreDictionary.Attribute> dat = CustomDictionary.dat;
        for (int begin = 0; begin < size; begin++) {
            int state = dat.transition(words[begin], 1);
            if (state <= 0) {
                continue;
            }
            CoreDictionary.Attribute matched = dat.output(state);
            int end = begin + 1;
            for (int next = begin + 1; next < size; next++) {
                state = dat.transition(words[next], state);
                if (state < 0) {
                    break;
                }
                CoreDictionary.Attribute candidate = dat.output(state);
                if (candidate != null) {
                    // Remember the longest run seen so far that ends on a dictionary value.
                    matched = candidate;
                    end = next + 1;
                }
            }
            if (matched != null) {
                combineWords(words, begin, end, attributes, matched);
                // Skip the slots that were merged; the loop increment moves on to 'end'.
                begin = end - 1;
            }
        }

        // Pass 2: binary trie. Slots nulled by an earlier merge are skipped.
        if (CustomDictionary.trie != null) {
            for (int begin = 0; begin < size; begin++) {
                if (words[begin] == null) {
                    continue;
                }
                BaseNode<CoreDictionary.Attribute> node = CustomDictionary.trie.transition(words[begin], 0);
                if (node == null) {
                    continue;
                }
                CoreDictionary.Attribute matched = node.getValue();
                int end = begin + 1;
                for (int next = begin + 1; next < size; next++) {
                    if (words[next] == null) {
                        continue;
                    }
                    node = node.transition(words[next], 0);
                    if (node == null) {
                        break;
                    }
                    CoreDictionary.Attribute candidate = node.getValue();
                    if (candidate != null) {
                        matched = candidate;
                        end = next + 1;
                    }
                }
                if (matched != null) {
                    combineWords(words, begin, end, attributes, matched);
                    begin = end - 1;
                }
            }
        }

        // Rebuild the caller's list from the surviving slots, keeping attributes aligned.
        list.clear();
        final List<CoreDictionary.Attribute> result = new LinkedList<CoreDictionary.Attribute>();
        for (int i = 0; i < size; i++) {
            if (words[i] != null) {
                list.add(words[i]);
                result.add(attributes[i]);
            }
        }
        return result;
    }

    /**
     * Collapses the slots {@code [i, i2)} of {@code strArr} into slot {@code i} and records the
     * attribute for that slot. When the range covers exactly one slot the word is left as is.
     *
     * @param strArr       word slots; merged slots other than {@code i} are set to {@code null}
     * @param i            first slot of the range (inclusive); receives the merged word
     * @param i2           end of the range (exclusive)
     * @param attributeArr attribute slots parallel to {@code strArr}
     * @param attribute    attribute stored at index {@code i}
     */
    private static void combineWords(String[] strArr, int i, int i2, CoreDictionary.Attribute[] attributeArr, CoreDictionary.Attribute attribute) {
        final boolean singleSlot = (i2 == i + 1);
        if (!singleSlot) {
            final StringBuilder merged = new StringBuilder();
            int cursor = i;
            while (cursor < i2) {
                final String piece = strArr[cursor];
                if (piece != null) {
                    merged.append(piece);
                    // Clear the slot so the caller drops it when rebuilding the word list.
                    strArr[cursor] = null;
                }
                cursor++;
            }
            strArr[i] = merged.toString();
        }
        attributeArr[i] = attribute;
    }

    /**
     * Replaces entries of a tag array with the first nature of the attribute at the same
     * position. Positions whose attribute is {@code null} keep their existing tag.
     *
     * @param list   attributes by position; {@code null} makes this method a no-op
     * @param strArr tag array updated in place
     */
    private void overwriteTag(List<CoreDictionary.Attribute> list, String[] strArr) {
        if (list == null) {
            return;
        }
        int slot = -1;
        for (CoreDictionary.Attribute attribute : list) {
            slot++;
            if (attribute == null) {
                continue;
            }
            strArr[slot] = attribute.nature[0].toString();
        }
    }

    /**
     * Emits the piece {@code [i, i2)} of the text. A piece with type code 7 is handed to the
     * segmenter (together with the same range of {@code str2}); any other piece is appended to
     * the output as a single word.
     * NOTE(review): 7 is presumably CharType.CT_CHINESE - confirm against CharType.
     *
     * @param str  text the piece is cut from
     * @param str2 second text the same range is cut from when delegating to the segmenter
     * @param i    start of the piece (inclusive)
     * @param i2   end of the piece (exclusive)
     * @param b    type code of the piece
     * @param list output word list
     */
    private void pushPiece(String str, String str2, int i, int i2, byte b, List<String> list) {
        final String piece = str.substring(i, i2);
        if (b != 7) {
            list.add(piece);
            return;
        }
        segmenter.segment(piece, str2.substring(i, i2), list);
    }

    /**
     * Segments the text into {@code list} and returns the custom-dictionary attributes that go
     * with the words, depending on the configuration:
     * <ul>
     *   <li>custom dictionary disabled: plain segmentation, returns {@code null};</li>
     *   <li>custom dictionary forced: dictionary hits are cut out first and their attributes
     *       collected while segmenting;</li>
     *   <li>otherwise: plain segmentation followed by merging with the custom dictionary.</li>
     * </ul>
     *
     * @param str  text to segment
     * @param str2 second text passed alongside {@code str} to the segmentation methods
     * @param list output word list
     * @return attributes by word position, or {@code null} when the custom dictionary is off
     */
    private List<CoreDictionary.Attribute> segmentWithAttribute(String str, String str2, List<String> list) {
        if (!config.useCustomDictionary) {
            segmentAfterRule(str, str2, list);
            return null;
        }
        if (config.forceCustomDictionary) {
            final List<CoreDictionary.Attribute> attributes = new LinkedList<CoreDictionary.Attribute>();
            segment(str, str2, list, attributes);
            return attributes;
        }
        segmentAfterRule(str, str2, list);
        return combineWithCustomDictionary(list);
    }

    /**
     * Decides whether a custom-dictionary word spanning {@code [i, i2)} is accepted. Every
     * word is accepted when the custom dictionary is forced. Otherwise the span must be at
     * least 4 long and the attribute must have no nature starting with "nr", "ns" or "nt".
     *
     * @param i         start of the span
     * @param i2        end of the span
     * @param attribute attribute of the dictionary word
     * @return {@code true} if the word is accepted
     */
    protected boolean acceptCustomWord(int i, int i2, CoreDictionary.Attribute attribute) {
        if (config.forceCustomDictionary) {
            return true;
        }
        if (i2 - i < 4) {
            return false;
        }
        return !attribute.hasNatureStartsWith("nr")
                && !attribute.hasNatureStartsWith("ns")
                && !attribute.hasNatureStartsWith("nt");
    }

    /**
     * Analyzes a sentence given as a string and returns it as a {@code Sentence}.
     *
     * <p>NOTE(review): the real body was lost during decompilation. As it stands this method
     * throws {@link UnsupportedOperationException} on every call; no instruction dump is
     * included here, so the logic has to be restored from the original bytecode or sources.
     *
     * @param r14 the sentence text (register-derived name left by the decompiler)
     * @return never returns normally in the current state
     * @throws UnsupportedOperationException always, until the body is restored
     */
    /* JADX WARN: Code restructure failed: missing block: B:23:0x00a8, code lost:
    
        if (r8 == 'O') goto L23;
     */
    @Override // com.hankcs.hanlp.tokenizer.lexical.LexicalAnalyzer
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public com.hankcs.hanlp.corpus.document.sentence.Sentence analyze(java.lang.String r14) {
        /*
            Method dump skipped, instructions count: 321
            To view this dump add '--comments-level debug' option
        */
        // Placeholder emitted by the decompiler in place of the lost method body.
        throw new UnsupportedOperationException("Method not decompiled: com.hankcs.hanlp.tokenizer.lexical.AbstractLexicalAnalyzer.analyze(java.lang.String):com.hankcs.hanlp.corpus.document.sentence.Sentence");
    }

    /**
     * Turns rule-based pre-segmentation on or off.
     *
     * @param z the new value of the {@code enableRuleBasedSegment} flag
     * @return this analyzer, so calls can be chained
     */
    public AbstractLexicalAnalyzer enableRuleBasedSegment(boolean z) {
        this.enableRuleBasedSegment = z;
        return this;
    }

    /**
     * Returns the tag set of the configured named-entity recognizer.
     * Delegates directly to {@code neRecognizer}; there is no null check, so the recognizer
     * must have been set.
     *
     * @return the recognizer's tag set
     */
    @Override // com.hankcs.hanlp.tokenizer.lexical.NERecognizer
    public NERTagSet getNERTagSet() {
        return this.neRecognizer.getNERTagSet();
    }

    /**
     * Runs named-entity recognition by delegating to the configured recognizer.
     * There is no null check, so the recognizer must have been set.
     *
     * @param strArr  first array forwarded to the recognizer
     *                (presumably the words - confirm against NERecognizer)
     * @param strArr2 second array forwarded to the recognizer
     *                (presumably the part-of-speech tags - confirm against NERecognizer)
     * @return whatever the recognizer returns
     */
    @Override // com.hankcs.hanlp.tokenizer.lexical.NERecognizer
    public String[] recognize(String[] strArr, String[] strArr2) {
        return this.neRecognizer.recognize(strArr, strArr2);
    }

    /**
     * Not implemented by this class: always returns {@code null}.
     *
     * @param cArr the sentence characters (ignored)
     * @return always {@code null}
     */
    @Override // com.hankcs.hanlp.seg.CharacterBasedSegment
    protected List<Term> roughSegSentence(char[] cArr) {
        return null;
    }

    /**
     * Segments a sentence given as a character array into a list of terms.
     *
     * <p>NOTE(review): the real body was lost during decompilation. As it stands this method
     * throws {@link UnsupportedOperationException} on every call; no instruction dump is
     * included here, so the logic has to be restored from the original bytecode or sources.
     *
     * @param r17 the sentence characters (register-derived name left by the decompiler)
     * @return never returns normally in the current state
     * @throws UnsupportedOperationException always, until the body is restored
     */
    /* JADX WARN: Removed duplicated region for block: B:72:0x01ab  */
    /* JADX WARN: Removed duplicated region for block: B:75:0x01bf  */
    @Override // com.hankcs.hanlp.seg.CharacterBasedSegment, com.hankcs.hanlp.seg.Segment
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    protected java.util.List<com.hankcs.hanlp.seg.common.Term> segSentence(char[] r17) {
        /*
            Method dump skipped, instructions count: 628
            To view this dump add '--comments-level debug' option
        */
        // Placeholder emitted by the decompiler in place of the lost method body.
        throw new UnsupportedOperationException("Method not decompiled: com.hankcs.hanlp.tokenizer.lexical.AbstractLexicalAnalyzer.segSentence(char[]):java.util.List");
    }

    /**
     * Segments a text into words. The text is first passed through {@code CharTable.convert}
     * and both forms are handed to {@link #segment(String, String)}.
     *
     * @param str the text to segment
     * @return the words produced for {@code str}
     */
    @Override // com.hankcs.hanlp.tokenizer.lexical.Segmenter
    public List<String> segment(String str) {
        return segment(str, CharTable.convert(str));
    }

    /**
     * Segments a text into a newly created word list.
     *
     * @param str  the text to segment
     * @param str2 second form of the same text, forwarded unchanged to
     *             {@link #segment(String, String, List)}
     * @return a new list holding the words in order
     */
    public List<String> segment(String str, String str2) {
        final List<String> words = new LinkedList<String>();
        segment(str, str2, words);
        return words;
    }

    /**
     * Segments a text and appends the words to {@code list}.
     *
     * <p>With the custom dictionary disabled the whole text goes to {@code segmentAfterRule}.
     * Otherwise {@code CustomDictionary.parseLongestText} reports dictionary hits; each hit
     * becomes one word, and the text between hits (and after the last hit) is segmented with
     * {@code segmentAfterRule}.
     *
     * @param str  the text to segment
     * @param str2 second form of the text; the same ranges are cut from it as from {@code str}
     * @param list output word list
     */
    @Override // com.hankcs.hanlp.tokenizer.lexical.Segmenter
    public void segment(final String str, final String str2, final List<String> list) {
        if (this.config.useCustomDictionary) {
            // One-element array so the callback can update how much text has been consumed.
            final int[] consumed = new int[1];
            CustomDictionary.parseLongestText(str, new AhoCorasickDoubleArrayTrie.IHit<CoreDictionary.Attribute>() { // from class: com.hankcs.hanlp.tokenizer.lexical.AbstractLexicalAnalyzer.2
                @Override // com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie.IHit
                public void hit(int begin, int end, CoreDictionary.Attribute value) {
                    final int from = consumed[0];
                    if (from != begin) {
                        // Segment the gap between the previous hit and this one.
                        AbstractLexicalAnalyzer.this.segmentAfterRule(str.substring(from, begin), str2.substring(from, begin), list);
                    }
                    list.add(str.substring(begin, end));
                    consumed[0] = end;
                }
            });
            final int tail = consumed[0];
            if (tail != str.length()) {
                // Text left over after the last dictionary hit.
                segmentAfterRule(str.substring(tail), str2.substring(tail), list);
            }
            return;
        }
        segmentAfterRule(str, str2, list);
    }

    /**
     * Segments a text and, for every custom-dictionary hit, records its attribute.
     *
     * <p>When {@code list2} is {@code null} the whole text goes to {@code segmentAfterRule}.
     * Otherwise {@code CustomDictionary.parseLongestText} reports dictionary hits; each hit
     * becomes one word whose attribute is stored in {@code list2} at the same index as the word
     * in {@code list}, with {@code null} padding for the words in between. Words produced from
     * the text after the last hit get no entry, so {@code list2} may be shorter than
     * {@code list}.
     *
     * @param str   the text to segment
     * @param str2  second form of the text; the same ranges are cut from it as from {@code str}
     * @param list  output word list
     * @param list2 output attribute list, or {@code null} to skip the dictionary lookup
     */
    protected void segment(final String str, final String str2, final List<String> list, final List<CoreDictionary.Attribute> list2) {
        if (list2 == null) {
            segmentAfterRule(str, str2, list);
            return;
        }
        // One-element array so the callback can update how much text has been consumed.
        final int[] consumed = {0};
        CustomDictionary.parseLongestText(str, new AhoCorasickDoubleArrayTrie.IHit<CoreDictionary.Attribute>() { // from class: com.hankcs.hanlp.tokenizer.lexical.AbstractLexicalAnalyzer.1
            @Override // com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie.IHit
            public void hit(int begin, int end, CoreDictionary.Attribute value) {
                final int from = consumed[0];
                if (begin != from) {
                    // Segment the gap between the previous hit and this one.
                    AbstractLexicalAnalyzer.this.segmentAfterRule(str.substring(from, begin), str2.substring(from, begin), list);
                }
                // Pad with nulls so the hit's attribute lands at the same index as its word.
                while (list2.size() < list.size()) {
                    list2.add(null);
                }
                list.add(str.substring(begin, end));
                list2.add(value);
                consumed[0] = end;
            }
        });
        final int tail = consumed[0];
        if (tail != str.length()) {
            // Text left over after the last dictionary hit.
            segmentAfterRule(str.substring(tail), str2.substring(tail), list);
        }
    }

    /**
     * Segments a text, optionally pre-splitting it by character type.
     *
     * <p>With rule-based segmentation disabled the text goes straight to the segmenter.
     * Otherwise the normalized text is scanned and cut wherever the character type (looked up
     * in {@link #typeTable}) changes; each piece is emitted through {@code pushPiece}. Two
     * exceptions keep a run of type 9 together
     * (NOTE(review): 9 is presumably CharType.CT_NUM - confirm against CharType):
     * <ul>
     *   <li>one of the separators {@code "，,．."} directly followed by another type-9
     *       character does not end the run (e.g. a decimal point between digits);</li>
     *   <li>one of the characters {@code "年月日时分秒"} is attached to the run, and the run
     *       continues with that character's type.</li>
     * </ul>
     *
     * <p>This body was rebuilt from the JADX instruction dump after decompilation failed; the
     * only addition is the empty-text guard.
     *
     * @param sentence   the text to segment
     * @param normalized second form of the text, used for the type lookup; pieces are cut from
     *                   both texts with the same indices, so the two must have equal length
     * @param wordList   output word list
     */
    protected void segmentAfterRule(String sentence, String normalized, List<String> wordList) {
        if (!enableRuleBasedSegment) {
            segmenter.segment(sentence, normalized, wordList);
            return;
        }
        final int length = normalized.length();
        if (length == 0) {
            // Nothing to cut; also avoids charAt(0) on an empty string.
            return;
        }
        final byte numberType = 9;
        int start = 0;
        int end = 0;
        byte preType = typeTable[normalized.charAt(0)];
        while (++end < length) {
            final char current = normalized.charAt(end);
            final byte curType = typeTable[current];
            if (curType != preType) {
                if (preType == numberType) {
                    if ("，,．.".indexOf(current) != -1) {
                        if (end + 1 < length && typeTable[normalized.charAt(end + 1)] == numberType) {
                            // Separator inside a number: keep scanning, preType stays 9.
                            continue;
                        }
                    } else if ("年月日时分秒".indexOf(current) != -1) {
                        // Date/time unit after a number: no cut, adopt the unit's type.
                        preType = curType;
                        continue;
                    }
                }
                pushPiece(sentence, normalized, start, end, preType, wordList);
                start = end;
            }
            preType = curType;
        }
        // The loop only exits with end == length, so the trailing piece is always emitted.
        pushPiece(sentence, normalized, start, end, preType, wordList);
    }

    /**
     * Tags a list of words by delegating to the configured part-of-speech tagger.
     * There is no null check, so the tagger must have been set.
     *
     * @param list the words to tag
     * @return whatever the tagger returns for these words
     */
    @Override // com.hankcs.hanlp.tokenizer.lexical.POSTagger
    public String[] tag(List<String> list) {
        return this.posTagger.tag(list);
    }

    /**
     * Tags the given words by delegating to the configured part-of-speech tagger.
     * There is no null check, so the tagger must have been set.
     *
     * @param strArr the words to tag
     * @return whatever the tagger returns for these words
     */
    @Override // com.hankcs.hanlp.tokenizer.lexical.POSTagger
    public String[] tag(String... strArr) {
        return this.posTagger.tag(strArr);
    }
}
