package com.hankcs.hanlp.mining.word;

import com.hankcs.hanlp.algorithm.MaxHeap;
import com.hankcs.hanlp.utility.LexiconUtility;
import defpackage.C$r8$wrapper$java$util$function$Function$VWRP;
import defpackage.C$r8$wrapper$java$util$function$ToDoubleFunction$VWRP;
import defpackage.C$r8$wrapper$java$util$function$ToIntFunction$VWRP;
import defpackage.C$r8$wrapper$java$util$function$ToLongFunction$VWRP;
import j$.util.Comparator;
import j$.util.function.Function;
import j$.util.function.ToDoubleFunction;
import j$.util.function.ToIntFunction;
import j$.util.function.ToLongFunction;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;

/* loaded from: classes3.dex */
/**
 * Discovers candidate new words in raw text.
 *
 * <p>Every substring of up to {@code max_word_len} characters that does not span a
 * delimiter is collected as a candidate. Candidates are then filtered by the
 * frequency, entropy and aggregation thresholds configured on this instance, and the
 * survivors are handed to a bounded {@link MaxHeap} ordered by {@code WordInfo.p}.
 *
 * <p>The statistics themselves ({@code p}, {@code entropy}, {@code aggregation}) are
 * computed by {@code WordInfo}, which is declared outside this file.
 */
public class NewWordDiscover {
    /**
     * Characters treated as delimiters (whitespace, digits, ASCII and CJK punctuation).
     * Compiled once because {@link Pattern} instances are immutable and reusable.
     */
    private static final Pattern DELIMITER = Pattern.compile("[\\s\\d,.<>/?:;'\"\\[\\]{}()\\|~!@#$%^&*\\-_=+，。《》、？：；“”‘’｛｝【】（）…￥！—┄－]+");

    /** Sentinel that replaces each delimiter run and also marks "no neighbour" at line edges. */
    private static final char SEPARATOR = (char) 0;

    /** When true, candidates for which {@code LexiconUtility.getFrequency} is positive are dropped. */
    private final boolean filter;
    /** Maximum candidate length, in chars. */
    private final int max_word_len;
    /** Lower bound on {@code WordInfo.aggregation}. */
    private final float min_aggregation;
    /** Lower bound on {@code WordInfo.entropy}. */
    private final float min_entropy;
    /** Lower bound on {@code WordInfo.p}. */
    private final float min_freq;

    /**
     * Orders {@link WordInfo} instances by their {@code p} field, ascending, using
     * {@link Float#compare(float, float)}.
     *
     * <p>This was an anonymous class in the original source; the name is the one the
     * decompiler assigned. The build-tool generated bridge methods
     * ({@code reversed}, {@code thenComparing*}) are not reproduced here because
     * {@link java.util.Comparator} already supplies them as default methods.
     */
    public class AnonymousClass1 implements java.util.Comparator<WordInfo> {
        AnonymousClass1() {
        }

        @Override
        public int compare(WordInfo wordInfo, WordInfo wordInfo2) {
            return Float.compare(wordInfo.p, wordInfo2.p);
        }
    }

    /**
     * Creates a discoverer with the defaults: maximum word length 4, minimum frequency
     * 5e-5, minimum entropy 0.4, minimum aggregation 1.2, lexicon filtering off.
     */
    public NewWordDiscover() {
        this(4, 5.0E-5f, 0.4f, 1.2f, false);
    }

    /**
     * Creates a discoverer with explicit thresholds.
     *
     * @param i  maximum candidate length in chars
     * @param f  minimum {@code WordInfo.p} a candidate must reach
     * @param f2 minimum {@code WordInfo.entropy} a candidate must reach
     * @param f3 minimum {@code WordInfo.aggregation} a candidate must reach
     * @param z  whether to drop candidates whose {@code LexiconUtility.getFrequency} is
     *           positive (presumably words the lexicon already knows - confirm against
     *           {@code LexiconUtility})
     */
    public NewWordDiscover(int i, float f, float f2, float f3, boolean z) {
        this.max_word_len = i;
        this.min_freq = f;
        this.min_entropy = f2;
        this.min_aggregation = f3;
        this.filter = z;
    }

    /**
     * Reads the text line by line, collects candidate substrings and returns the ones
     * that pass every configured threshold.
     *
     * <p>The reader is consumed until {@code readLine()} returns {@code null}; it is
     * not closed here and remains owned by the caller.
     *
     * @param bufferedReader source of the text
     * @param i              size passed to the {@link MaxHeap} (presumably the maximum
     *                       number of results - confirm against {@code MaxHeap})
     * @return the contents of the heap as produced by {@code MaxHeap.toList()}
     * @throws IOException if reading from {@code bufferedReader} fails
     */
    public List<WordInfo> discover(BufferedReader bufferedReader, int i) throws IOException {
        Map<String, WordInfo> candidates = new TreeMap<>();
        int totalLength = 0;
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            // Collapse every delimiter run to a single sentinel so no candidate spans it.
            String text = DELIMITER.matcher(line).replaceAll("\u0000");
            int length = text.length();
            for (int start = 0; start < length; start++) {
                int endLimit = Math.min(this.max_word_len + start + 1, length + 1);
                for (int end = start + 1; end < endLimit; end++) {
                    // Shorter windows from this start were sentinel-free, so only the
                    // newly added char needs checking; once it is a sentinel every
                    // longer window contains it too.
                    if (text.charAt(end - 1) == SEPARATOR) {
                        break;
                    }
                    String word = text.substring(start, end);
                    WordInfo info = candidates.get(word);
                    if (info == null) {
                        info = new WordInfo(word);
                        candidates.put(word, info);
                    }
                    // Neighbouring chars on each side; the sentinel stands in at line edges.
                    char left = start == 0 ? SEPARATOR : text.charAt(start - 1);
                    char right = end < length ? text.charAt(end) : SEPARATOR;
                    info.update(left, right);
                }
            }
            // Length is taken after delimiter collapsing, so each run counts as one char.
            totalLength += length;
        }

        for (WordInfo info : candidates.values()) {
            info.computeProbabilityEntropy(totalLength);
        }
        // Second pass: computeAggregation receives the full candidate map, so it runs
        // only after the first pass has finished for every entry.
        for (WordInfo info : candidates.values()) {
            info.computeAggregation(candidates);
        }

        // LinkedList keeps removal through the iterator cheap while filtering.
        List<WordInfo> survivors = new LinkedList<WordInfo>(candidates.values());
        Iterator<WordInfo> iterator = survivors.iterator();
        while (iterator.hasNext()) {
            if (isRejected(iterator.next())) {
                iterator.remove();
            }
        }

        MaxHeap<WordInfo> heap = new MaxHeap<WordInfo>(i, new AnonymousClass1());
        heap.addAll(survivors);
        return heap.toList();
    }

    /**
     * Convenience overload that runs discovery over an in-memory string.
     *
     * @param str text to analyse
     * @param i   size passed to the {@link MaxHeap}, as in
     *            {@link #discover(BufferedReader, int)}
     * @return the discovered words
     * @throws RuntimeException wrapping any {@link IOException} raised while reading
     */
    public List<WordInfo> discover(String str, int i) {
        try {
            return discover(new BufferedReader(new StringReader(str)), i);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Tells whether a candidate fails any configured threshold.
     *
     * <p>The lexicon lookup is evaluated last, and only when filtering is enabled and
     * every other check has passed.
     */
    private boolean isRejected(WordInfo info) {
        return info.text.trim().length() < 2
                || info.p < this.min_freq
                || info.entropy < this.min_entropy
                || info.aggregation < this.min_aggregation
                || (this.filter && LexiconUtility.getFrequency(info.text) > 0);
    }
}
