package com.hankcs.hanlp.classification.features;

import com.hankcs.hanlp.algorithm.MaxHeap;
import com.hankcs.hanlp.classification.corpus.IDataSet;
import com.hankcs.hanlp.classification.statistics.ContinuousDistributions;
import com.sun.xml.bind.v2.runtime.reflect.opt.Const;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.xmlbeans.SchemaType;

/* loaded from: input_file:WEB-INF/lib/hanlp-portable-1.6.8.jar:com/hankcs/hanlp/classification/features/ChiSquareFeatureExtractor.class */
/**
 * Selects features by scoring every (feature, category) pair with a chi-square statistic
 * computed from the 2x2 contingency table of document counts.
 *
 * <p>A feature is kept when its best score over all categories reaches
 * {@link #chisquareCriticalValue}; at most {@link #maxSize} features are returned.
 */
public class ChiSquareFeatureExtractor {
    /** Default upper bound on the number of features returned by {@link #chi_square}. */
    private static final int DEFAULT_MAX_SIZE = 1000000;

    /**
     * Minimum chi-square score a feature must reach to be selected. The default 10.83 is
     * approximately the critical value for significance level 0.001 at one degree of freedom.
     */
    protected double chisquareCriticalValue = 10.83d;

    /** Maximum number of features kept in the result of {@link #chi_square}. */
    protected int maxSize = DEFAULT_MAX_SIZE;

    /**
     * Builds the count statistics for a data set.
     *
     * @param iDataSet the data set handed to the {@code BaseFeatureData} constructor
     * @return a new {@code BaseFeatureData} constructed from {@code iDataSet}
     */
    public static BaseFeatureData extractBasicFeatureData(IDataSet iDataSet) {
        return new BaseFeatureData(iDataSet);
    }

    /**
     * Scores every feature against every category and returns the selected features.
     *
     * <p>For each feature the highest score over all categories is recorded, provided it is
     * at least {@link #chisquareCriticalValue}. If no feature qualifies, every feature index
     * is returned with score {@code 0.0}. If more than {@link #maxSize} features qualify,
     * the result is cut down to {@code maxSize} entries through a {@code MaxHeap} ordered
     * by score.
     *
     * @param baseFeatureData count statistics; this method reads {@code n},
     *                        {@code categoryCounts} and {@code featureCategoryJointCount}
     * @return a mutable map from feature index to its recorded chi-square score
     */
    public Map<Integer, Double> chi_square(BaseFeatureData baseFeatureData) {
        Map<Integer, Double> selected = new HashMap<Integer, Double>();
        int[][] jointCount = baseFeatureData.featureCategoryJointCount;
        // All counts are held as doubles: the cross products below exceed the int range
        // on large corpora and would otherwise wrap around silently.
        double total = baseFeatureData.n;

        for (int feature = 0; feature < jointCount.length; feature++) {
            int[] perCategory = jointCount[feature];

            // N1. : sum of this feature's joint counts over all categories.
            double withFeature = 0.0d;
            for (int count : perCategory) {
                withFeature += count;
            }
            // N0. : the remainder of the total count.
            double withoutFeature = total - withFeature;

            for (int category = 0; category < perCategory.length; category++) {
                double n11 = perCategory[category];                           // feature, in category
                double n01 = baseFeatureData.categoryCounts[category] - n11;  // no feature, in category
                double n00 = withoutFeature - n01;                            // no feature, other category
                double n10 = withFeature - n11;                               // feature, other category

                double score = (total * Math.pow((n11 * n00) - (n10 * n01), 2.0d))
                        / ((n11 + n01) * (n11 + n10) * (n10 + n00) * (n01 + n00));

                // A NaN score (zero numerator over zero denominator) fails this comparison
                // and is therefore never selected.
                if (score >= this.chisquareCriticalValue) {
                    Double previous = selected.get(feature);
                    if (previous == null || score > previous.doubleValue()) {
                        selected.put(feature, score);
                    }
                }
            }
        }

        // Nothing reached the critical value: fall back to the full feature set.
        if (selected.isEmpty()) {
            for (int feature = 0; feature < jointCount.length; feature++) {
                selected.put(feature, 0.0d);
            }
        }

        // Too many features: keep only what the bounded heap retains.
        if (selected.size() > this.maxSize) {
            MaxHeap<Map.Entry<Integer, Double>> heap = new MaxHeap<Map.Entry<Integer, Double>>(
                    this.maxSize, new Comparator<Map.Entry<Integer, Double>>() {
                @Override
                public int compare(Map.Entry<Integer, Double> left, Map.Entry<Integer, Double> right) {
                    return left.getValue().compareTo(right.getValue());
                }
            });
            for (Map.Entry<Integer, Double> entry : selected.entrySet()) {
                heap.add(entry);
            }
            // Copy into a fresh map rather than clearing the source map first, so the
            // retained entries are never read after their backing map has been emptied.
            Map<Integer, Double> trimmed = new HashMap<Integer, Double>();
            for (Map.Entry<Integer, Double> entry : heap) {
                trimmed.put(entry.getKey(), entry.getValue());
            }
            selected = trimmed;
        }
        return selected;
    }

    /**
     * @return the minimum chi-square score required for a feature to be selected
     */
    public double getChisquareCriticalValue() {
        return this.chisquareCriticalValue;
    }

    /**
     * Sets the selection threshold directly.
     *
     * @param d the minimum chi-square score required for a feature to be selected
     */
    public void setChisquareCriticalValue(double d) {
        this.chisquareCriticalValue = d;
    }

    /**
     * Sets the selection threshold to
     * {@code ContinuousDistributions.ChisquareInverseCdf(d, 1)}, i.e. derived from a
     * probability level at one degree of freedom.
     *
     * @param d the level passed to the inverse chi-square CDF
     * @return this extractor, to allow call chaining
     */
    public ChiSquareFeatureExtractor setALevel(double d) {
        this.chisquareCriticalValue = ContinuousDistributions.ChisquareInverseCdf(d, 1);
        return this;
    }

    /**
     * @return {@code ContinuousDistributions.ChisquareCdf} evaluated at the current critical
     *         value with one degree of freedom
     */
    public double getALevel() {
        return ContinuousDistributions.ChisquareCdf(this.chisquareCriticalValue, 1);
    }
}
