package org.apache.tika.language;

import de.schlichtherle.truezip.entry.EntryName;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.io.IOUtils;

/* loaded from: input_file:org/apache/tika/language/LanguageProfilerBuilder.class */
/**
 * Builds n-gram profiles from text and compares them with each other.
 *
 * <p>A profile maps every n-gram seen in the analysed text to an entry (project type
 * {@code a}) holding its occurrence count and its relative frequency among n-grams of the
 * same length. Profiles can be created from raw text, written to and read from a simple
 * line-based text format, and compared with {@link #getSimilarity(LanguageProfilerBuilder)}.
 *
 * <p>NOTE(review): the helper types {@code a} (n-gram entry), {@code b} (reusable character
 * buffer) and {@code org.apache.tika.d.b} (exception) are declared elsewhere; comments about
 * their members below describe how this class uses them and should be confirmed against
 * their sources.
 *
 * <p>Instances are not safe for concurrent use: analysis reuses a single mutable buffer.
 */
public class LanguageProfilerBuilder {
    /** Marker put at the start and at the end of every word before n-grams are cut from it. */
    private static final String SEPARATOR = "_";

    /** Character form of {@link #SEPARATOR}, appended to the word buffer. */
    private static final char SEPARATOR_CHAR = '_';

    /** N-gram length bounds used by every profile built through {@link #main(String[])}. */
    private static final int DEFAULT_MIN_NGRAM_LENGTH = 3;
    private static final int DEFAULT_MAX_NGRAM_LENGTH = 3;

    /** Upper bound on the number of entries kept when sorting or saving a profile. */
    private static final int MAX_SIZE = 1000;

    /** Initial capacity of the n-gram map. */
    private static final int INITIAL_CAPACITY = 4000;

    /** Size of the character buffer used while reading input text. */
    private static final int READ_BUFFER_SIZE = 4096;

    /** Number of operands every command line option consumes. */
    private static final int OPERANDS_PER_COMMAND = 3;

    /** Command codes selected by the command line options. */
    private static final int COMMAND_NONE = 0;
    private static final int COMMAND_CREATE = 1;
    private static final int COMMAND_SIMILARITY = 2;
    private static final int COMMAND_SCORE = 3;

    private static final String USAGE =
            "Usage: NGramProfile [-create profilename filename encoding] "
                    + "[-similarity file1 file2 encoding] [-score profile-name filename encoding]";

    /** Name of this profile; only used in {@link #toString()}. */
    private final String name;

    /** Shortest and longest n-gram length recorded by this profile. */
    private final int minLength;
    private final int maxLength;

    /** All recorded n-grams, keyed by their character sequence. */
    private final Map<CharSequence, a> ngrams;

    /** Cached result of {@link #getSorted()}; {@code null} until first requested. */
    private List<a> sorted = null;

    /** Total occurrence count per n-gram length (index = length); {@code null} until computed. */
    private int[] ngramCounts = null;

    /** Reusable buffer holding the word currently being analysed. */
    private final b word = new b();

    /**
     * Creates an empty profile.
     *
     * @param str profile name
     * @param i   minimum n-gram length to record
     * @param i2  maximum n-gram length to record
     */
    private LanguageProfilerBuilder(String str, int i, int i2) {
        this.ngrams = new HashMap<>(INITIAL_CAPACITY);
        this.minLength = i;
        this.maxLength = i2;
        this.name = str;
    }

    /**
     * Records every n-gram that ends at the current end of the word buffer and whose length
     * lies between the minimum and maximum n-gram length.
     *
     * @param buffer the word buffer in its current state
     */
    private void add(b buffer) {
        int length = buffer.length();
        if (length < minLength) {
            return;
        }
        int longest = Math.min(maxLength, length);
        for (int n = minLength; n <= longest; n++) {
            CharSequence ngram = buffer.subSequence(length - n, length);
            if (ngram.equals(SEPARATOR)) {
                // A lone separator carries no information.
                continue;
            }
            a entry = ngrams.get(ngram);
            if (entry == null) {
                entry = new a(ngram);
                ngrams.put(ngram, entry);
            }
            // NOTE(review): a.c() is presumably the "increment count" operation - confirm.
            entry.c();
        }
    }

    /**
     * Replaces the content of this profile with the n-grams of the given text.
     *
     * <p>The text is lower-cased character by character and split into words at every
     * non-letter; each word is wrapped in separators before its n-grams are recorded.
     *
     * @param text the text to analyse
     */
    private void analyze(StringBuilder text) {
        ngrams.clear();
        sorted = null;
        ngramCounts = null;

        // NOTE(review): b.a() is used as "reset and return this", b.a(char) as
        // "append and return this" - confirm against the buffer class.
        word.a().a(SEPARATOR_CHAR);
        for (int i = 0; i < text.length(); i++) {
            char c = Character.toLowerCase(text.charAt(i));
            if (Character.isLetter(c)) {
                add(word.a(c));
            } else if (word.length() > 1) {
                // End of a word: close it with a separator, then start a fresh one.
                add(word.a(SEPARATOR_CHAR));
                word.a().a(SEPARATOR_CHAR);
            }
        }
        if (word.length() > 1) {
            add(word.a(SEPARATOR_CHAR));
        }
        normalize();
    }

    /**
     * Computes the relative frequency of every entry: its count divided by the total count of
     * all entries of the same length. The per-length totals are computed first unless they
     * are already available (as they are after {@link #load(InputStream)}).
     */
    private void normalize() {
        if (ngramCounts == null) {
            ngramCounts = new int[maxLength + 1];
            for (a entry : ngrams.values()) {
                // NOTE(review): a.b() is used as the entry's n-gram length - confirm.
                ngramCounts[entry.b()] += entry.b;
            }
        }
        for (a entry : ngrams.values()) {
            // Divide as floats; integer division would truncate every frequency to 0 or 1.
            entry.c = (float) entry.b / (float) ngramCounts[entry.b()];
        }
    }

    /**
     * Returns the entries of this profile in their natural order, limited to the first
     * {@value #MAX_SIZE}. The result is cached until the profile is analysed again.
     *
     * @return the sorted, size-limited entry list
     */
    private List<a> getSorted() {
        if (sorted == null) {
            List<a> entries = new ArrayList<>(ngrams.values());
            Collections.sort(entries);
            sorted = entries.size() > MAX_SIZE ? entries.subList(0, MAX_SIZE) : entries;
        }
        return sorted;
    }

    /**
     * Returns the profile name followed by one {@code [sequence/count/frequency]} line per
     * sorted entry.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("NGramProfile: ").append(name).append("\n");
        for (a entry : getSorted()) {
            text.append("[").append(entry.a)
                    .append("/").append(entry.b)
                    .append("/").append(entry.c)
                    .append("]\n");
        }
        return text.toString();
    }

    /**
     * Computes a distance between this profile and another one; 0 means the sorted entries of
     * both profiles have identical frequencies, larger values mean less similar profiles.
     *
     * @param other the profile to compare with
     * @return the accumulated frequency distance
     * @throws org.apache.tika.d.b if the distance cannot be calculated
     */
    private float getSimilarity(LanguageProfilerBuilder other) throws org.apache.tika.d.b {
        try {
            // The accumulator is threaded through both passes so the floating point
            // additions happen in one sequence.
            float distance = accumulateDistance(0.0f, other.getSorted(), this.ngrams);
            return accumulateDistance(distance, this.getSorted(), other.ngrams);
        } catch (Exception unused) {
            // NOTE(review): the cause is dropped because only the single-String constructor
            // of this exception type is known to exist - confirm and chain the cause.
            throw new org.apache.tika.d.b("Could not calculate a score how well NGramProfiles match each other");
        }
    }

    /**
     * Adds to {@code total} the distance of every entry to its counterpart in
     * {@code reference}: half the absolute frequency difference when the n-gram exists there,
     * the full frequency otherwise.
     */
    private static float accumulateDistance(float total, List<a> entries, Map<CharSequence, a> reference) {
        float result = total;
        for (a entry : entries) {
            a counterpart = reference.get(entry.a);
            if (counterpart != null) {
                result += Math.abs(entry.c - counterpart.c) / 2.0f;
            } else {
                result += entry.c;
            }
        }
        return result;
    }

    /**
     * Replaces the content of this profile with entries read from a saved profile.
     *
     * <p>The input is UTF-8 text with one {@code <ngram> <count>} pair per line. Lines
     * starting with {@code #}, empty lines and lines without a space are skipped, as are
     * n-grams whose length is outside this profile's length range. The stream is not closed.
     *
     * @param inputStream the saved profile
     * @throws IOException           if reading fails
     * @throws NumberFormatException if a count is not a valid integer
     */
    private void load(InputStream inputStream) throws IOException {
        ngrams.clear();
        ngramCounts = new int[maxLength + 1];
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8));
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.isEmpty() || line.charAt(0) == '#') {
                continue;
            }
            int separatorIndex = line.indexOf(' ');
            if (separatorIndex < 0) {
                continue;
            }
            String sequence = line.substring(0, separatorIndex).trim();
            int length = sequence.length();
            if (length < minLength || length > maxLength) {
                continue;
            }
            int count = Integer.parseInt(line.substring(separatorIndex + 1).trim());
            a entry = new a(sequence, count);
            ngrams.put(entry.a, entry);
            ngramCounts[length] += count;
        }
        normalize();
    }

    /**
     * Creates a profile from the text of a stream. The stream is not closed.
     *
     * @param str         profile name
     * @param inputStream the text to analyse
     * @param str2        name of the charset the text is encoded in
     * @return the new, analysed profile
     * @throws org.apache.tika.d.b if the stream cannot be read or the charset is unsupported
     */
    private static LanguageProfilerBuilder create(String str, InputStream inputStream, String str2)
            throws org.apache.tika.d.b {
        LanguageProfilerBuilder profile =
                new LanguageProfilerBuilder(str, DEFAULT_MIN_NGRAM_LENGTH, DEFAULT_MAX_NGRAM_LENGTH);
        StringBuilder text = new StringBuilder();
        try {
            // Decode through a Reader: decoding each byte chunk on its own would corrupt
            // multi-byte characters that straddle a chunk boundary.
            BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, str2));
            char[] buffer = new char[READ_BUFFER_SIZE];
            int read;
            while ((read = reader.read(buffer)) != -1) {
                text.append(buffer, 0, read);
            }
        } catch (IOException e) {
            throw new org.apache.tika.d.b("Could not create profile, " + e.getMessage());
        }
        profile.analyze(text);
        return profile;
    }

    /**
     * Writes this profile in the format understood by {@link #load(InputStream)}: a comment
     * header, then for every n-gram length the first {@value #MAX_SIZE} entries in natural
     * order, one {@code <entry> <count>} pair per line. The stream is flushed but not closed.
     *
     * @param outputStream where to write the profile
     * @throws IOException if writing fails
     */
    private void save(OutputStream outputStream) throws IOException {
        String header = "# NgramProfile generated at " + new Date() + " for Apache Tika Language Identification\n";
        outputStream.write(header.getBytes(StandardCharsets.UTF_8));

        List<a> selected = new ArrayList<>();
        for (int length = minLength; length <= maxLength; length++) {
            List<a> ofLength = new ArrayList<>();
            for (a entry : ngrams.values()) {
                if (entry.a.length() == length) {
                    ofLength.add(entry);
                }
            }
            Collections.sort(ofLength);
            if (ofLength.size() > MAX_SIZE) {
                ofLength = ofLength.subList(0, MAX_SIZE);
            }
            selected.addAll(ofLength);
        }

        for (a entry : selected) {
            // NOTE(review): a.a() is presumably the entry's count - confirm.
            String line = entry.toString() + " " + entry.a() + "\n";
            outputStream.write(line.getBytes(StandardCharsets.UTF_8));
        }
        outputStream.flush();
    }

    /** Prints the usage text and terminates the JVM when an option lacks its operands. */
    private static void requireOperands(String[] args, int optionIndex) {
        if (optionIndex + OPERANDS_PER_COMMAND >= args.length) {
            System.err.println(USAGE);
            System.exit(-1);
        }
    }

    /**
     * Command line entry point.
     *
     * <ul>
     *   <li>{@code -create profilename filename encoding} analyses a text file and saves the
     *       profile as {@code profilename.ngp}</li>
     *   <li>{@code -similarity file1 file2 encoding} prints the distance between two text
     *       files</li>
     *   <li>{@code -score profile-name filename encoding} prints the distance between the
     *       saved profile {@code profile-name.ngp} and a text file</li>
     * </ul>
     *
     * @param strArr the command line arguments
     */
    public static void main(String[] strArr) {
        int command = COMMAND_NONE;
        String profileName = "";
        String fileName = "";
        String otherFileName = "";
        String encoding = "";
        if (strArr.length == 0) {
            System.err.println(USAGE);
            System.exit(-1);
        }
        int i = 0;
        while (i < strArr.length) {
            if (strArr[i].equals("-create")) {
                requireOperands(strArr, i);
                command = COMMAND_CREATE;
                profileName = strArr[++i];
                fileName = strArr[++i];
                encoding = strArr[++i];
            }
            if (strArr[i].equals("-similarity")) {
                requireOperands(strArr, i);
                command = COMMAND_SIMILARITY;
                fileName = strArr[++i];
                otherFileName = strArr[++i];
                encoding = strArr[++i];
            }
            if (strArr[i].equals("-score")) {
                requireOperands(strArr, i);
                command = COMMAND_SCORE;
                profileName = strArr[++i];
                fileName = strArr[++i];
                encoding = strArr[++i];
            }
            i++;
        }
        try {
            switch (command) {
                case COMMAND_CREATE: {
                    LanguageProfilerBuilder created;
                    try (InputStream in = new FileInputStream(new File(fileName))) {
                        created = create(profileName, in, encoding);
                    }
                    try (OutputStream out = new FileOutputStream(new File(profileName + ".ngp"))) {
                        created.save(out);
                    }
                    System.out.println("new profile " + profileName + ".ngp was created.");
                    return;
                }
                case COMMAND_SIMILARITY: {
                    LanguageProfilerBuilder first;
                    try (InputStream in = new FileInputStream(new File(fileName))) {
                        first = create(fileName, in, encoding);
                    }
                    first.normalize();
                    LanguageProfilerBuilder second;
                    try (InputStream in = new FileInputStream(new File(otherFileName))) {
                        second = create(otherFileName, in, encoding);
                    }
                    second.normalize();
                    System.out.println("Similarity is " + first.getSimilarity(second));
                    return;
                }
                case COMMAND_SCORE: {
                    LanguageProfilerBuilder text;
                    try (InputStream in = new FileInputStream(new File(fileName))) {
                        text = create(fileName, in, encoding);
                    }
                    LanguageProfilerBuilder stored = new LanguageProfilerBuilder(
                            profileName, DEFAULT_MIN_NGRAM_LENGTH, DEFAULT_MAX_NGRAM_LENGTH);
                    try (InputStream in = new FileInputStream(new File(profileName + ".ngp"))) {
                        stored.load(in);
                    }
                    System.out.println("Score is " + stored.getSimilarity(text));
                    return;
                }
                default:
                    return;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
