package org.elasticsearch.xpack.textstructure.structurefinder;

import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ScheduledExecutorService;
import java.util.stream.Collectors;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.xpack.core.textstructure.structurefinder.TextStructure;

/* loaded from: input_file:org/elasticsearch/xpack/textstructure/structurefinder/TextStructureFinderManager.class */
public final class TextStructureFinderManager {
    /** Sample line count used by the {@code Integer}-based overload when the caller passes {@code null}. */
    public static final int DEFAULT_IDEAL_SAMPLE_LINE_COUNT = 1000;
    /** Line merge size limit used by the {@code Integer}-based overload when the caller passes {@code null}. */
    public static final int DEFAULT_LINE_MERGE_SIZE_LIMIT = 10000;
    /**
     * Lower-case charset names a detected encoding must be a member of to be accepted by
     * {@code findCharset}; initialised in the static initializer.
     */
    static final Set<String> FILEBEAT_SUPPORTED_ENCODINGS;
    /**
     * Factories consulted by {@code makeBestStructureFinder} in list order; the first one whose
     * {@code canCreateFromSample} returns true is used. Initialised in the static initializer.
     */
    private static final List<TextStructureFinderFactory> ORDERED_STRUCTURE_FACTORIES;
    /** Number of leading input bytes inspected by {@code findCharset}, and the size of its mark/reset buffer. */
    private static final int BUFFER_SIZE = 8192;
    /** Executor handed to each {@code TimeoutChecker} created by this manager; never null. */
    private final ScheduledExecutorService scheduler;
    // Set in the static initializer to the negation of desiredAssertionStatus(); guards the
    // hand-expanded assertion in findTextStructure. NOTE(review): looks like a decompiler artefact
    // of a plain `assert` statement - confirm before replacing.
    static final /* synthetic */ boolean $assertionsDisabled;

    /**
     * Creates a manager that hands the given executor to the timeout checker of each analysis.
     *
     * @param scheduledExecutorService executor stored for later use; must not be null
     * @throws NullPointerException if {@code scheduledExecutorService} is null
     */
    public TextStructureFinderManager(ScheduledExecutorService scheduledExecutorService) {
        // Validate first, then store: requireNonNull returns its argument, so no cast is needed.
        Objects.requireNonNull(scheduledExecutorService);
        this.scheduler = scheduledExecutorService;
    }

    /**
     * Convenience overload: analyses the input with no overrides and no timeout value.
     *
     * @param num         ideal number of sample lines, or null to use the default
     * @param num2        line merge size limit, or null to use the default
     * @param inputStream the text to analyse
     * @return the structure finder chosen for the sampled text
     * @throws Exception if the analysis fails
     */
    public TextStructureFinder findTextStructure(Integer num, Integer num2, InputStream inputStream) throws Exception {
        final TextStructureOverrides overrides = TextStructureOverrides.EMPTY_OVERRIDES;
        final TimeValue timeout = null; // no timeout value supplied by this overload
        return findTextStructure(num, num2, inputStream, overrides, timeout);
    }

    /**
     * Analyses the input using a fresh, private explanation list, substituting the class defaults
     * for any line-count or merge-limit argument that is null.
     *
     * @param num                    ideal number of sample lines, or null for
     *                               {@link #DEFAULT_IDEAL_SAMPLE_LINE_COUNT}
     * @param num2                   line merge size limit, or null for
     *                               {@link #DEFAULT_LINE_MERGE_SIZE_LIMIT}
     * @param inputStream            the text to analyse
     * @param textStructureOverrides caller-specified overrides
     * @param timeValue              timeout value passed through to the analysis; may be null
     * @return the structure finder chosen for the sampled text
     * @throws Exception if the analysis fails
     */
    public TextStructureFinder findTextStructure(Integer num, Integer num2, InputStream inputStream, TextStructureOverrides textStructureOverrides, TimeValue timeValue) throws Exception {
        int idealSampleLineCount = (num == null) ? DEFAULT_IDEAL_SAMPLE_LINE_COUNT : num.intValue();
        int lineMergeSizeLimit = (num2 == null) ? DEFAULT_LINE_MERGE_SIZE_LIMIT : num2.intValue();
        // Diamond instead of a raw ArrayList so the explanation list is a type-checked List<String>.
        return findTextStructure(new ArrayList<>(), idealSampleLineCount, lineMergeSizeLimit, inputStream, textStructureOverrides, timeValue);
    }

    /**
     * Convenience overload: analyses the input with no overrides and no timeout value, appending
     * explanation messages to the caller's list.
     *
     * @param list        receives human-readable explanation messages as decisions are made
     * @param i           ideal number of sample lines
     * @param i2          line merge size limit
     * @param inputStream the text to analyse
     * @return the structure finder chosen for the sampled text
     * @throws Exception if the analysis fails
     */
    public TextStructureFinder findTextStructure(List<String> list, int i, int i2, InputStream inputStream) throws Exception {
        final TextStructureOverrides overrides = TextStructureOverrides.EMPTY_OVERRIDES;
        final TimeValue timeout = null; // no timeout value supplied by this overload
        return findTextStructure(list, i, i2, inputStream, overrides, timeout);
    }

    /**
     * Main entry point: picks a character encoding, samples the input and selects a structure finder.
     *
     * <p>If the overrides specify a charset it is used directly; otherwise {@code findCharset}
     * chooses one. The text is then sampled (at least 2 and at most {@code i} lines) and passed to
     * {@code makeBestStructureFinder}.
     *
     * <p>On any failure, the explanation messages gathered so far are attached to the thrown
     * exception as a suppressed {@link ElasticsearchException} before it is rethrown.
     *
     * @param list                   receives explanation messages; read back if the analysis fails
     * @param i                      ideal (maximum) number of sample lines; asserted to be at least 2
     * @param i2                     line merge size limit passed to the chosen factory
     * @param inputStream            the text to analyse
     * @param textStructureOverrides caller-specified overrides (charset, delimiter, quote, format, ...)
     * @param timeValue              timeout value handed to the {@code TimeoutChecker}; may be null
     * @return the structure finder created from the sample
     * @throws IllegalArgumentException if the overridden charset is unavailable, or propagated from
     *                                  the helper methods
     * @throws Exception                any other failure raised during the analysis
     */
    public TextStructureFinder findTextStructure(List<String> list, int i, int i2, InputStream inputStream, TextStructureOverrides textStructureOverrides, TimeValue timeValue) throws Exception {
        // try-with-resources closes the checker on every exit path; previously it was only
        // closed on success, leaking it whenever the analysis threw.
        try (TimeoutChecker timeoutChecker = new TimeoutChecker("structure analysis", timeValue, this.scheduler)) {
            String charsetName = textStructureOverrides.getCharset();
            Reader sampleReader;
            if (charsetName != null) {
                try {
                    sampleReader = new InputStreamReader(inputStream, charsetName);
                    list.add("Using specified character encoding [" + charsetName + "]");
                } catch (UnsupportedEncodingException e) {
                    throw new IllegalArgumentException("Supplied character encoding [" + charsetName + "] not available", e);
                }
            } else {
                CharsetMatch charsetMatch = findCharset(list, inputStream, timeoutChecker);
                charsetName = charsetMatch.getName();
                sampleReader = charsetMatch.getReader();
            }

            // Hand-expanded assertion: only active when assertions are enabled for this class.
            if (!$assertionsDisabled && i < 2) {
                throw new AssertionError();
            }

            Tuple<String, Boolean> sampleInfo = sampleText(sampleReader, charsetName, 2, i, timeoutChecker);
            return makeBestStructureFinder(list, sampleInfo.v1(), charsetName, sampleInfo.v2(), i2, textStructureOverrides, timeoutChecker);
        } catch (Exception failure) {
            // Attach whatever explanation was built up so the caller can see how far analysis got.
            if (!list.isEmpty()) {
                String explanationSoFar = list.stream().collect(Collectors.joining("]\n[", "Explanation so far:\n[", "]\n"));
                failure.addSuppressed(new ElasticsearchException(explanationSoFar));
            }
            throw failure;
        }
    }

    /**
     * Chooses a character encoding for the input.
     *
     * <p>ICU4J's {@link CharsetDetector} proposes candidates. Up to the first {@code BUFFER_SIZE}
     * bytes are then inspected to find whether they are pure ASCII and how many zero bytes sit at
     * even and odd offsets. The stream is reset afterwards, so these bytes are not consumed.
     *
     * <ul>
     *   <li>Pure-ASCII input with a UTF-8 candidate: the UTF-8 candidate is returned.</li>
     *   <li>Otherwise candidates are tried in the detector's order. A candidate is rejected if the
     *       JVM or {@code FILEBEAT_SUPPORTED_ENCODINGS} does not support it, if the input has zero
     *       bytes but the encoding of a space has none, or if zero bytes are split too evenly
     *       between even and odd offsets.</li>
     * </ul>
     *
     * @param list           receives an explanation message for each accepted or rejected candidate
     * @param inputStream    the raw input; wrapped in a {@link BufferedInputStream} if it does not
     *                       support mark/reset
     * @param timeoutChecker checked after detection and again after the byte scan
     * @return the first acceptable candidate
     * @throws IllegalArgumentException if no candidate is acceptable
     * @throws Exception                propagated from stream I/O or the timeout checker
     */
    CharsetMatch findCharset(List<String> list, InputStream inputStream, TimeoutChecker timeoutChecker) throws Exception {
        // mark/reset is required both by the detector and by the scan below.
        if (!inputStream.markSupported()) {
            inputStream = new BufferedInputStream(inputStream, BUFFER_SIZE);
        }
        CharsetMatch[] charsetMatches = new CharsetDetector().setText(inputStream).detectAll();
        timeoutChecker.check("character set detection");

        boolean pureAscii = true;
        int evenPosZeroCount = 0;
        int oddPosZeroCount = 0;
        inputStream.mark(BUFFER_SIZE);
        byte[] workspace = new byte[BUFFER_SIZE];
        int totalRead = 0;
        while (totalRead < BUFFER_SIZE) {
            int bytesRead = inputStream.read(workspace, 0, BUFFER_SIZE - totalRead);
            if (bytesRead <= 0) {
                break;
            }
            for (int idx = 0; idx < bytesRead; idx++) {
                byte current = workspace[idx];
                if (current == 0) {
                    pureAscii = false;
                    // Parity must use the absolute offset in the input: each read() refills the
                    // workspace from index 0, so after an odd-length short read the chunk-local
                    // index would have the opposite parity to the real position.
                    if ((totalRead + idx) % 2 == 0) {
                        evenPosZeroCount++;
                    } else {
                        oddPosZeroCount++;
                    }
                } else if (current < 0) {
                    // Java bytes are signed, so a negative value is a byte >= 0x80 (non-ASCII).
                    pureAscii = false;
                }
            }
            totalRead += bytesRead;
        }
        inputStream.reset();
        boolean containsZeroBytes = evenPosZeroCount > 0 || oddPosZeroCount > 0;
        timeoutChecker.check("character set detection");

        if (pureAscii) {
            // Pure ASCII is valid in many encodings; prefer UTF-8 whenever the detector offered it.
            Optional<CharsetMatch> utf8Match = Arrays.stream(charsetMatches)
                .filter(candidate -> StandardCharsets.UTF_8.name().equals(candidate.getName()))
                .findFirst();
            if (utf8Match.isPresent()) {
                list.add("Using character encoding [" + StandardCharsets.UTF_8.name() + "], which matched the input with [" + utf8Match.get().getConfidence() + "%] confidence - first [8kB] of input was pure ASCII");
                return utf8Match.get();
            }
        }

        for (CharsetMatch candidate : charsetMatches) {
            String name = candidate.getName();
            if (Charset.isSupported(name) && FILEBEAT_SUPPORTED_ENCODINGS.contains(name.toLowerCase(Locale.ROOT))) {
                // Use the encoding of a space to tell whether this charset produces zero bytes.
                boolean spaceEncodingContainsZeroByte = false;
                Charset charset = Charset.forName(name);
                if (charset.canEncode()) {
                    byte[] encodedSpace = " ".getBytes(charset);
                    for (int idx = 0; idx < encodedSpace.length && !spaceEncodingContainsZeroByte; idx++) {
                        spaceEncodingContainsZeroByte = encodedSpace[idx] == 0;
                    }
                }
                if (containsZeroBytes && !spaceEncodingContainsZeroByte) {
                    list.add("Character encoding [" + name + "] matched the input with [" + candidate.getConfidence() + "%] confidence but was rejected as the input contains zero bytes and the [" + name + "] encoding does not");
                } else {
                    // Accept when there are no zero bytes, or when one parity has at least 1.5x as
                    // many zero bytes as the other.
                    if (!containsZeroBytes || 3 * oddPosZeroCount <= 2 * evenPosZeroCount || 3 * evenPosZeroCount <= 2 * oddPosZeroCount) {
                        list.add("Using character encoding [" + name + "], which matched the input with [" + candidate.getConfidence() + "%] confidence");
                        return candidate;
                    }
                    list.add("Character encoding [" + name + "] matched the input with [" + candidate.getConfidence() + "%] confidence but was rejected as the distribution of zero bytes between odd and even positions in the text is very close - [" + evenPosZeroCount + "] and [" + oddPosZeroCount + "] in the first [8kB] of input");
                }
            } else {
                list.add("Character encoding [" + name + "] matched the input with [" + candidate.getConfidence() + "%] confidence but was rejected as it is not supported by [" + (Charset.isSupported(name) ? "Filebeat" : "the JVM") + "]");
            }
        }
        throw new IllegalArgumentException("Could not determine a usable character encoding for the input" + (containsZeroBytes ? " - could it be binary data?" : ""));
    }

    /**
     * Selects the candidate factories implied by the overrides and returns the finder created by
     * the first one that accepts the sample.
     *
     * <ul>
     *   <li>Delimiter overridden: a single delimited factory built from the overrides.</li>
     *   <li>Quote or trim overridden, or format overridden to delimited: the delimited factories
     *       from {@code ORDERED_STRUCTURE_FACTORIES}, each adapted via {@code makeSimilar}.</li>
     *   <li>Otherwise: every factory in {@code ORDERED_STRUCTURE_FACTORIES} that can find the
     *       overridden format.</li>
     * </ul>
     *
     * @param list                   receives explanation messages from the factories
     * @param str                    the sampled text
     * @param str2                   name of the character encoding in use
     * @param bool                   byte-order-marker flag produced by the sampler; may be null
     * @param i                      line merge size limit
     * @param textStructureOverrides caller-specified overrides
     * @param timeoutChecker         checked before each candidate factory is tried
     * @return the finder created by the first factory that accepts the sample
     * @throws IllegalArgumentException if no candidate factory accepts the sample
     * @throws Exception                propagated from the factories or the timeout checker
     */
    TextStructureFinder makeBestStructureFinder(List<String> list, String str, String str2, Boolean bool, int i, TextStructureOverrides textStructureOverrides, TimeoutChecker timeoutChecker) throws Exception {
        final Character delimiter = textStructureOverrides.getDelimiter();
        final Character quote = textStructureOverrides.getQuote();
        final Boolean shouldTrimFields = textStructureOverrides.getShouldTrimFields();

        // Third argument to canCreateFromSample; presumably a tolerance - confirm in the factories.
        final double tolerance;
        final List<TextStructureFinderFactory> candidates;

        if (delimiter != null) {
            tolerance = 0.1d;
            final char quoteChar;
            if (quote == null) {
                quoteChar = '"';
            } else {
                quoteChar = quote.charValue();
            }
            final boolean trimFields;
            if (shouldTrimFields == null) {
                // With no explicit preference, trimming is enabled only for the pipe delimiter.
                trimFields = delimiter.charValue() == '|';
            } else {
                trimFields = shouldTrimFields.booleanValue();
            }
            candidates = Collections.singletonList(new DelimitedTextStructureFinderFactory(delimiter.charValue(), quoteChar, 1, trimFields));
        } else if (quote != null || shouldTrimFields != null || TextStructure.Format.DELIMITED.equals(textStructureOverrides.getFormat())) {
            tolerance = 0.05d;
            candidates = ORDERED_STRUCTURE_FACTORIES.stream()
                .filter(factory -> factory instanceof DelimitedTextStructureFinderFactory)
                .<TextStructureFinderFactory>map(factory -> ((DelimitedTextStructureFinderFactory) factory).makeSimilar(quote, shouldTrimFields))
                .collect(Collectors.toList());
        } else {
            tolerance = 0.0d;
            candidates = ORDERED_STRUCTURE_FACTORIES.stream()
                .filter(factory -> factory.canFindFormat(textStructureOverrides.getFormat()))
                .collect(Collectors.toList());
        }

        for (TextStructureFinderFactory candidate : candidates) {
            timeoutChecker.check("high level format detection");
            if (!candidate.canCreateFromSample(list, str, tolerance)) {
                continue;
            }
            return candidate.createFromSample(list, str, str2, bool, i, textStructureOverrides, timeoutChecker);
        }

        final String expected;
        if (textStructureOverrides.getFormat() == null) {
            expected = "any known formats";
        } else {
            expected = "the specified format [" + textStructureOverrides.getFormat() + "]";
        }
        throw new IllegalArgumentException("Input did not match " + expected);
    }

    /**
     * Reads up to {@code i2} lines from the reader into one sample string, with every line
     * terminated by a single {@code '\n'}.
     *
     * <p>For charsets whose upper-cased name starts with "UTF", the first character is read
     * separately: a byte-order marker (U+FEFF) or a carriage return is dropped, and anything else
     * is kept as the start of the sample.
     *
     * @param reader         source of characters
     * @param str            name of the character encoding the reader is decoding
     * @param i              minimum number of lines required
     * @param i2             maximum number of lines to include in the sample
     * @param timeoutChecker checked after each line is appended
     * @return the sample text paired with whether a byte-order marker was seen; the flag is null
     *         for non-UTF charsets
     * @throws IllegalArgumentException if fewer than {@code i} lines were counted
     * @throws IOException              if reading fails
     */
    private Tuple<String, Boolean> sampleText(Reader reader, String str, int i, int i2, TimeoutChecker timeoutChecker) throws IOException {
        BufferedReader lineReader = new BufferedReader(reader);
        StringBuilder sample = new StringBuilder();
        int lineCount = 0;
        Boolean hasByteOrderMarker = null;

        if (str.toUpperCase(Locale.ROOT).startsWith("UTF")) {
            // Read from the underlying reader; the buffered wrapper has not consumed anything yet.
            int firstChar = reader.read();
            boolean isByteOrderMarker = (char) firstChar == 0xFEFF;
            hasByteOrderMarker = isByteOrderMarker;
            boolean keepFirstChar = firstChar >= 0 && !isByteOrderMarker && (char) firstChar != '\r';
            if (keepFirstChar) {
                sample.appendCodePoint(firstChar);
                if ((char) firstChar == '\n') {
                    lineCount++;
                }
            }
        }

        // The count is incremented for the line that exceeds the limit too, but that line is not appended.
        String line;
        while ((line = lineReader.readLine()) != null && ++lineCount <= i2) {
            sample.append(line).append('\n');
            timeoutChecker.check("sample line splitting");
        }

        if (lineCount < i) {
            throw new IllegalArgumentException("Input contained too few lines [" + lineCount + "] to obtain a meaningful sample");
        }
        return new Tuple<>(sample.toString(), hasByteOrderMarker);
    }

    // Static initialisation of the assertion flag, the accepted-encodings set and the ordered
    // list of structure finder factories.
    static {
        $assertionsDisabled = !TextStructureFinderManager.class.desiredAssertionStatus();

        // Elements are passed to Set.of directly: the previous `(Object[]) new String[]{...}`
        // wrapper does not type-check against Set<String>. All names are lower case because
        // lookups lower-case the detected charset name first.
        FILEBEAT_SUPPORTED_ENCODINGS = Set.of(
            "866", "ansi_x3.4-1968", "arabic", "ascii", "asmo-708", "big5", "big5-hkscs", "chinese",
            "cn-big5", "cp1250", "cp1251", "cp1252", "cp1253", "cp1254", "cp1255", "cp1256",
            "cp1257", "cp1258", "cp819", "cp866", "csbig5", "cseuckr", "cseucpkdfmtjapanese", "csgb2312",
            "csibm866", "csiso2022jp", "csiso2022kr", "csiso58gb231280", "csiso88596e", "csiso88596i",
            "csiso88598e", "csiso88598i", "csisolatin1", "csisolatin2", "csisolatin3", "csisolatin4",
            "csisolatin5", "csisolatin6", "csisolatin9", "csisolatinarabic", "csisolatincyrillic",
            "csisolatingreek", "csisolatinhebrew", "cskoi8r", "csksc56011987", "csmacintosh", "csshiftjis",
            "cyrillic", "dos-874", "ecma-114", "ecma-118", "elot_928", "euc-jp", "euc-kr", "gb18030",
            "gb2312", "gb_2312", "gb_2312-80", "gbk", "greek", "greek8", "hebrew", "hz-gb-2312",
            "ibm819", "ibm866", "iso-2022-cn", "iso-2022-cn-ext", "iso-2022-jp", "iso-2022-kr",
            "iso-8859-1", "iso-8859-10", "iso-8859-11", "iso-8859-13", "iso-8859-14", "iso-8859-15",
            "iso-8859-16", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6",
            "iso-8859-6-e", "iso-8859-6-i", "iso-8859-7", "iso-8859-8", "iso-8859-8-e", "iso-8859-8-i",
            "iso-8859-9", "iso-ir-100", "iso-ir-101", "iso-ir-109", "iso-ir-110", "iso-ir-126",
            "iso-ir-127", "iso-ir-138", "iso-ir-144", "iso-ir-148", "iso-ir-149", "iso-ir-157",
            "iso-ir-58", "iso8859-1", "iso8859-10", "iso8859-11", "iso8859-13", "iso8859-14",
            "iso8859-15", "iso8859-2", "iso8859-3", "iso8859-4", "iso8859-5", "iso8859-6",
            "iso8859-6e", "iso8859-6i", "iso8859-7", "iso8859-8", "iso8859-8e", "iso8859-8i",
            "iso8859-9", "iso88591", "iso885910", "iso885911", "iso885913", "iso885914", "iso885915",
            "iso88592", "iso88593", "iso88594", "iso88595", "iso88596", "iso88597", "iso88598",
            "iso88599", "iso_8859-1", "iso_8859-15", "iso_8859-1:1987", "iso_8859-2", "iso_8859-2:1987",
            "iso_8859-3", "iso_8859-3:1988", "iso_8859-4", "iso_8859-4:1988", "iso_8859-5",
            "iso_8859-5:1988", "iso_8859-6", "iso_8859-6:1987", "iso_8859-7", "iso_8859-7:1987",
            "iso_8859-8", "iso_8859-8:1988", "iso_8859-9", "iso_8859-9:1989", "koi", "koi8", "koi8-r",
            "koi8-ru", "koi8-u", "koi8_r", "korean", "ks_c_5601-1987", "ks_c_5601-1989", "ksc5601",
            "ksc_5601", "l1", "l2", "l3", "l4", "l5", "l6", "l9", "latin1", "latin2", "latin3",
            "latin4", "latin5", "latin6", "logical", "mac", "macintosh", "ms932", "ms_kanji",
            "shift-jis", "shift_jis", "sjis", "sun_eu_greek", "tis-620", "unicode-1-1-utf-8", "us-ascii",
            "utf-16", "utf-16-bom", "utf-16be", "utf-16be-bom", "utf-16le", "utf-16le-bom", "utf-8",
            "utf8", "visual", "windows-1250", "windows-1251", "windows-1252", "windows-1253",
            "windows-1254", "windows-1255", "windows-1256", "windows-1257", "windows-1258", "windows-31j",
            "windows-874", "windows-949", "x-cp1250", "x-cp1251", "x-cp1252", "x-cp1253", "x-cp1254",
            "x-cp1255", "x-cp1256", "x-cp1257", "x-cp1258", "x-euc-jp", "x-gbk", "x-mac-cyrillic",
            "x-mac-roman", "x-mac-ukrainian", "x-sjis", "x-x-big5"
        );

        // Order matters: makeBestStructureFinder uses the first factory that accepts the sample.
        ORDERED_STRUCTURE_FACTORIES = List.of(
            new NdJsonTextStructureFinderFactory(),
            new XmlTextStructureFinderFactory(),
            new DelimitedTextStructureFinderFactory(',', '"', 2, false),
            new DelimitedTextStructureFinderFactory('\t', '"', 2, false),
            new DelimitedTextStructureFinderFactory(';', '"', 4, false),
            new DelimitedTextStructureFinderFactory('|', '"', 5, true),
            new LogTextStructureFinderFactory()
        );
    }
}
