/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.xpack.ml.filestructurefinder;

import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ScheduledExecutorService;
import java.util.stream.Collectors;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.xpack.core.ml.filestructurefinder.FileStructure;
import org.elasticsearch.xpack.ml.filestructurefinder.DelimitedFileStructureFinderFactory;
import org.elasticsearch.xpack.ml.filestructurefinder.FileStructureFinder;
import org.elasticsearch.xpack.ml.filestructurefinder.FileStructureFinderFactory;
import org.elasticsearch.xpack.ml.filestructurefinder.FileStructureOverrides;
import org.elasticsearch.xpack.ml.filestructurefinder.NdJsonFileStructureFinderFactory;
import org.elasticsearch.xpack.ml.filestructurefinder.TextLogFileStructureFinderFactory;
import org.elasticsearch.xpack.ml.filestructurefinder.TimeoutChecker;
import org.elasticsearch.xpack.ml.filestructurefinder.XmlFileStructureFinderFactory;

/**
 * Entry point for working out the structure of a text input.
 *
 * <p>The work is done in three steps: decide which character encoding to read the input with
 * (either the one supplied in the overrides or one detected from the first bytes of the input),
 * read a sample of lines from the input, and then ask each candidate
 * {@link FileStructureFinderFactory} in turn whether it can handle the sample.
 */
public final class FileStructureFinderManager {

    /** Smallest number of lines a sample must contain before structure detection is attempted. */
    public static final int MIN_SAMPLE_LINE_COUNT = 2;

    /** Sample size used when the caller passes {@code null} for the ideal sample line count. */
    public static final int DEFAULT_IDEAL_SAMPLE_LINE_COUNT = 1000;

    /** Line merge size limit used when the caller passes {@code null} for it. */
    public static final int DEFAULT_LINE_MERGE_SIZE_LIMIT = 10000;

    /**
     * Lower-case encoding names that are acceptable results of character set detection.
     * A detected encoding is rejected unless its lower-cased name is in this set.
     */
    static final Set<String> FILEBEAT_SUPPORTED_ENCODINGS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
        "866", "ansi_x3.4-1968", "arabic", "ascii", "asmo-708", "big5", "big5-hkscs", "chinese", "cn-big5",
        "cp1250", "cp1251", "cp1252", "cp1253", "cp1254", "cp1255", "cp1256", "cp1257", "cp1258", "cp819", "cp866",
        "csbig5", "cseuckr", "cseucpkdfmtjapanese", "csgb2312", "csibm866", "csiso2022jp", "csiso2022kr",
        "csiso58gb231280", "csiso88596e", "csiso88596i", "csiso88598e", "csiso88598i",
        "csisolatin1", "csisolatin2", "csisolatin3", "csisolatin4", "csisolatin5", "csisolatin6", "csisolatin9",
        "csisolatinarabic", "csisolatincyrillic", "csisolatingreek", "csisolatinhebrew",
        "cskoi8r", "csksc56011987", "csmacintosh", "csshiftjis", "cyrillic", "dos-874", "ecma-114", "ecma-118",
        "elot_928", "euc-jp", "euc-kr", "gb18030", "gb2312", "gb_2312", "gb_2312-80", "gbk", "greek", "greek8",
        "hebrew", "hz-gb-2312", "ibm819", "ibm866",
        "iso-2022-cn", "iso-2022-cn-ext", "iso-2022-jp", "iso-2022-kr",
        "iso-8859-1", "iso-8859-10", "iso-8859-11", "iso-8859-13", "iso-8859-14", "iso-8859-15", "iso-8859-16",
        "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-6-e", "iso-8859-6-i",
        "iso-8859-7", "iso-8859-8", "iso-8859-8-e", "iso-8859-8-i", "iso-8859-9",
        "iso-ir-100", "iso-ir-101", "iso-ir-109", "iso-ir-110", "iso-ir-126", "iso-ir-127", "iso-ir-138",
        "iso-ir-144", "iso-ir-148", "iso-ir-149", "iso-ir-157", "iso-ir-58",
        "iso8859-1", "iso8859-10", "iso8859-11", "iso8859-13", "iso8859-14", "iso8859-15",
        "iso8859-2", "iso8859-3", "iso8859-4", "iso8859-5", "iso8859-6", "iso8859-6e", "iso8859-6i",
        "iso8859-7", "iso8859-8", "iso8859-8e", "iso8859-8i", "iso8859-9",
        "iso88591", "iso885910", "iso885911", "iso885913", "iso885914", "iso885915",
        "iso88592", "iso88593", "iso88594", "iso88595", "iso88596", "iso88597", "iso88598", "iso88599",
        "iso_8859-1", "iso_8859-15", "iso_8859-1:1987", "iso_8859-2", "iso_8859-2:1987",
        "iso_8859-3", "iso_8859-3:1988", "iso_8859-4", "iso_8859-4:1988", "iso_8859-5", "iso_8859-5:1988",
        "iso_8859-6", "iso_8859-6:1987", "iso_8859-7", "iso_8859-7:1987", "iso_8859-8", "iso_8859-8:1988",
        "iso_8859-9", "iso_8859-9:1989",
        "koi", "koi8", "koi8-r", "koi8-ru", "koi8-u", "koi8_r", "korean",
        "ks_c_5601-1987", "ks_c_5601-1989", "ksc5601", "ksc_5601",
        "l1", "l2", "l3", "l4", "l5", "l6", "l9",
        "latin1", "latin2", "latin3", "latin4", "latin5", "latin6",
        "logical", "mac", "macintosh", "ms932", "ms_kanji", "shift-jis", "shift_jis", "sjis", "sun_eu_greek",
        "tis-620", "unicode-1-1-utf-8", "us-ascii",
        "utf-16", "utf-16-bom", "utf-16be", "utf-16be-bom", "utf-16le", "utf-16le-bom", "utf-8", "utf8",
        "visual",
        "windows-1250", "windows-1251", "windows-1252", "windows-1253", "windows-1254", "windows-1255",
        "windows-1256", "windows-1257", "windows-1258", "windows-31j", "windows-874", "windows-949",
        "x-cp1250", "x-cp1251", "x-cp1252", "x-cp1253", "x-cp1254", "x-cp1255", "x-cp1256", "x-cp1257",
        "x-cp1258", "x-euc-jp", "x-gbk", "x-mac-cyrillic", "x-mac-roman", "x-mac-ukrainian", "x-sjis", "x-x-big5")));

    /**
     * Candidate factories in the order they are tried. Order matters because
     * {@link #makeBestStructureFinder} uses the first factory that accepts the sample.
     */
    private static final List<FileStructureFinderFactory> ORDERED_STRUCTURE_FACTORIES = Collections.unmodifiableList(Arrays.asList(
        new NdJsonFileStructureFinderFactory(),
        new XmlFileStructureFinderFactory(),
        new DelimitedFileStructureFinderFactory(',', '"', 2, false),
        new DelimitedFileStructureFinderFactory('\t', '"', 2, false),
        new DelimitedFileStructureFinderFactory(';', '"', 4, false),
        new DelimitedFileStructureFinderFactory('|', '"', 5, true),
        new TextLogFileStructureFinderFactory()));

    /** Number of leading bytes of the input that are inspected when detecting the character set. */
    private static final int BUFFER_SIZE = 8192;

    private final ScheduledExecutorService scheduler;

    /**
     * @param scheduler passed to every {@link TimeoutChecker} this manager creates; must not be {@code null}.
     */
    public FileStructureFinderManager(ScheduledExecutorService scheduler) {
        this.scheduler = Objects.requireNonNull(scheduler);
    }

    /**
     * Convenience overload that uses no overrides, no timeout and a throwaway explanation list.
     *
     * @param idealSampleLineCount desired number of sample lines, or {@code null} for {@link #DEFAULT_IDEAL_SAMPLE_LINE_COUNT}.
     * @param lineMergeSizeLimit   line merge size limit, or {@code null} for {@link #DEFAULT_LINE_MERGE_SIZE_LIMIT}.
     * @param fromFile             the input to analyse.
     */
    public FileStructureFinder findFileStructure(Integer idealSampleLineCount, Integer lineMergeSizeLimit, InputStream fromFile)
        throws Exception {
        return findFileStructure(idealSampleLineCount, lineMergeSizeLimit, fromFile, FileStructureOverrides.EMPTY_OVERRIDES, null);
    }

    /**
     * Overload that substitutes defaults for {@code null} limits and uses a throwaway explanation list.
     *
     * @param idealSampleLineCount desired number of sample lines, or {@code null} for {@link #DEFAULT_IDEAL_SAMPLE_LINE_COUNT}.
     * @param lineMergeSizeLimit   line merge size limit, or {@code null} for {@link #DEFAULT_LINE_MERGE_SIZE_LIMIT}.
     * @param fromFile             the input to analyse.
     * @param overrides            caller-specified aspects of the structure that should not be detected.
     * @param timeout              passed through to the {@link TimeoutChecker}.
     */
    public FileStructureFinder findFileStructure(Integer idealSampleLineCount, Integer lineMergeSizeLimit, InputStream fromFile,
                                                 FileStructureOverrides overrides, TimeValue timeout) throws Exception {
        int sampleLineCount = idealSampleLineCount == null ? DEFAULT_IDEAL_SAMPLE_LINE_COUNT : idealSampleLineCount;
        int mergeSizeLimit = lineMergeSizeLimit == null ? DEFAULT_LINE_MERGE_SIZE_LIMIT : lineMergeSizeLimit;
        return findFileStructure(new ArrayList<>(), sampleLineCount, mergeSizeLimit, fromFile, overrides, timeout);
    }

    /**
     * Overload that uses no overrides and no timeout.
     *
     * @param explanation          list that receives human readable notes about the decisions taken.
     * @param idealSampleLineCount desired number of sample lines.
     * @param lineMergeSizeLimit   line merge size limit passed on to the chosen factory.
     * @param fromFile             the input to analyse.
     */
    public FileStructureFinder findFileStructure(List<String> explanation, int idealSampleLineCount, int lineMergeSizeLimit,
                                                 InputStream fromFile) throws Exception {
        return findFileStructure(explanation, idealSampleLineCount, lineMergeSizeLimit, fromFile,
            FileStructureOverrides.EMPTY_OVERRIDES, null);
    }

    /**
     * Determines the character encoding, samples the input and builds a structure finder for it.
     *
     * <p>If anything fails and {@code explanation} is not empty, the explanation gathered so far is
     * attached to the failure as a suppressed {@link ElasticsearchException} before it is rethrown.
     *
     * @param explanation          list that receives human readable notes about the decisions taken.
     * @param idealSampleLineCount desired number of sample lines; values below {@link #MIN_SAMPLE_LINE_COUNT}
     *                             are raised to that minimum.
     * @param lineMergeSizeLimit   line merge size limit passed on to the chosen factory.
     * @param fromFile             the input to analyse.
     * @param overrides            caller-specified aspects of the structure that should not be detected.
     * @param timeout              passed through to the {@link TimeoutChecker}.
     * @return the structure finder created by the first factory that accepted the sample.
     * @throws Exception if the encoding cannot be determined, the sample is too small, no factory
     *                   accepts the sample, or reading the input fails.
     */
    public FileStructureFinder findFileStructure(List<String> explanation, int idealSampleLineCount, int lineMergeSizeLimit,
                                                 InputStream fromFile, FileStructureOverrides overrides, TimeValue timeout)
        throws Exception {

        try (TimeoutChecker timeoutChecker = new TimeoutChecker("structure analysis", timeout, scheduler)) {

            String charsetName = overrides.getCharset();
            Reader sampleReader;
            if (charsetName != null) {
                // Constructing the reader throws if the named charset is not supported
                sampleReader = new InputStreamReader(fromFile, charsetName);
                explanation.add("Using specified character encoding [" + charsetName + "]");
            } else {
                CharsetMatch charsetMatch = findCharset(explanation, fromFile, timeoutChecker);
                charsetName = charsetMatch.getName();
                sampleReader = charsetMatch.getReader();
            }

            Tuple<String, Boolean> sampleInfo = sampleFile(sampleReader, charsetName, MIN_SAMPLE_LINE_COUNT,
                Math.max(MIN_SAMPLE_LINE_COUNT, idealSampleLineCount), timeoutChecker);

            return makeBestStructureFinder(explanation, sampleInfo.v1(), charsetName, sampleInfo.v2(), lineMergeSizeLimit,
                overrides, timeoutChecker);
        } catch (Exception e) {
            // Attach the reasoning so far to the failure so it is not lost when the exception propagates
            if (!explanation.isEmpty()) {
                e.addSuppressed(new ElasticsearchException(
                    explanation.stream().collect(Collectors.joining("]\n[", "Explanation so far:\n[", "]\n"))));
            }
            throw e;
        }
    }

    /**
     * Picks the character encoding to use for reading the input.
     *
     * <p>The candidates come from {@link CharsetDetector#detectAll()}. In addition the first
     * {@link #BUFFER_SIZE} bytes are scanned to find out whether they are pure ASCII and how many
     * zero bytes sit at even and odd offsets; this information is used to accept UTF-8 directly for
     * pure ASCII input and to reject candidates that are implausible given the zero bytes seen.
     *
     * @param explanation    list that receives a note for every candidate accepted or rejected.
     * @param inputStream    the input; wrapped in a {@link BufferedInputStream} if it does not support mark/reset.
     * @param timeoutChecker checked after detection and after the byte scan.
     * @return the first acceptable match.
     * @throws IllegalArgumentException if no candidate is acceptable.
     */
    CharsetMatch findCharset(List<String> explanation, InputStream inputStream, TimeoutChecker timeoutChecker) throws Exception {

        // mark/reset is required below so the scanned bytes can be read again afterwards
        if (!inputStream.markSupported()) {
            inputStream = new BufferedInputStream(inputStream, BUFFER_SIZE);
        }

        CharsetDetector charsetDetector = new CharsetDetector().setText(inputStream);
        CharsetMatch[] charsetMatches = charsetDetector.detectAll();
        timeoutChecker.check("character set detection");

        boolean pureAscii = true;
        int evenPosZeroCount = 0;
        int oddPosZeroCount = 0;
        inputStream.mark(BUFFER_SIZE);
        byte[] workspace = new byte[BUFFER_SIZE];
        int totalBytesRead = 0;
        while (totalBytesRead < BUFFER_SIZE) {
            int bytesRead = inputStream.read(workspace, 0, BUFFER_SIZE - totalBytesRead);
            if (bytesRead <= 0) {
                break;
            }
            for (int i = 0; i < bytesRead; ++i) {
                if (workspace[i] == 0) {
                    pureAscii = false;
                    // Parity must be that of the offset within the input, not within this chunk,
                    // otherwise a short read of odd length would swap the two counters
                    if ((totalBytesRead + i) % 2 == 0) {
                        ++evenPosZeroCount;
                    } else {
                        ++oddPosZeroCount;
                    }
                } else if (workspace[i] < 0) {
                    // Java bytes are signed, so a negative value means the top bit is set
                    pureAscii = false;
                }
            }
            totalBytesRead += bytesRead;
        }
        inputStream.reset();
        boolean containsZeroBytes = evenPosZeroCount > 0 || oddPosZeroCount > 0;
        timeoutChecker.check("character set detection");

        if (pureAscii) {
            // Prefer UTF-8 for pure ASCII input whenever the detector offered it, regardless of ranking
            Optional<CharsetMatch> utf8CharsetMatch = Arrays.stream(charsetMatches)
                .filter(candidate -> StandardCharsets.UTF_8.name().equals(candidate.getName()))
                .findFirst();
            if (utf8CharsetMatch.isPresent()) {
                explanation.add("Using character encoding [" + StandardCharsets.UTF_8.name() + "], which matched the input with ["
                    + utf8CharsetMatch.get().getConfidence() + "%] confidence - first [" + (BUFFER_SIZE / 1024)
                    + "kB] of input was pure ASCII");
                return utf8CharsetMatch.get();
            }
        }

        for (CharsetMatch charsetMatch : charsetMatches) {
            String name = charsetMatch.getName();
            if (Charset.isSupported(name) && FILEBEAT_SUPPORTED_ENCODINGS.contains(name.toLowerCase(Locale.ROOT))) {

                // Use the encoded form of a space as a proxy for whether this encoding
                // produces zero bytes for ordinary text
                boolean spaceEncodingContainsZeroByte = false;
                Charset charset = Charset.forName(name);
                if (charset.canEncode()) {
                    byte[] spaceBytes = " ".getBytes(charset);
                    for (int i = 0; i < spaceBytes.length && !spaceEncodingContainsZeroByte; ++i) {
                        spaceEncodingContainsZeroByte = spaceBytes[i] == 0;
                    }
                }

                if (containsZeroBytes && !spaceEncodingContainsZeroByte) {
                    explanation.add("Character encoding [" + name + "] matched the input with [" + charsetMatch.getConfidence()
                        + "%] confidence but was rejected as the input contains zero bytes and the [" + name
                        + "] encoding does not");
                } else if (containsZeroBytes && 3 * oddPosZeroCount > 2 * evenPosZeroCount
                    && 3 * evenPosZeroCount > 2 * oddPosZeroCount) {
                    explanation.add("Character encoding [" + name + "] matched the input with [" + charsetMatch.getConfidence()
                        + "%] confidence but was rejected as the distribution of zero bytes between odd and even positions in the "
                        + "file is very close - [" + evenPosZeroCount + "] and [" + oddPosZeroCount + "] in the first ["
                        + (BUFFER_SIZE / 1024) + "kB] of input");
                } else {
                    explanation.add("Using character encoding [" + name + "], which matched the input with ["
                        + charsetMatch.getConfidence() + "%] confidence");
                    return charsetMatch;
                }
            } else {
                explanation.add("Character encoding [" + name + "] matched the input with [" + charsetMatch.getConfidence()
                    + "%] confidence but was rejected as it is not supported by ["
                    + (Charset.isSupported(name) ? "Filebeat" : "the JVM") + "]");
            }
        }

        throw new IllegalArgumentException("Could not determine a usable character encoding for the input"
            + (containsZeroBytes ? " - could it be binary data?" : ""));
    }

    /**
     * Chooses the first factory that can handle the sample and asks it to create a structure finder.
     *
     * <p>Which factories are tried depends on the overrides:
     * <ul>
     *   <li>a delimiter override yields a single delimited factory built from the overrides;</li>
     *   <li>a quote or trim-fields override, or a delimited format override, restricts the search to
     *       variants of the standard delimited factories;</li>
     *   <li>otherwise every standard factory that reports it can find the overridden format is tried.</li>
     * </ul>
     *
     * @param explanation        list that receives notes from the factories.
     * @param sample             the sampled text.
     * @param charsetName        name of the encoding the sample was read with.
     * @param hasByteOrderMarker whether the input started with a byte order marker; may be {@code null}.
     * @param lineMergeSizeLimit line merge size limit passed on to the chosen factory.
     * @param overrides          caller-specified aspects of the structure.
     * @param timeoutChecker     checked before each factory is tried.
     * @throws IllegalArgumentException if no factory accepts the sample.
     */
    FileStructureFinder makeBestStructureFinder(List<String> explanation, String sample, String charsetName,
                                                Boolean hasByteOrderMarker, int lineMergeSizeLimit,
                                                FileStructureOverrides overrides, TimeoutChecker timeoutChecker) throws Exception {

        Character delimiter = overrides.getDelimiter();
        Character quote = overrides.getQuote();
        Boolean shouldTrimFields = overrides.getShouldTrimFields();
        List<FileStructureFinderFactory> factories;
        double allowedFractionOfBadLines = 0.0;
        if (delimiter != null) {
            allowedFractionOfBadLines = 0.1;
            // Unless told otherwise, trim fields only when the delimiter is a pipe
            factories = Collections.singletonList(new DelimitedFileStructureFinderFactory(delimiter,
                quote == null ? '"' : quote, 1, shouldTrimFields == null ? (delimiter == '|') : shouldTrimFields));
        } else if (quote != null || shouldTrimFields != null || FileStructure.Format.DELIMITED.equals(overrides.getFormat())) {
            allowedFractionOfBadLines = 0.05;
            factories = ORDERED_STRUCTURE_FACTORIES.stream()
                .filter(factory -> factory instanceof DelimitedFileStructureFinderFactory)
                .map(factory -> ((DelimitedFileStructureFinderFactory) factory).makeSimilar(quote, shouldTrimFields))
                .collect(Collectors.toList());
        } else {
            factories = ORDERED_STRUCTURE_FACTORIES.stream()
                .filter(factory -> factory.canFindFormat(overrides.getFormat()))
                .collect(Collectors.toList());
        }

        for (FileStructureFinderFactory factory : factories) {
            timeoutChecker.check("high level format detection");
            if (factory.canCreateFromSample(explanation, sample, allowedFractionOfBadLines)) {
                return factory.createFromSample(explanation, sample, charsetName, hasByteOrderMarker, lineMergeSizeLimit,
                    overrides, timeoutChecker);
            }
        }

        throw new IllegalArgumentException("Input did not match "
            + (overrides.getFormat() == null ? "any known formats" : "the specified format [" + overrides.getFormat() + "]"));
    }

    /**
     * Reads up to {@code maxLines} lines from the reader into a single string with {@code '\n'} line endings.
     *
     * <p>For encodings whose name starts with "UTF" the first character is examined separately so
     * that a byte order marker is reported but kept out of the sample.
     *
     * @param reader         source of characters.
     * @param charsetName    name of the encoding the reader decodes with.
     * @param minLines       minimum number of lines required.
     * @param maxLines       maximum number of lines to include in the sample.
     * @param timeoutChecker checked after every line.
     * @return the sample text and whether a byte order marker was seen; the second element is
     *         {@code null} when the encoding name does not start with "UTF".
     * @throws IllegalArgumentException if fewer than {@code minLines} lines were available.
     */
    private Tuple<String, Boolean> sampleFile(Reader reader, String charsetName, int minLines, int maxLines,
                                              TimeoutChecker timeoutChecker) throws IOException {

        int lineCount = 0;
        BufferedReader bufferedReader = new BufferedReader(reader);
        StringBuilder sample = new StringBuilder();

        Boolean hasByteOrderMarker = null;
        if (charsetName.toUpperCase(Locale.ROOT).startsWith("UTF")) {
            // Read through the buffered reader so this does not depend on the wrapper not having
            // buffered anything from the underlying reader yet
            int maybeByteOrderMarker = bufferedReader.read();
            hasByteOrderMarker = (char) maybeByteOrderMarker == '\uFEFF';
            // Anything other than end of input, a byte order marker or a carriage return is real content
            if (maybeByteOrderMarker >= 0 && !hasByteOrderMarker && (char) maybeByteOrderMarker != '\r') {
                sample.appendCodePoint(maybeByteOrderMarker);
                if ((char) maybeByteOrderMarker == '\n') {
                    ++lineCount;
                }
            }
        }

        String line;
        while ((line = bufferedReader.readLine()) != null && ++lineCount <= maxLines) {
            sample.append(line).append('\n');
            timeoutChecker.check("sample line splitting");
        }

        if (lineCount < minLines) {
            throw new IllegalArgumentException("Input contained too few lines [" + lineCount + "] to obtain a meaningful sample");
        }

        return new Tuple<>(sample.toString(), hasByteOrderMarker);
    }
}

