package org.elasticsearch.xpack.ml.inference.nlp.tokenizers;

import com.ibm.icu.text.BreakIterator;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.CharBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Locale;
import java.util.Optional;
import java.util.OptionalInt;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.xpack.ml.MachineLearning;

/* loaded from: input_file:org/elasticsearch/xpack/ml/inference/nlp/tokenizers/PrecompiledCharMapNormalizer.class */
/**
 * Normalizes text with a precompiled character map: a trie stored as an {@code int[]} of packed
 * units plus a blob of NUL-delimited UTF-8 replacement strings.
 *
 * <p>Input is processed one grapheme cluster at a time. The UTF-8 bytes of a cluster are looked up
 * in the trie; on a hit the replacement string is emitted, otherwise each code point of the cluster
 * is looked up on its own and copied through unchanged when it has no mapping.
 *
 * <p>NOTE(review): the unit bit layout decoded by {@link #hasLeaf}, {@link #label}, {@link #value}
 * and {@link #offset} looks like the Darts-clone double-array trie encoding; confirm against the
 * tool that produces the map.
 *
 * <p>{@code normalize} writes into a scratch buffer that is a field of the instance, so concurrent
 * {@code normalize} calls on the same instance share that buffer.
 */
public class PrecompiledCharMapNormalizer {
    /** Bits of a unit compared against the input byte: the sign bit plus the low 8 bits. */
    private static final int LABEL_MASK = (1 << 31) | 0xFF;
    /** Bit 8 of a unit: the transition reached a node that carries a value. */
    private static final int LEAF_BIT = 1 << 8;
    /** Bit 9 of a unit: when set, the stored offset is scaled by 256 (shifted left by 8). */
    private static final int WIDE_OFFSET_BIT = 1 << 9;
    /**
     * Whole-grapheme lookups are attempted only for clusters shorter than this many UTF-8 bytes.
     * Presumably the map holds no longer keys; confirm against the map producer.
     */
    private static final int GRAPHEME_LOOKUP_BYTE_LIMIT = 6;

    /** Packed trie units. */
    private final int[] offsets;
    /** All replacement strings as UTF-8, each terminated by a 0 byte (or by the end of the array). */
    private final byte[] normalizedStrUtf8Bytes;
    /** Scratch space for encoding one code point (at most 4 UTF-8 bytes). */
    private final byte[] reusableCharByteBuffer = new byte[4];

    /**
     * Decodes a Base64 encoded map.
     *
     * <p>Layout after Base64 decoding: a little-endian 32-bit trie size in bytes, then that many
     * bytes of little-endian 32-bit trie units, then the UTF-8 replacement strings.
     *
     * @param base64 the Base64 encoded map
     * @return a normalizer backed by the decoded trie and replacement strings
     * @throws IllegalArgumentException if the input is not valid Base64, is too short to hold the
     *     size header, or declares a trie that does not fit in the decoded data
     */
    static PrecompiledCharMapNormalizer fromBase64Str(String base64) {
        byte[] decoded = Base64.getDecoder().decode(base64);
        if (decoded.length < Integer.BYTES) {
            throw new IllegalArgumentException(
                "precompiled char map is too short to hold the trie size header; got [" + decoded.length + "] bytes"
            );
        }
        ByteBuffer reader = ByteBuffer.wrap(decoded).order(ByteOrder.LITTLE_ENDIAN);
        int trieSizeInBytes = reader.getInt();
        int unitCount = trieSizeInBytes / Integer.BYTES;
        if (unitCount < 0 || (long) unitCount * Integer.BYTES > reader.remaining()) {
            throw new IllegalArgumentException(
                "precompiled char map declares a trie of ["
                    + trieSizeInBytes
                    + "] bytes but only ["
                    + reader.remaining()
                    + "] bytes follow the header"
            );
        }
        int[] units = new int[unitCount];
        for (int idx = 0; idx < unitCount; idx++) {
            units[idx] = reader.getInt();
        }
        // Everything after the trie units is the replacement-string blob.
        int textStart = reader.position();
        String normalizedStr = new String(decoded, textStart, decoded.length - textStart, StandardCharsets.UTF_8);
        return new PrecompiledCharMapNormalizer(units, normalizedStr);
    }

    /**
     * @param offsets the packed trie units; the array is used as-is, not copied
     * @param normalizedStr the NUL-delimited replacement strings; stored as UTF-8 bytes
     */
    public PrecompiledCharMapNormalizer(int[] offsets, String normalizedStr) {
        this.offsets = offsets;
        this.normalizedStrUtf8Bytes = normalizedStr.getBytes(StandardCharsets.UTF_8);
    }

    /** Returns whether bit 8 of the unit is set. */
    private static boolean hasLeaf(int unit) {
        return (unit & LEAF_BIT) != 0;
    }

    /** Returns the sign bit and low byte of the unit, i.e. the part compared with an input byte. */
    private static int label(int unit) {
        return unit & LABEL_MASK;
    }

    /** Returns the unit with its sign bit cleared. */
    private static int value(int unit) {
        return unit & Integer.MAX_VALUE;
    }

    /** Returns the unit's offset field (bits 10 and up), shifted left by 8 when bit 9 is set. */
    private static int offset(int unit) {
        // (unit & bit 9) >>> 6 is either 0 or 8, so this is a conditional multiply by 256.
        return (unit >>> 10) << ((unit & WIDE_OFFSET_BIT) >>> 6);
    }

    /**
     * Looks up the whole array in the trie.
     *
     * @param inputBytes UTF-8 bytes of the key
     * @return see {@link #commonPrefix(byte[], int, int)}
     */
    OptionalInt commonPrefix(byte[] inputBytes) {
        return commonPrefix(inputBytes, 0, inputBytes.length);
    }

    /**
     * Walks the trie over {@code inputBytes[start .. start + len)}.
     *
     * <p>The walk stops at the first unit that has a leaf and returns that leaf's value, which
     * callers use as an index into the replacement-string blob. It returns empty when a byte does
     * not match, when a 0 byte is reached, or when the range is exhausted before any leaf is seen.
     *
     * @param inputBytes UTF-8 bytes to search
     * @param start index of the first byte to consume
     * @param len maximum number of bytes to consume
     * @return the value of the first leaf reached, or empty if none was reached
     */
    private OptionalInt commonPrefix(byte[] inputBytes, int start, int len) {
        int pos = offset(this.offsets[0]);
        int end = start + len;
        for (int idx = start; idx < end; idx++) {
            // Java bytes are signed; the trie is keyed on unsigned byte values.
            int key = inputBytes[idx] & 0xFF;
            if (key == 0) {
                break;
            }
            pos ^= key;
            int unit = this.offsets[pos];
            if (label(unit) != key) {
                return OptionalInt.empty();
            }
            pos ^= offset(unit);
            if (hasLeaf(unit)) {
                return OptionalInt.of(value(this.offsets[pos]));
            }
        }
        return OptionalInt.empty();
    }

    /**
     * Finds the replacement for {@code strBytes[start .. start + len)}.
     *
     * @return the replacement bytes (possibly empty) when the trie has a mapping, otherwise empty
     */
    private Optional<BytesRef> normalizePart(byte[] strBytes, int start, int len) {
        OptionalInt index = commonPrefix(strBytes, start, len);
        if (index.isEmpty()) {
            return Optional.empty();
        }
        int from = index.getAsInt();
        int to = from;
        // The replacement runs up to the next 0 byte or the end of the blob.
        while (to < this.normalizedStrUtf8Bytes.length && this.normalizedStrUtf8Bytes[to] != 0) {
            to++;
        }
        if (to == from) {
            return Optional.of(new BytesRef(BytesRef.EMPTY_BYTES));
        }
        return Optional.of(new BytesRef(this.normalizedStrUtf8Bytes, from, to - from));
    }

    /**
     * Normalizes {@code str} and returns the result as a string.
     *
     * @see #normalize(CharSequence)
     */
    String normalize(String str) {
        return normalize((CharSequence) str).utf8ToString();
    }

    /**
     * Normalizes {@code text} grapheme cluster by grapheme cluster.
     *
     * <p>For each cluster shorter than {@value #GRAPHEME_LOOKUP_BYTE_LIMIT} UTF-8 bytes the whole
     * cluster is looked up first. If that finds nothing (or the cluster is longer), every code
     * point of the cluster is looked up individually and, when unmapped, copied to the output as
     * its own UTF-8 encoding.
     *
     * @param text the text to normalize
     * @return the normalized text as UTF-8 bytes
     */
    BytesRef normalize(CharSequence text) {
        ByteBuffer encoded = StandardCharsets.UTF_8.encode(CharBuffer.wrap(text));
        byte[] utf8 = new byte[encoded.limit()];
        encoded.get(utf8);
        int[] codePoints = text.codePoints().toArray();

        BreakIterator graphemes = BreakIterator.getCharacterInstance(Locale.ROOT);
        graphemes.setText(text);

        BytesRefBuilder out = new BytesRefBuilder();
        out.grow(utf8.length);

        int bytePos = 0;       // index into utf8 of the current cluster's first byte
        int codePointPos = 0;  // index into codePoints of the current cluster's first code point
        int start = graphemes.first();
        for (int end = graphemes.next(); end != BreakIterator.DONE; start = end, end = graphemes.next()) {
            int clusterCodePoints = Character.codePointCount(text, start, end);
            int clusterBytes = 0;
            for (int idx = codePointPos; idx < codePointPos + clusterCodePoints; idx++) {
                clusterBytes += TokenizerUtils.numUtf8Bytes(codePoints[idx]);
            }
            codePointPos += clusterCodePoints;

            if (clusterBytes < GRAPHEME_LOOKUP_BYTE_LIMIT) {
                Optional<BytesRef> whole = normalizePart(utf8, bytePos, clusterBytes);
                if (whole.isPresent()) {
                    out.append(whole.get());
                    bytePos += clusterBytes;
                    // Move straight to the next cluster; falling through would advance bytePos and
                    // the break iterator a second time.
                    continue;
                }
            }

            // No mapping for the cluster as a whole: handle its code points one at a time. Walk by
            // code point rather than by UTF-16 code unit so a surrogate pair is sized and encoded
            // as a single character.
            int consumedBytes = 0;
            int charPos = start;
            while (charPos < end) {
                int codePoint = Character.codePointAt(text, charPos);
                int charCount = Character.charCount(codePoint);
                int codePointBytes = TokenizerUtils.numUtf8Bytes(codePoint);
                Optional<BytesRef> single = normalizePart(utf8, bytePos + consumedBytes, codePointBytes);
                if (single.isPresent()) {
                    out.append(single.get());
                } else {
                    int written = UnicodeUtil.UTF16toUTF8(text, charPos, charCount, this.reusableCharByteBuffer);
                    out.append(this.reusableCharByteBuffer, 0, written);
                }
                consumedBytes += codePointBytes;
                charPos += charCount;
            }
            bytePos += clusterBytes;
        }
        return out.get();
    }
}
