/*
 * Decompiled with CFR 0.152.
 */
package ai.grazie.nlp.encoder;

import ai.grazie.model.nlp.encoder.TokenCounter;
import ai.grazie.nlp.encoder.ByteArrayKey;
import ai.grazie.nlp.encoder.MutablePair;
import ai.grazie.nlp.encoder.UtilsKt;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.sequences.Sequence;
import kotlin.sequences.SequencesKt;
import kotlin.text.MatchResult;
import kotlin.text.Regex;
import org.jetbrains.annotations.NotNull;

@Metadata(mv={2, 1, 0}, k=1, xi=48, d1={"\u00006\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010$\n\u0002\u0018\u0002\n\u0002\u0010\b\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0010\u000e\n\u0000\n\u0002\u0010 \n\u0002\b\u0005\n\u0002\u0018\u0002\n\u0002\b\u0007\b&\u0018\u00002\u00020\u0001B+\u0012\u0012\u0010\u0002\u001a\u000e\u0012\u0004\u0012\u00020\u0004\u0012\u0004\u0012\u00020\u00050\u0003\u0012\u0006\u0010\u0006\u001a\u00020\u0007\u0012\u0006\u0010\b\u001a\u00020\u0005\u00a2\u0006\u0004\b\t\u0010\nJ\u0010\u0010\u000b\u001a\u00020\u00052\u0006\u0010\f\u001a\u00020\rH\u0016J\u0014\u0010\u000e\u001a\b\u0012\u0004\u0012\u00020\u00050\u000f2\u0006\u0010\f\u001a\u00020\rJ\u0016\u0010\u0010\u001a\b\u0012\u0004\u0012\u00020\u00050\u000f2\u0006\u0010\u0011\u001a\u00020\u0004H\u0002J\u0016\u0010\u0012\u001a\b\u0012\u0004\u0012\u00020\u00050\u000f2\u0006\u0010\u0011\u001a\u00020\u0004H\u0002JA\u0010\u0013\u001a\u0004\u0018\u00010\u00052\u0006\u0010\u0011\u001a\u00020\u00042\u0018\u0010\u0014\u001a\u0014\u0012\u0010\u0012\u000e\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00050\u00150\u000f2\u0006\u0010\u0016\u001a\u00020\u00052\u0006\u0010\u0017\u001a\u00020\u0005H\u0002\u00a2\u0006\u0002\u0010\u0018J'\u0010\u0019\u001a\u0004\u0018\u00010\u00052\u0006\u0010\u0011\u001a\u00020\u00042\u0006\u0010\u0016\u001a\u00020\u00052\u0006\u0010\u001a\u001a\u00020\u0005H\u0002\u00a2\u0006\u0002\u0010\u001bR\u001a\u0010\u0002\u001a\u000e\u0012\u0004\u0012\u00020\u0004\u0012\u0004\u0012\u00020\u00050\u0003X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u0006\u001a\u00020\u0007X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\b\u001a\u00020\u0005X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u001c"}, d2={"Lai/grazie/nlp/encoder/TiktokenEncoder;", "Lai/grazie/model/nlp/encoder/TokenCounter;", "vocabulary", "", "Lai/grazie/nlp/encoder/ByteArrayKey;", "", "pattern", "Lkotlin/text/Regex;", "unkTokenId", "<init>", "(Ljava/util/Map;Lkotlin/text/Regex;I)V", "count", "text", "", "encodeAsIds", "", "bytePairEncode", "bytes", "bytePairMerge", "getRank", "parts", "Lai/grazie/nlp/encoder/MutablePair;", "start", "skip", "(Lai/grazie/nlp/encoder/ByteArrayKey;Ljava/util/List;II)Ljava/lang/Integer;", "f", "end", "(Lai/grazie/nlp/encoder/ByteArrayKey;II)Ljava/lang/Integer;", "nlp-encoder-engine"})
@SourceDebugExtension(value={"SMAP\nTiktokenEncoder.kt\nKotlin\n*S Kotlin\n*F\n+ 1 TiktokenEncoder.kt\nai/grazie/nlp/encoder/TiktokenEncoder\n+ 2 fake.kt\nkotlin/jvm/internal/FakeKt\n+ 3 _Sequences.kt\nkotlin/sequences/SequencesKt___SequencesKt\n+ 4 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n*L\n1#1,70:1\n1#2:71\n1#2:99\n1818#3,14:72\n1577#4,11:86\n1872#4,2:97\n1874#4:100\n1588#4:101\n*S KotlinDebug\n*F\n+ 1 TiktokenEncoder.kt\nai/grazie/nlp/encoder/TiktokenEncoder\n*L\n54#1:99\n42#1:72,14\n54#1:86,11\n54#1:97,2\n54#1:100\n54#1:101\n*E\n"})
public abstract class TiktokenEncoder
implements TokenCounter {
    @NotNull
    private final Map<ByteArrayKey, Integer> vocabulary;
    @NotNull
    private final Regex pattern;
    private final int unkTokenId;

    public TiktokenEncoder(@NotNull Map<ByteArrayKey, Integer> vocabulary, @NotNull Regex pattern, int unkTokenId) {
        Intrinsics.checkNotNullParameter(vocabulary, (String)"vocabulary");
        Intrinsics.checkNotNullParameter((Object)pattern, (String)"pattern");
        this.vocabulary = vocabulary;
        this.pattern = pattern;
        this.unkTokenId = unkTokenId;
    }

    @Override
    public int count(@NotNull String text2) {
        Intrinsics.checkNotNullParameter((Object)text2, (String)"text");
        return ((Collection)this.encodeAsIds(text2)).size();
    }

    @NotNull
    public final List<Integer> encodeAsIds(@NotNull String text2) {
        Intrinsics.checkNotNullParameter((Object)text2, (String)"text");
        Sequence matches = Regex.findAll$default((Regex)this.pattern, (CharSequence)text2, (int)0, (int)2, null);
        return SequencesKt.toList((Sequence)SequencesKt.flattenSequenceOfIterable((Sequence)SequencesKt.map((Sequence)matches, arg_0 -> TiktokenEncoder.encodeAsIds$lambda$1(this, arg_0))));
    }

    private final List<Integer> bytePairEncode(ByteArrayKey bytes) {
        if (bytes.getSize() == 1) {
            Integer n = this.vocabulary.get(bytes);
            return CollectionsKt.listOf((Object)(n != null ? n : this.unkTokenId));
        }
        return this.bytePairMerge(bytes);
    }

    /*
     * WARNING - void declaration
     */
    private final List<Integer> bytePairMerge(ByteArrayKey bytes) {
        void $this$mapIndexedNotNullTo$iv$iv;
        int n = bytes.getSize() + 1;
        ArrayList<MutablePair<Integer, Integer>> arrayList = new ArrayList<MutablePair<Integer, Integer>>(n);
        int n2 = 0;
        while (n2 < n) {
            void it;
            int n3;
            int n4 = n3 = n2++;
            ArrayList<MutablePair<Integer, Integer>> arrayList2 = arrayList;
            boolean bl = false;
            arrayList2.add(UtilsKt.toM((int)it, Integer.MAX_VALUE));
        }
        List parts = arrayList;
        int n5 = parts.size() - 2;
        for (int i = 0; i < n5; ++i) {
            Integer n6 = this.getRank(bytes, parts, i, 0);
            if (n6 == null) {
                continue;
            }
            int rank = n6;
            ((MutablePair)parts.get(i)).setSecond(rank);
        }
        while (parts.size() > 1) {
            Object e;
            Sequence $this$minBy$iv = SequencesKt.take((Sequence)CollectionsKt.asSequence((Iterable)parts), (int)(parts.size() - 1));
            boolean $i$f$minByOrThrow = false;
            Iterator iterator$iv = $this$minBy$iv.iterator();
            if (!iterator$iv.hasNext()) {
                throw new NoSuchElementException();
            }
            Object minElem$iv = iterator$iv.next();
            if (!iterator$iv.hasNext()) {
                e = minElem$iv;
            } else {
                MutablePair it = (MutablePair)minElem$iv;
                boolean bl = false;
                int minValue$iv = ((Number)it.getSecond()).intValue();
                do {
                    Object e$iv = iterator$iv.next();
                    MutablePair it2 = (MutablePair)e$iv;
                    $i$a$-minByOrThrow-TiktokenEncoder$bytePairMerge$minRank$1 = false;
                    int v$iv = ((Number)it2.getSecond()).intValue();
                    if (minValue$iv <= v$iv) continue;
                    minElem$iv = e$iv;
                    minValue$iv = v$iv;
                } while (iterator$iv.hasNext());
                e = minElem$iv;
            }
            MutablePair minRank = (MutablePair)e;
            if (((Number)minRank.getSecond()).intValue() == Integer.MAX_VALUE) break;
            int i = parts.indexOf(minRank);
            Integer n7 = this.getRank(bytes, parts, i, 1);
            ((MutablePair)parts.get(i)).setSecond(n7 != null ? n7 : Integer.MAX_VALUE);
            if (i > 0) {
                Integer n8 = this.getRank(bytes, parts, i - 1, 1);
                ((MutablePair)parts.get(i - 1)).setSecond(n8 != null ? n8 : Integer.MAX_VALUE);
            }
            MutablePair cfr_ignored_0 = (MutablePair)parts.remove(i + 1);
        }
        Iterable $this$mapIndexedNotNull$iv = parts;
        boolean $i$f$mapIndexedNotNull = false;
        Iterable $i$f$minByOrThrow = $this$mapIndexedNotNull$iv;
        Collection destination$iv$iv = new ArrayList();
        boolean $i$f$mapIndexedNotNullTo = false;
        void $this$forEachIndexed$iv$iv$iv = $this$mapIndexedNotNullTo$iv$iv;
        boolean $i$f$forEachIndexed = false;
        int index$iv$iv$iv = 0;
        for (Object item$iv$iv$iv : $this$forEachIndexed$iv$iv$iv) {
            Integer it$iv$iv;
            void part;
            void element$iv$iv;
            int n9;
            if ((n9 = index$iv$iv$iv++) < 0) {
                CollectionsKt.throwIndexOverflow();
            }
            Object t = item$iv$iv$iv;
            int index$iv$iv = n9;
            boolean bl = false;
            MutablePair mutablePair = (MutablePair)element$iv$iv;
            int i = index$iv$iv;
            boolean bl2 = false;
            if ((i >= parts.size() - 1 ? null : this.f(bytes, ((Number)part.getFirst()).intValue(), ((Number)((MutablePair)parts.get(i + 1)).getFirst()).intValue())) == null) continue;
            it$iv$iv = it$iv$iv;
            boolean bl3 = false;
            destination$iv$iv.add(it$iv$iv);
        }
        return (List)destination$iv$iv;
    }

    private final Integer getRank(ByteArrayKey bytes, List<MutablePair<Integer, Integer>> parts, int start2, int skip) {
        return start2 + skip + 2 < parts.size() ? this.vocabulary.get(bytes.range(((Number)parts.get(start2).getFirst()).intValue(), ((Number)parts.get(start2 + skip + 2).getFirst()).intValue())) : null;
    }

    private final Integer f(ByteArrayKey bytes, int start2, int end) {
        return this.vocabulary.get(bytes.range(start2, end));
    }

    private static final List encodeAsIds$lambda$1(TiktokenEncoder this$0, MatchResult match) {
        Object object;
        block3: {
            String piece;
            block2: {
                Intrinsics.checkNotNullParameter((Object)match, (String)"match");
                piece = match.getValue();
                object = this$0.vocabulary.get(UtilsKt.toByteArrayKey(piece));
                if (object == null) break block2;
                int it = ((Number)object).intValue();
                boolean bl = false;
                List list2 = CollectionsKt.listOf((Object)it);
                object = list2;
                if (list2 != null) break block3;
            }
            object = this$0.bytePairEncode(UtilsKt.toByteArrayKey(piece));
        }
        return object;
    }
}

