package com.oplus.dmp.sdk.analyzer.local;

import android.text.TextUtils;
import com.oplus.dmp.sdk.analyzer.bean.AnalyzedTerm;
import com.oplus.dmp.sdk.analyzer.local.tokenizer.ICutWord;
import com.oplus.dmp.sdk.analyzer.local.tokenizer.entity.CutEntity;
import com.oplus.dmp.sdk.common.log.Logger;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: classes3.dex */
/**
 * Breaks a sentence into a collection of {@link AnalyzedTerm}s.
 *
 * <p>The sentence is processed in two passes:
 * <ol>
 *   <li>Every run matched by {@link #HAN_PATTERN} is handed to the configured {@link ICutWord};
 *       both the cut terms and the un-cut words it reports are collected.</li>
 *   <li>Everything between those runs ("other symbols") is collected as well: whitespace matches
 *       are kept as terms of their own, and the remaining text is either emitted one character
 *       at a time (cut-all mode) or divided around the characters found in the stop-word
 *       dictionary.</li>
 * </ol>
 *
 * <p>The position recorded for a term is {@code sentence.indexOf(word)}, i.e. the <em>first</em>
 * occurrence of the word in the whole sentence.
 */
public class LocalAnalyzerHelper {
    /** Runs of characters in the range 一-鿕, ASCII letters, digits and the symbols {@code + # & . _ % -}. */
    private static final Pattern HAN_PATTERN = Pattern.compile("([一-鿕a-zA-Z0-9+#&\\._%\\-]+)");
    /** A single whitespace / line-break match. */
    private static final Pattern SKIP_PATTERN = Pattern.compile("(\\r\\n|\\s|\\n|\\r)");
    public static final String TAG = "LocalAnalyzerHelper";

    private final ICutWord mCutWord;
    /** May be {@code null}; in that case no character is treated as a stop word. */
    private final HashSet<String> mStopWordDict;

    /**
     * @param iCutWord tokenizer applied to every run matched by {@link #HAN_PATTERN}
     * @param hashSet  stop-word dictionary consulted when splitting other symbols outside of
     *                 cut-all mode; {@code null} is treated as an empty dictionary
     */
    public LocalAnalyzerHelper(ICutWord iCutWord, HashSet<String> hashSet) {
        this.mCutWord = iCutWord;
        this.mStopWordDict = hashSet;
    }

    /**
     * Analyzes a sentence and returns every term found in it.
     *
     * @param str the sentence to analyze; {@code null} or empty yields an empty list
     * @param z10 "is cut all": when {@code true}, text outside {@link #HAN_PATTERN} runs is emitted
     *            one character at a time instead of being divided around stop words
     * @return a new mutable list holding the distinct collected terms, in no particular order
     *         (they are gathered in a {@link HashSet}); never {@code null}
     */
    public List<AnalyzedTerm> analyze(String str, boolean z10) {
        Logger.d(TAG, "entry,sentence: %s , isCutAll : %s", str, Boolean.valueOf(z10));
        if (TextUtils.isEmpty(str)) {
            Logger.i(TAG, "sentence is empty", new Object[0]);
            return new ArrayList<>();
        }

        Set<AnalyzedTerm> terms = new HashSet<>();
        Matcher runs = HAN_PATTERN.matcher(str);
        while (runs.find()) {
            String run = runs.group();
            if (!TextUtils.isEmpty(run)) {
                collectCutResult(str, run, terms);
            }
        }
        dealWithOtherSymbol(str, z10, terms);
        // Copying an empty set already produces an empty list, so no special case is needed.
        return new ArrayList<>(terms);
    }

    /** Cuts one {@link #HAN_PATTERN} run and adds its cut terms and un-cut words to {@code terms}. */
    private void collectCutResult(String sentence, String run, Set<AnalyzedTerm> terms) {
        CutEntity cut = this.mCutWord.cut(run);
        if (cut == null) {
            // Nothing was produced for this run; skip it instead of failing the whole sentence.
            return;
        }

        Set<AnalyzedTerm> cutTerms = cut.getCutTerms();
        if (cutTerms != null && !cutTerms.isEmpty()) {
            for (AnalyzedTerm term : cutTerms) {
                // The position is re-based onto the full sentence, not the run that was cut.
                term.setCharacterPos(sentence.indexOf(term.getWord()));
            }
            terms.addAll(cutTerms);
        }

        List<String> unCutWords = cut.getUnCutWords();
        if (unCutWords != null) {
            for (String word : unCutWords) {
                terms.add(termFor(sentence, word));
            }
        }
    }

    /**
     * Collects terms from the parts of the sentence that {@link #HAN_PATTERN} does not match.
     * Whitespace matches become terms themselves; the text between them is forwarded to
     * {@link #dealWithExcludeSpaceOtherSymbol}.
     */
    private void dealWithOtherSymbol(String sentence, boolean isCutAll, Set<AnalyzedTerm> terms) {
        // Pattern.split never returns null, but it does return a zero-length array when the
        // pattern consumes the entire input (trailing empty strings are dropped).
        String[] segments = HAN_PATTERN.split(sentence);
        if (segments.length == 0) {
            Logger.i(TAG, "other symbol is empty", new Object[0]);
            return;
        }
        for (String segment : segments) {
            if (TextUtils.isEmpty(segment)) {
                continue;
            }
            Matcher whitespace = SKIP_PATTERN.matcher(segment);
            while (whitespace.find()) {
                String blank = whitespace.group();
                if (!TextUtils.isEmpty(blank)) {
                    terms.add(termFor(sentence, blank));
                }
            }
            Logger.d(TAG, "deal with otherSymbol", new Object[0]);
            for (String piece : SKIP_PATTERN.split(segment)) {
                if (!TextUtils.isEmpty(piece)) {
                    dealWithExcludeSpaceOtherSymbol(sentence, isCutAll, terms, piece);
                }
            }
        }
    }

    /**
     * Collects terms from a whitespace-free piece of "other symbol" text.
     *
     * <p>In cut-all mode every character becomes a term. Otherwise each character found in the
     * stop-word dictionary becomes a term, and each maximal run of characters between stop words
     * becomes one term.
     *
     * <p>The text is walked by Unicode code point rather than by {@code char}, so a supplementary
     * character (for example an emoji) stays one well-formed string instead of being split into
     * two lone surrogates.
     */
    private void dealWithExcludeSpaceOtherSymbol(String sentence, boolean isCutAll,
            Set<AnalyzedTerm> terms, String symbols) {
        final int length = symbols.length();

        if (isCutAll) {
            for (int i = 0; i < length; ) {
                int width = Character.charCount(symbols.codePointAt(i));
                terms.add(termFor(sentence, symbols.substring(i, i + width)));
                i += width;
            }
            return;
        }

        int runStart = -1; // start of the pending non-stop-word run, or -1 when there is none
        for (int i = 0; i < length; ) {
            int width = Character.charCount(symbols.codePointAt(i));
            String symbol = symbols.substring(i, i + width);
            Logger.d(TAG, "otherSymbol : %s", symbol);
            if (this.mStopWordDict != null && this.mStopWordDict.contains(symbol)) {
                Logger.d(TAG, "otherSymbol : %s is stop word dict", symbol);
                terms.add(termFor(sentence, symbol));
                if (runStart != -1) {
                    // The stop word closes the run that was accumulating before it.
                    terms.add(termFor(sentence, symbols.substring(runStart, i)));
                    runStart = -1;
                }
            } else if (runStart == -1) {
                runStart = i;
            }
            i += width;
        }
        if (runStart != -1) {
            terms.add(termFor(sentence, symbols.substring(runStart)));
        }
    }

    /**
     * Builds a default term for {@code word}, positioned at its first occurrence in
     * {@code sentence}.
     */
    private static AnalyzedTerm termFor(String sentence, String word) {
        // NOTE(review): indexOf reports the first occurrence only, so a word that repeats in the
        // sentence always gets the same position. Whether that matters depends on how
        // AnalyzedTerm defines equality — confirm before changing.
        return AnalyzedTerm.buildDefaultInstance(word, sentence.indexOf(word));
    }
}
