/*
 * Decompiled with CFR 0.152.
 */
package org.omegat.tokenizer;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.omegat.core.Core;
import org.omegat.core.data.IProject;
import org.omegat.core.data.SourceTextEntry;
import org.omegat.gui.comments.ICommentProvider;
import org.omegat.tokenizer.ITokenizer;
import org.omegat.tokenizer.Tokenizer;
import org.omegat.tokenizer.WordIterator;
import org.omegat.util.Language;
import org.omegat.util.Log;
import org.omegat.util.StringUtil;
import org.omegat.util.Token;

/**
 * Common base for tokenizers that obtain their tokens from a Lucene
 * {@link TokenStream} supplied by the concrete subclass (see
 * {@link #getTokenStream(String, boolean, boolean)}).
 *
 * <p>Word tokenization results are cached per {@link ITokenizer.StemmingMode};
 * verbatim tokenization is either delegated to {@link WordIterator} or, when
 * {@link #shouldDelegateTokenizeExactly} is {@code false}, performed through the
 * subclass token stream with all filtering disabled.
 */
public abstract class BaseTokenizer implements ITokenizer {
    /** Shared empty result for the string-returning methods. */
    protected static final String[] EMPTY_STRING_LIST = new String[0];
    /** Shared empty result for the token-returning methods. */
    protected static final Token[] EMPTY_TOKENS_LIST = new Token[0];
    /** Initial capacity of the lists used to collect tokens. */
    protected static final int DEFAULT_TOKENS_COUNT = 64;

    /** Cache of {@link #tokenizeWords} results for {@code StemmingMode.NONE}, keyed by input string. */
    private final Map<String, Token[]> tokenCacheNone = new ConcurrentHashMap<>(2500);
    /** Cache of {@link #tokenizeWords} results for {@code StemmingMode.MATCHING}, keyed by input string. */
    private final Map<String, Token[]> tokenCacheMatching = new ConcurrentHashMap<>(2500);
    /** Cache of {@link #tokenizeWords} results for {@code StemmingMode.GLOSSARY}, keyed by input string. */
    private final Map<String, Token[]> tokenCacheGlossary = new ConcurrentHashMap<>(2500);

    /**
     * When {@code true} (the default), the verbatim methods split the text with
     * {@link WordIterator}; when {@code false} they go through
     * {@link #getTokenStream(String, boolean, boolean)} with no filtering.
     */
    protected boolean shouldDelegateTokenizeExactly = true;

    /**
     * Comment provider that returns the {@link #test(String...)} report of the
     * current project's source tokenizer for the entry's source text.
     *
     * <p>NOTE(review): the cast assumes the project's source tokenizer is a
     * {@code BaseTokenizer} and that a project is available; otherwise this
     * throws at runtime - confirm that is acceptable for a debug-only provider.
     */
    public static final ICommentProvider TOKENIZER_DEBUG_PROVIDER = new ICommentProvider() {

        @Override
        public String getComment(SourceTextEntry newEntry) {
            BaseTokenizer tokenizer = (BaseTokenizer) Core.getProject().getSourceTokenizer();
            return tokenizer.test(newEntry.getSrcText());
        }
    };

    /**
     * Tokenizes {@code strOrig} into words for the given stemming mode, using a
     * per-mode cache.
     *
     * <p>The mode maps to the {@link #tokenize} flags as follows: stems are
     * allowed for GLOSSARY and MATCHING, stop words only for MATCHING, digits
     * are filtered for every mode except GLOSSARY, and whitespace-only tokens
     * are always filtered.
     *
     * @param strOrig text to tokenize; {@code null} or empty yields an empty array
     * @param stemmingMode selects both the tokenization flags and the cache
     * @return the tokens; the array may be the cached instance shared with
     *         other callers, so it should not be modified
     * @throws RuntimeException if no cache exists for {@code stemmingMode}
     */
    @Override
    public Token[] tokenizeWords(String strOrig, ITokenizer.StemmingMode stemmingMode) {
        Map<String, Token[]> cache;
        switch (stemmingMode) {
            case NONE:
                cache = this.tokenCacheNone;
                break;
            case GLOSSARY:
                cache = this.tokenCacheGlossary;
                break;
            case MATCHING:
                cache = this.tokenCacheMatching;
                break;
            default:
                throw new RuntimeException("No cache for specified stemming mode");
        }
        // ConcurrentHashMap rejects null keys, so the lookup below would throw
        // for a null input. Short-circuit the same way tokenize() does.
        if (StringUtil.isEmpty(strOrig)) {
            return EMPTY_TOKENS_LIST;
        }
        Token[] cached = cache.get(strOrig);
        if (cached != null) {
            return cached;
        }
        boolean stemsAllowed = stemmingMode == ITokenizer.StemmingMode.GLOSSARY
                || stemmingMode == ITokenizer.StemmingMode.MATCHING;
        boolean stopWordsAllowed = stemmingMode == ITokenizer.StemmingMode.MATCHING;
        boolean filterDigits = stemmingMode != ITokenizer.StemmingMode.GLOSSARY;
        Token[] result = this.tokenize(strOrig, stemsAllowed, stopWordsAllowed, filterDigits, true);
        cache.put(strOrig, result);
        return result;
    }

    /**
     * String-returning, uncached counterpart of {@link #tokenizeWords}; the
     * stemming mode is translated to the same flags.
     *
     * @param str text to tokenize
     * @param stemmingMode selects the tokenization flags
     * @return the token strings as produced by {@link #tokenizeToStrings}
     */
    @Override
    public String[] tokenizeWordsToStrings(String str, ITokenizer.StemmingMode stemmingMode) {
        boolean stemsAllowed = stemmingMode == ITokenizer.StemmingMode.GLOSSARY
                || stemmingMode == ITokenizer.StemmingMode.MATCHING;
        boolean stopWordsAllowed = stemmingMode == ITokenizer.StemmingMode.MATCHING;
        boolean filterDigits = stemmingMode != ITokenizer.StemmingMode.GLOSSARY;
        return this.tokenizeToStrings(str, stemsAllowed, stopWordsAllowed, filterDigits, true);
    }

    /**
     * Splits {@code strOrig} into consecutive tokens without any filtering.
     *
     * @param strOrig text to tokenize
     * @return one token per segment reported by {@link WordIterator}, each
     *         carrying its start offset; or the unfiltered result of
     *         {@link #tokenize} when delegation is disabled; empty for empty input
     */
    @Override
    public Token[] tokenizeVerbatim(String strOrig) {
        if (StringUtil.isEmpty(strOrig)) {
            return EMPTY_TOKENS_LIST;
        }
        if (!this.shouldDelegateTokenizeExactly) {
            return this.tokenize(strOrig, false, false, false, false);
        }
        ArrayList<Token> result = new ArrayList<>(DEFAULT_TOKENS_COUNT);
        WordIterator iterator = new WordIterator();
        iterator.setText(strOrig);
        int start = iterator.first();
        int end = iterator.next();
        // -1 marks the end of iteration (presumably BreakIterator.DONE).
        while (end != -1) {
            result.add(new Token(strOrig.substring(start, end), start));
            start = end;
            end = iterator.next();
        }
        return result.toArray(new Token[result.size()]);
    }

    /**
     * String-returning counterpart of {@link #tokenizeVerbatim(String)}.
     *
     * @param str text to tokenize
     * @return the consecutive segments of {@code str}; empty for empty input
     */
    @Override
    public String[] tokenizeVerbatimToStrings(String str) {
        if (StringUtil.isEmpty(str)) {
            return EMPTY_STRING_LIST;
        }
        if (!this.shouldDelegateTokenizeExactly) {
            return this.tokenizeToStrings(str, false, false, false, false);
        }
        ArrayList<String> result = new ArrayList<>(DEFAULT_TOKENS_COUNT);
        WordIterator iterator = new WordIterator();
        iterator.setText(str);
        int start = iterator.first();
        int end = iterator.next();
        // -1 marks the end of iteration (presumably BreakIterator.DONE).
        while (end != -1) {
            result.add(str.substring(start, end));
            start = end;
            end = iterator.next();
        }
        return result.toArray(new String[result.size()]);
    }

    /**
     * Produces one token per Unicode code point of {@code strOrig}.
     *
     * @param strOrig text to split; must not be {@code null}
     * @return tokens whose offset is the {@code char} index at which the code
     *         point starts
     */
    protected Token[] tokenizeByCodePoint(String strOrig) {
        Token[] tokens = new Token[strOrig.codePointCount(0, strOrig.length())];
        int count = 0;
        int cp;
        // Advance by the code point's char width so surrogate pairs stay together.
        for (int i = 0; i < strOrig.length(); i += Character.charCount(cp)) {
            cp = strOrig.codePointAt(i);
            tokens[count++] = new Token(String.valueOf(Character.toChars(cp)), i);
        }
        return tokens;
    }

    /**
     * String-returning counterpart of {@link #tokenizeByCodePoint(String)}.
     *
     * @param strOrig text to split; must not be {@code null}
     * @return one string per Unicode code point
     */
    protected String[] tokenizeByCodePointToStrings(String strOrig) {
        String[] tokens = new String[strOrig.codePointCount(0, strOrig.length())];
        int count = 0;
        int cp;
        // Advance by the code point's char width so surrogate pairs stay together.
        for (int i = 0; i < strOrig.length(); i += Character.charCount(cp)) {
            cp = strOrig.codePointAt(i);
            tokens[count++] = String.valueOf(Character.toChars(cp));
        }
        return tokens;
    }

    /**
     * Runs the subclass token stream over {@code strOrig} and collects the
     * accepted tokens together with their offsets.
     *
     * <p>An {@link IOException} from the stream is logged, and whatever was
     * collected up to that point is returned.
     *
     * @param strOrig text to tokenize; empty input yields an empty array
     * @param stemsAllowed forwarded to {@link #getTokenStream}
     * @param stopWordsAllowed forwarded to {@link #getTokenStream}
     * @param filterDigits drop tokens that contain any digit
     * @param filterWhitespace drop tokens that consist only of whitespace
     * @return the accepted tokens; each token's offset and length are taken
     *         from the stream's offset attribute
     */
    protected Token[] tokenize(String strOrig, boolean stemsAllowed, boolean stopWordsAllowed, boolean filterDigits, boolean filterWhitespace) {
        if (StringUtil.isEmpty(strOrig)) {
            return EMPTY_TOKENS_LIST;
        }
        ArrayList<Token> result = new ArrayList<>(DEFAULT_TOKENS_COUNT);
        try (TokenStream in = this.getTokenStream(strOrig, stemsAllowed, stopWordsAllowed)) {
            // addAttribute registers the attribute if needed and returns the instance.
            CharTermAttribute cattr = in.addAttribute(CharTermAttribute.class);
            OffsetAttribute off = in.addAttribute(OffsetAttribute.class);
            in.reset();
            while (in.incrementToken()) {
                String tokenText = cattr.toString();
                if (this.acceptToken(tokenText, filterDigits, filterWhitespace)) {
                    int startOffset = off.startOffset();
                    result.add(new Token(tokenText, startOffset, off.endOffset() - startOffset));
                }
            }
            in.end();
        } catch (IOException ex) {
            Log.log(ex);
        }
        return result.toArray(new Token[result.size()]);
    }

    /**
     * String-returning counterpart of {@link #tokenize}.
     *
     * <p>When {@code stemsAllowed} is set, each accepted token is followed by
     * the corresponding slice of the input whenever that slice differs from the
     * token text, compared case-insensitively in the effective language's locale.
     *
     * <p>An {@link IOException} from the stream is logged, and whatever was
     * collected up to that point is returned.
     *
     * @param str text to tokenize; empty input yields an empty array
     * @param stemsAllowed forwarded to {@link #getTokenStream}; also enables
     *        the extra original-text entries described above
     * @param stopWordsAllowed forwarded to {@link #getTokenStream}
     * @param filterDigits drop tokens that contain any digit
     * @param filterWhitespace drop tokens that consist only of whitespace
     * @return the accepted token strings
     */
    protected String[] tokenizeToStrings(String str, boolean stemsAllowed, boolean stopWordsAllowed, boolean filterDigits, boolean filterWhitespace) {
        if (StringUtil.isEmpty(str)) {
            return EMPTY_STRING_LIST;
        }
        ArrayList<String> result = new ArrayList<>(DEFAULT_TOKENS_COUNT);
        try (TokenStream in = this.getTokenStream(str, stemsAllowed, stopWordsAllowed)) {
            // addAttribute registers the attribute if needed and returns the instance.
            CharTermAttribute cattr = in.addAttribute(CharTermAttribute.class);
            OffsetAttribute off = in.addAttribute(OffsetAttribute.class);
            // The locale is only needed (and only resolved) when stems are compared.
            Locale loc = stemsAllowed ? this.getEffectiveLanguage().getLocale() : null;
            in.reset();
            while (in.incrementToken()) {
                String tokenText = cattr.toString();
                if (!this.acceptToken(tokenText, filterDigits, filterWhitespace)) {
                    continue;
                }
                result.add(tokenText);
                if (stemsAllowed) {
                    String origText = str.substring(off.startOffset(), off.endOffset());
                    if (!origText.toLowerCase(loc).equals(tokenText.toLowerCase(loc))) {
                        result.add(origText);
                    }
                }
            }
            in.end();
        } catch (IOException ex) {
            Log.log(ex);
        }
        return result.toArray(new String[result.size()]);
    }

    /**
     * Decides whether a token produced by the stream is kept.
     *
     * @param token candidate token text
     * @param filterDigits reject the token if any of its code points is a digit
     * @param filterWhitespace reject the token if all of its code points are whitespace
     * @return {@code false} for empty tokens and for tokens rejected by an
     *         enabled filter, {@code true} otherwise
     */
    private boolean acceptToken(String token, boolean filterDigits, boolean filterWhitespace) {
        if (StringUtil.isEmpty(token)) {
            return false;
        }
        if (!filterDigits && !filterWhitespace) {
            return true;
        }
        boolean isWhitespaceOnly = true;
        int cp;
        for (int i = 0; i < token.length(); i += Character.charCount(cp)) {
            cp = token.codePointAt(i);
            if (filterDigits && Character.isDigit(cp)) {
                return false;
            }
            if (filterWhitespace && !StringUtil.isWhiteSpace(cp)) {
                isWhitespaceOnly = false;
            }
        }
        return !(filterWhitespace && isWhitespaceOnly);
    }

    /**
     * Creates the token stream used by {@link #tokenize} and
     * {@link #tokenizeToStrings}.
     *
     * @param var1 text to tokenize
     * @param var2 the {@code stemsAllowed} flag passed by the callers
     * @param var3 the {@code stopWordsAllowed} flag passed by the callers
     * @return a stream over the tokens of the text
     * @throws IOException if the stream cannot be created
     */
    protected abstract TokenStream getTokenStream(String var1, boolean var2, boolean var3) throws IOException;

    /**
     * Creates a Lucene {@link StandardTokenizer} reading from {@code strOrig}.
     *
     * @param strOrig text to tokenize
     * @return the tokenizer with its reader set
     * @throws IOException declared for subclasses and callers; not thrown by
     *         the code visible here
     */
    protected TokenStream getStandardTokenStream(String strOrig) throws IOException {
        StandardTokenizer tokenizer = new StandardTokenizer();
        tokenizer.setReader(new StringReader(strOrig));
        return tokenizer;
    }

    /**
     * Returns the languages declared in this class's {@link Tokenizer} annotation.
     *
     * @return the non-empty array of declared languages
     * @throws RuntimeException if the annotation is missing or declares no language
     */
    @Override
    public String[] getSupportedLanguages() {
        return this.getAnnotationLanguages();
    }

    /**
     * Reads the {@code languages} value of the runtime {@link Tokenizer}
     * annotation on the concrete class.
     *
     * @return the non-empty array of declared languages
     * @throws RuntimeException if the annotation is missing or declares no language
     */
    private String[] getAnnotationLanguages() {
        Tokenizer ann = this.getClass().getAnnotation(Tokenizer.class);
        if (ann == null) {
            throw new RuntimeException(this.getClass().getName() + " must have a " + Tokenizer.class.getName() + " annotation available at runtime.");
        }
        String[] languages = ann.languages();
        if (languages.length == 0) {
            throw new RuntimeException(this.getClass().getName() + " must have a non-empty " + Tokenizer.class.getName() + " annotation available at runtime.");
        }
        return languages;
    }

    /**
     * Determines the language this tokenizer operates on.
     *
     * <p>If the annotation declares exactly one language other than the
     * {@code "discoverAtRuntime"} marker, that language is used; in every other
     * case the language is taken from the current project.
     *
     * @return the effective language
     * @throws RuntimeException if the annotation is invalid or the project
     *         language cannot be determined
     */
    protected Language getEffectiveLanguage() {
        String[] languages = this.getAnnotationLanguages();
        boolean singleFixedLanguage = languages.length == 1 && !languages[0].equals("discoverAtRuntime");
        if (singleFixedLanguage) {
            return new Language(languages[0]);
        }
        return this.getProjectLanguage();
    }

    /**
     * Looks up the project language matching this tokenizer's role: the source
     * language if this instance is the project's source tokenizer, the target
     * language if it is the target tokenizer.
     *
     * @return the project's source or target language
     * @throws RuntimeException if there is no project or this instance is
     *         neither its source nor its target tokenizer
     */
    protected Language getProjectLanguage() {
        IProject proj = Core.getProject();
        if (proj == null) {
            throw new RuntimeException("This tokenizer's language can only be determined in the context of a project, but project is null.");
        }
        // Identity comparison is intentional: the question is whether this very
        // instance is installed in the project.
        if (proj.getSourceTokenizer() == this) {
            return proj.getProjectProperties().getSourceLanguage();
        }
        if (proj.getTargetTokenizer() == this) {
            return proj.getProjectProperties().getTargetLanguage();
        }
        throw new RuntimeException("This tokenizer's language can only be determined in the context of a project, but is not assigned to current project.");
    }

    /**
     * Builds a human-readable report showing how each input is tokenized under
     * the various flag combinations.
     *
     * <p>The {@code (=tokenizeWords(...))} hints in the labels mirror the flag
     * mapping used by {@link #tokenizeWords}: GLOSSARY does not filter digits,
     * so it corresponds to the plain {@code stemsAllowed} line.
     *
     * @param args the input strings to report on
     * @return the report text
     */
    protected String test(String... args) {
        StringBuilder sb = new StringBuilder();
        sb.append(this.getClass().getName()).append('\n');
        for (String input : args) {
            sb.append("Input:\n");
            sb.append(input).append("\n");
            sb.append("tokenizeVerbatim:\n");
            sb.append(this.printTest(this.tokenizeVerbatimToStrings(input), input));
            sb.append("tokenize:\n");
            sb.append(this.printTest(this.tokenizeToStrings(input, false, false, false, true), input));
            sb.append("tokenize (stemsAllowed) (=tokenizeWords(GLOSSARY)):\n");
            sb.append(this.printTest(this.tokenizeToStrings(input, true, false, false, true), input));
            sb.append("tokenize (stemsAllowed stopWordsAllowed):\n");
            sb.append(this.printTest(this.tokenizeToStrings(input, true, true, false, true), input));
            sb.append("tokenize (stemsAllowed stopWordsAllowed filterDigits) (=tokenizeWords(MATCHING)):\n");
            sb.append(this.printTest(this.tokenizeToStrings(input, true, true, true, true), input));
            sb.append("tokenize (stemsAllowed filterDigits):\n");
            sb.append(this.printTest(this.tokenizeToStrings(input, true, false, true, true), input));
            sb.append("tokenize (filterDigits) (=tokenizeWords(NONE)):\n");
            sb.append(this.printTest(this.tokenizeToStrings(input, false, false, true, true), input));
            sb.append("----------------------------------\n");
        }
        return sb.toString();
    }

    /**
     * Formats one tokenization result for {@link #test(String...)}.
     *
     * @param strings the tokens to print
     * @param input the text the tokens were produced from
     * @return the tokens joined by {@code ", "} on one line, followed by a line
     *         stating whether concatenating the tokens reproduces {@code input}
     */
    protected String printTest(String[] strings, String input) {
        StringBuilder sb = new StringBuilder();
        sb.append(StringUtils.join(strings, ", ")).append('\n');
        boolean isVerbatim = StringUtils.join(strings, "").equals(input);
        sb.append("Is verbatim: ").append(isVerbatim).append('\n');
        return sb.toString();
    }
}

