/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.vectorizer.collocations.llr;

import com.google.common.io.Closeables;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.mahout.common.StringTuple;
import org.apache.mahout.common.lucene.IteratorTokenStream;
import org.apache.mahout.math.function.ObjectIntProcedure;
import org.apache.mahout.math.map.OpenObjectIntHashMap;
import org.apache.mahout.vectorizer.collocations.llr.Gram;
import org.apache.mahout.vectorizer.collocations.llr.GramKey;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class CollocMapper
extends Mapper<Text, StringTuple, GramKey, Gram> {
    private static final byte[] EMPTY = new byte[0];
    public static final String MAX_SHINGLE_SIZE = "maxShingleSize";
    private static final int DEFAULT_MAX_SHINGLE_SIZE = 2;
    private static final Logger log = LoggerFactory.getLogger(CollocMapper.class);
    private int maxShingleSize;
    private boolean emitUnigrams;

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    protected void map(Text key, StringTuple value, final Mapper.Context context) throws IOException, InterruptedException {
        ShingleFilter sf = new ShingleFilter((TokenStream)new IteratorTokenStream(value.getEntries().iterator()), this.maxShingleSize);
        sf.reset();
        try {
            int count = 0;
            OpenObjectIntHashMap<String> ngrams = new OpenObjectIntHashMap<String>(value.getEntries().size() * (this.maxShingleSize - 1));
            OpenObjectIntHashMap<String> unigrams = new OpenObjectIntHashMap<String>(value.getEntries().size());
            do {
                String term = sf.getAttribute(CharTermAttribute.class).toString();
                String type = sf.getAttribute(TypeAttribute.class).type();
                if ("shingle".equals(type)) {
                    ++count;
                    ngrams.adjustOrPutValue(term, 1, 1);
                    continue;
                }
                if (!this.emitUnigrams || term.isEmpty()) continue;
                unigrams.adjustOrPutValue(term, 1, 1);
            } while (sf.incrementToken());
            final GramKey gramKey = new GramKey();
            ngrams.forEachPair(new ObjectIntProcedure<String>(){

                @Override
                public boolean apply(String term, int frequency) {
                    int i = term.lastIndexOf(32);
                    if (i != -1) {
                        try {
                            Gram ngram = new Gram(term, frequency, Gram.Type.NGRAM);
                            Gram head = new Gram(term.substring(0, i), frequency, Gram.Type.HEAD);
                            Gram tail = new Gram(term.substring(i + 1), frequency, Gram.Type.TAIL);
                            gramKey.set(head, EMPTY);
                            context.write((Object)gramKey, (Object)head);
                            gramKey.set(head, ngram.getBytes());
                            context.write((Object)gramKey, (Object)ngram);
                            gramKey.set(tail, EMPTY);
                            context.write((Object)gramKey, (Object)tail);
                            gramKey.set(tail, ngram.getBytes());
                            context.write((Object)gramKey, (Object)ngram);
                        }
                        catch (IOException e) {
                            throw new IllegalStateException(e);
                        }
                        catch (InterruptedException e) {
                            throw new IllegalStateException(e);
                        }
                    }
                    return true;
                }
            });
            unigrams.forEachPair(new ObjectIntProcedure<String>(){

                @Override
                public boolean apply(String term, int frequency) {
                    try {
                        Gram unigram = new Gram(term, frequency, Gram.Type.UNIGRAM);
                        gramKey.set(unigram, EMPTY);
                        context.write((Object)gramKey, (Object)unigram);
                    }
                    catch (IOException e) {
                        throw new IllegalStateException(e);
                    }
                    catch (InterruptedException e) {
                        throw new IllegalStateException(e);
                    }
                    return true;
                }
            });
            context.getCounter((Enum)Count.NGRAM_TOTAL).increment((long)count);
            sf.end();
        }
        finally {
            Closeables.close(sf, true);
        }
    }

    protected void setup(Mapper.Context context) throws IOException, InterruptedException {
        super.setup(context);
        Configuration conf = context.getConfiguration();
        this.maxShingleSize = conf.getInt(MAX_SHINGLE_SIZE, 2);
        this.emitUnigrams = conf.getBoolean("emit-unigrams", false);
        if (log.isInfoEnabled()) {
            log.info("Max Ngram size is {}", (Object)this.maxShingleSize);
            log.info("Emit Unitgrams is {}", (Object)this.emitUnigrams);
        }
    }

    public static enum Count {
        NGRAM_TOTAL;

    }
}

