/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.math.hadoop.similarity.cooccurrence;

import com.google.common.base.Preconditions;
import com.google.common.primitives.Ints;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.mapreduce.VectorSumCombiner;
import org.apache.mahout.common.mapreduce.VectorSumReducer;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.MutableElement;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.TopElementsQueue;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.Vectors;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.VectorSimilarityMeasure;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.VectorSimilarityMeasures;
import org.apache.mahout.math.map.OpenIntIntHashMap;

public class RowSimilarityJob
extends AbstractJob {
    /** Sentinel stored as the threshold when no {@code threshold} option was given on the command line. */
    public static final double NO_THRESHOLD = Double.MIN_VALUE;
    /** Sentinel stored as the seed when no {@code randomSeed} option was given on the command line. */
    public static final long NO_FIXED_RANDOM_SEED = Long.MIN_VALUE;
    // Hadoop Configuration keys through which the driver passes settings and side-file
    // locations to the mapper/reducer classes below.
    private static final String SIMILARITY_CLASSNAME = RowSimilarityJob.class + ".distributedSimilarityClassname";
    private static final String NUMBER_OF_COLUMNS = RowSimilarityJob.class + ".numberOfColumns";
    private static final String MAX_SIMILARITIES_PER_ROW = RowSimilarityJob.class + ".maxSimilaritiesPerRow";
    private static final String EXCLUDE_SELF_SIMILARITY = RowSimilarityJob.class + ".excludeSelfSimilarity";
    private static final String THRESHOLD = RowSimilarityJob.class + ".threshold";
    private static final String NORMS_PATH = RowSimilarityJob.class + ".normsPath";
    // Note: the key suffix is "maxWeightsPath" although the constant is named MAXVALUES_PATH.
    private static final String MAXVALUES_PATH = RowSimilarityJob.class + ".maxWeightsPath";
    private static final String NUM_NON_ZERO_ENTRIES_PATH = RowSimilarityJob.class + ".nonZeroEntriesPath";
    /** Default value of the {@code maxSimilaritiesPerRow} option. */
    private static final int DEFAULT_MAX_SIMILARITIES_PER_ROW = 100;
    private static final String OBSERVATIONS_PER_COLUMN_PATH = RowSimilarityJob.class + ".observationsPerColumnPath";
    private static final String MAX_OBSERVATIONS_PER_ROW = RowSimilarityJob.class + ".maxObservationsPerRow";
    private static final String MAX_OBSERVATIONS_PER_COLUMN = RowSimilarityJob.class + ".maxObservationsPerColumn";
    private static final String RANDOM_SEED = RowSimilarityJob.class + ".randomSeed";
    /** Default value of the {@code maxObservationsPerRow} option. */
    private static final int DEFAULT_MAX_OBSERVATIONS_PER_ROW = 500;
    /** Default value of the {@code maxObservationsPerColumn} option. */
    private static final int DEFAULT_MAX_OBSERVATIONS_PER_COLUMN = 500;
    // Reserved row keys: VectorNormMapper emits its side vectors (norms, max values, non-zero
    // counts) under these keys and MergeVectorsReducer routes them to files instead of the output.
    private static final int NORM_VECTOR_MARKER = Integer.MIN_VALUE;
    private static final int MAXVALUE_VECTOR_MARKER = -2147483647;
    private static final int NUM_NON_ZERO_ENTRIES_VECTOR_MARKER = -2147483646;

    /**
     * Command-line entry point: hands a fresh {@link RowSimilarityJob} and the raw arguments to
     * {@link ToolRunner}. The status code returned by the runner is not propagated.
     *
     * @param args command-line arguments, forwarded unchanged
     * @throws Exception whatever the job run throws
     */
    public static void main(String[] args) throws Exception {
        Tool job = new RowSimilarityJob();
        ToolRunner.run(job, args);
    }

    /**
     * Parses the command line and runs the row-similarity pipeline.
     *
     * <p>A job counting the observations per column always runs first. It is followed by up to three
     * phases, each guarded by {@code shouldRunNextPhase}: (1) compute norms and transpose the input
     * into the "weights" temp path, (2) compute pairwise similarities, (3) merge them into the
     * output matrix, limited to {@code maxSimilaritiesPerRow} entries per row.
     *
     * @param args command-line arguments
     * @return {@code 0} on success; {@code -1} if the arguments could not be parsed or any of the
     *         jobs did not complete successfully
     * @throws IllegalArgumentException if no similarity class name is available
     * @throws Exception if option parsing or job submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        addInputOption();
        addOutputOption();
        addOption("numberOfColumns", "r", "Number of columns in the input matrix", false);
        addOption("similarityClassname", "s", "Name of distributed similarity class to instantiate, alternatively use one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')');
        addOption("maxSimilaritiesPerRow", "m", "Number of maximum similarities per row (default: " + DEFAULT_MAX_SIMILARITIES_PER_ROW + ')', String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));
        addOption("excludeSelfSimilarity", "ess", "compute similarity of rows to themselves?", String.valueOf(false));
        addOption("threshold", "tr", "discard row pairs with a similarity value below this", false);
        addOption("maxObservationsPerRow", null, "sample rows down to this number of entries", String.valueOf(DEFAULT_MAX_OBSERVATIONS_PER_ROW));
        addOption("maxObservationsPerColumn", null, "sample columns down to this number of entries", String.valueOf(DEFAULT_MAX_OBSERVATIONS_PER_COLUMN));
        addOption("randomSeed", null, "use this seed for sampling", false);
        addOption(DefaultOptionCreator.overwriteOption().create());

        Map<String, List<String>> parsedArgs = parseArguments(args);
        if (parsedArgs == null) {
            return -1;
        }

        int numberOfColumns = hasOption("numberOfColumns")
            ? Integer.parseInt(getOption("numberOfColumns"))
            : getDimensions(getInputPath());

        String similarityClassnameArg = getOption("similarityClassname");
        // Enum.valueOf(null) throws a bare NullPointerException, which the catch below does not
        // handle; fail with an explicit message instead when the option value is missing.
        Preconditions.checkArgument(similarityClassnameArg != null,
            "No similarity class given, please specify --similarityClassname");
        String similarityClassname;
        try {
            // A predefined measure name is translated into its implementing class name ...
            similarityClassname = VectorSimilarityMeasures.valueOf(similarityClassnameArg).getClassname();
        } catch (IllegalArgumentException iae) {
            // ... anything else is taken to be a class name already.
            similarityClassname = similarityClassnameArg;
        }

        if (hasOption("overwrite")) {
            HadoopUtil.delete(getConf(), getTempPath());
            HadoopUtil.delete(getConf(), getOutputPath());
        }

        int maxSimilaritiesPerRow = Integer.parseInt(getOption("maxSimilaritiesPerRow"));
        boolean excludeSelfSimilarity = Boolean.parseBoolean(getOption("excludeSelfSimilarity"));
        double threshold = hasOption("threshold") ? Double.parseDouble(getOption("threshold")) : NO_THRESHOLD;
        long randomSeed = hasOption("randomSeed") ? Long.parseLong(getOption("randomSeed")) : NO_FIXED_RANDOM_SEED;
        int maxObservationsPerRow = Integer.parseInt(getOption("maxObservationsPerRow"));
        int maxObservationsPerColumn = Integer.parseInt(getOption("maxObservationsPerColumn"));

        Path weightsPath = getTempPath("weights");
        Path normsPath = getTempPath("norms.bin");
        Path numNonZeroEntriesPath = getTempPath("numNonZeroEntries.bin");
        Path maxValuesPath = getTempPath("maxValues.bin");
        Path pairwiseSimilarityPath = getTempPath("pairwiseSimilarity");
        Path observationsPerColumnPath = getTempPath("observationsPerColumn.bin");

        AtomicInteger currentPhase = new AtomicInteger();

        // Runs unconditionally: its reducer writes the per-column counts to a side file, the
        // regular job output ("notUsed") is ignored.
        Job countObservations = prepareJob(getInputPath(), getTempPath("notUsed"), CountObservationsMapper.class, NullWritable.class, VectorWritable.class, SumObservationsReducer.class, NullWritable.class, VectorWritable.class);
        countObservations.setCombinerClass(VectorSumCombiner.class);
        countObservations.getConfiguration().set(OBSERVATIONS_PER_COLUMN_PATH, observationsPerColumnPath.toString());
        countObservations.setNumReduceTasks(1);
        // The next phase reads the side file written by this job, so do not continue if it failed.
        if (!countObservations.waitForCompletion(true)) {
            return -1;
        }

        if (shouldRunNextPhase(parsedArgs, currentPhase)) {
            Job normsAndTranspose = prepareJob(getInputPath(), weightsPath, VectorNormMapper.class, IntWritable.class, VectorWritable.class, MergeVectorsReducer.class, IntWritable.class, VectorWritable.class);
            normsAndTranspose.setCombinerClass(MergeVectorsCombiner.class);
            Configuration normsAndTransposeConf = normsAndTranspose.getConfiguration();
            normsAndTransposeConf.set(THRESHOLD, String.valueOf(threshold));
            normsAndTransposeConf.set(NORMS_PATH, normsPath.toString());
            normsAndTransposeConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString());
            normsAndTransposeConf.set(MAXVALUES_PATH, maxValuesPath.toString());
            normsAndTransposeConf.set(SIMILARITY_CLASSNAME, similarityClassname);
            normsAndTransposeConf.set(OBSERVATIONS_PER_COLUMN_PATH, observationsPerColumnPath.toString());
            normsAndTransposeConf.set(MAX_OBSERVATIONS_PER_ROW, String.valueOf(maxObservationsPerRow));
            normsAndTransposeConf.set(MAX_OBSERVATIONS_PER_COLUMN, String.valueOf(maxObservationsPerColumn));
            normsAndTransposeConf.set(RANDOM_SEED, String.valueOf(randomSeed));
            if (!normsAndTranspose.waitForCompletion(true)) {
                return -1;
            }
        }

        if (shouldRunNextPhase(parsedArgs, currentPhase)) {
            Job pairwiseSimilarity = prepareJob(weightsPath, pairwiseSimilarityPath, CooccurrencesMapper.class, IntWritable.class, VectorWritable.class, SimilarityReducer.class, IntWritable.class, VectorWritable.class);
            pairwiseSimilarity.setCombinerClass(VectorSumReducer.class);
            Configuration pairwiseConf = pairwiseSimilarity.getConfiguration();
            pairwiseConf.set(THRESHOLD, String.valueOf(threshold));
            pairwiseConf.set(NORMS_PATH, normsPath.toString());
            pairwiseConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString());
            pairwiseConf.set(MAXVALUES_PATH, maxValuesPath.toString());
            pairwiseConf.set(SIMILARITY_CLASSNAME, similarityClassname);
            pairwiseConf.setInt(NUMBER_OF_COLUMNS, numberOfColumns);
            pairwiseConf.setBoolean(EXCLUDE_SELF_SIMILARITY, excludeSelfSimilarity);
            if (!pairwiseSimilarity.waitForCompletion(true)) {
                return -1;
            }
        }

        if (shouldRunNextPhase(parsedArgs, currentPhase)) {
            Job asMatrix = prepareJob(pairwiseSimilarityPath, getOutputPath(), UnsymmetrifyMapper.class, IntWritable.class, VectorWritable.class, MergeToTopKSimilaritiesReducer.class, IntWritable.class, VectorWritable.class);
            asMatrix.setCombinerClass(MergeToTopKSimilaritiesReducer.class);
            asMatrix.getConfiguration().setInt(MAX_SIMILARITIES_PER_ROW, maxSimilaritiesPerRow);
            if (!asMatrix.waitForCompletion(true)) {
                return -1;
            }
        }

        return 0;
    }

    public static class MergeToTopKSimilaritiesReducer
    extends Reducer<IntWritable, VectorWritable, IntWritable, VectorWritable> {
        private int maxSimilaritiesPerRow;

        protected void setup(Reducer.Context ctx) throws IOException, InterruptedException {
            this.maxSimilaritiesPerRow = ctx.getConfiguration().getInt(MAX_SIMILARITIES_PER_ROW, 0);
            Preconditions.checkArgument(this.maxSimilaritiesPerRow > 0, "Maximum number of similarities per row must be greater then 0!");
        }

        protected void reduce(IntWritable row, Iterable<VectorWritable> partials, Reducer.Context ctx) throws IOException, InterruptedException {
            Vector allSimilarities = Vectors.merge(partials);
            Vector topKSimilarities = Vectors.topKElements(this.maxSimilaritiesPerRow, allSimilarities);
            ctx.write((Object)row, (Object)new VectorWritable(topKSimilarities));
        }
    }

    public static class UnsymmetrifyMapper
    extends Mapper<IntWritable, VectorWritable, IntWritable, VectorWritable> {
        private int maxSimilaritiesPerRow;

        protected void setup(Mapper.Context ctx) throws IOException, InterruptedException {
            this.maxSimilaritiesPerRow = ctx.getConfiguration().getInt(MAX_SIMILARITIES_PER_ROW, 0);
            Preconditions.checkArgument(this.maxSimilaritiesPerRow > 0, "Maximum number of similarities per row must be greater then 0!");
        }

        protected void map(IntWritable row, VectorWritable similaritiesWritable, Mapper.Context ctx) throws IOException, InterruptedException {
            Vector similarities = similaritiesWritable.get();
            RandomAccessSparseVector transposedPartial = new RandomAccessSparseVector(similarities.size(), 1);
            TopElementsQueue topKQueue = new TopElementsQueue(this.maxSimilaritiesPerRow);
            for (Vector.Element nonZeroElement : similarities.nonZeroes()) {
                MutableElement top = (MutableElement)topKQueue.top();
                double candidateValue = nonZeroElement.get();
                if (candidateValue > top.get()) {
                    top.setIndex(nonZeroElement.index());
                    top.set(candidateValue);
                    topKQueue.updateTop();
                }
                transposedPartial.setQuick(row.get(), candidateValue);
                ctx.write((Object)new IntWritable(nonZeroElement.index()), (Object)new VectorWritable(transposedPartial));
                transposedPartial.setQuick(row.get(), 0.0);
            }
            RandomAccessSparseVector topKSimilarities = new RandomAccessSparseVector(similarities.size(), this.maxSimilaritiesPerRow);
            for (MutableElement topKSimilarity : topKQueue.getTopElements()) {
                topKSimilarities.setQuick(topKSimilarity.index(), topKSimilarity.get());
            }
            ctx.write((Object)row, (Object)new VectorWritable(topKSimilarities));
        }
    }

    public static class SimilarityReducer
    extends Reducer<IntWritable, VectorWritable, IntWritable, VectorWritable> {
        private VectorSimilarityMeasure similarity;
        private int numberOfColumns;
        private boolean excludeSelfSimilarity;
        private Vector norms;
        private double treshold;

        protected void setup(Reducer.Context ctx) throws IOException, InterruptedException {
            this.similarity = ClassUtils.instantiateAs(ctx.getConfiguration().get(SIMILARITY_CLASSNAME), VectorSimilarityMeasure.class);
            this.numberOfColumns = ctx.getConfiguration().getInt(NUMBER_OF_COLUMNS, -1);
            Preconditions.checkArgument(this.numberOfColumns > 0, "Number of columns must be greater then 0! But numberOfColumns = " + this.numberOfColumns);
            this.excludeSelfSimilarity = ctx.getConfiguration().getBoolean(EXCLUDE_SELF_SIMILARITY, false);
            this.norms = Vectors.read(new Path(ctx.getConfiguration().get(NORMS_PATH)), ctx.getConfiguration());
            this.treshold = Double.parseDouble(ctx.getConfiguration().get(THRESHOLD));
        }

        protected void reduce(IntWritable row, Iterable<VectorWritable> partialDots, Reducer.Context ctx) throws IOException, InterruptedException {
            Iterator<VectorWritable> partialDotsIterator = partialDots.iterator();
            Vector dots = partialDotsIterator.next().get();
            while (partialDotsIterator.hasNext()) {
                Vector toAdd = partialDotsIterator.next().get();
                for (Vector.Element nonZeroElement : toAdd.nonZeroes()) {
                    dots.setQuick(nonZeroElement.index(), dots.getQuick(nonZeroElement.index()) + nonZeroElement.get());
                }
            }
            Vector similarities = dots.like();
            double normA = this.norms.getQuick(row.get());
            for (Vector.Element b : dots.nonZeroes()) {
                double similarityValue = this.similarity.similarity(b.get(), normA, this.norms.getQuick(b.index()), this.numberOfColumns);
                if (!(similarityValue >= this.treshold)) continue;
                similarities.set(b.index(), similarityValue);
            }
            if (this.excludeSelfSimilarity) {
                similarities.setQuick(row.get(), 0.0);
            }
            ctx.write((Object)row, (Object)new VectorWritable(similarities));
        }
    }

    public static class CooccurrencesMapper
    extends Mapper<IntWritable, VectorWritable, IntWritable, VectorWritable> {
        private VectorSimilarityMeasure similarity;
        private OpenIntIntHashMap numNonZeroEntries;
        private Vector maxValues;
        private double threshold;
        private static final Comparator<Vector.Element> BY_INDEX = new Comparator<Vector.Element>(){

            @Override
            public int compare(Vector.Element one, Vector.Element two) {
                return Ints.compare(one.index(), two.index());
            }
        };

        protected void setup(Mapper.Context ctx) throws IOException, InterruptedException {
            this.similarity = ClassUtils.instantiateAs(ctx.getConfiguration().get(SIMILARITY_CLASSNAME), VectorSimilarityMeasure.class);
            this.numNonZeroEntries = Vectors.readAsIntMap(new Path(ctx.getConfiguration().get(NUM_NON_ZERO_ENTRIES_PATH)), ctx.getConfiguration());
            this.maxValues = Vectors.read(new Path(ctx.getConfiguration().get(MAXVALUES_PATH)), ctx.getConfiguration());
            this.threshold = Double.parseDouble(ctx.getConfiguration().get(THRESHOLD));
        }

        private boolean consider(Vector.Element occurrenceA, Vector.Element occurrenceB) {
            int numNonZeroEntriesA = this.numNonZeroEntries.get(occurrenceA.index());
            int numNonZeroEntriesB = this.numNonZeroEntries.get(occurrenceB.index());
            double maxValueA = this.maxValues.get(occurrenceA.index());
            double maxValueB = this.maxValues.get(occurrenceB.index());
            return this.similarity.consider(numNonZeroEntriesA, numNonZeroEntriesB, maxValueA, maxValueB, this.threshold);
        }

        protected void map(IntWritable column, VectorWritable occurrenceVector, Mapper.Context ctx) throws IOException, InterruptedException {
            Vector.Element[] occurrences = Vectors.toArray(occurrenceVector);
            Arrays.sort(occurrences, BY_INDEX);
            int cooccurrences = 0;
            int prunedCooccurrences = 0;
            for (int n = 0; n < occurrences.length; ++n) {
                Vector.Element occurrenceA = occurrences[n];
                RandomAccessSparseVector dots = new RandomAccessSparseVector(Integer.MAX_VALUE);
                for (int m = n; m < occurrences.length; ++m) {
                    Vector.Element occurrenceB = occurrences[m];
                    if (this.threshold == Double.MIN_VALUE || this.consider(occurrenceA, occurrenceB)) {
                        dots.setQuick(occurrenceB.index(), this.similarity.aggregate(occurrenceA.get(), occurrenceB.get()));
                        ++cooccurrences;
                        continue;
                    }
                    ++prunedCooccurrences;
                }
                ctx.write((Object)new IntWritable(occurrenceA.index()), (Object)new VectorWritable(dots));
            }
            ctx.getCounter((Enum)Counters.COOCCURRENCES).increment((long)cooccurrences);
            ctx.getCounter((Enum)Counters.PRUNED_COOCCURRENCES).increment((long)prunedCooccurrences);
        }
    }

    public static class MergeVectorsReducer
    extends Reducer<IntWritable, VectorWritable, IntWritable, VectorWritable> {
        private Path normsPath;
        private Path numNonZeroEntriesPath;
        private Path maxValuesPath;

        protected void setup(Reducer.Context ctx) throws IOException, InterruptedException {
            this.normsPath = new Path(ctx.getConfiguration().get(NORMS_PATH));
            this.numNonZeroEntriesPath = new Path(ctx.getConfiguration().get(NUM_NON_ZERO_ENTRIES_PATH));
            this.maxValuesPath = new Path(ctx.getConfiguration().get(MAXVALUES_PATH));
        }

        protected void reduce(IntWritable row, Iterable<VectorWritable> partialVectors, Reducer.Context ctx) throws IOException, InterruptedException {
            Vector partialVector = Vectors.merge(partialVectors);
            if (row.get() == Integer.MIN_VALUE) {
                Vectors.write(partialVector, this.normsPath, ctx.getConfiguration());
            } else if (row.get() == -2147483647) {
                Vectors.write(partialVector, this.maxValuesPath, ctx.getConfiguration());
            } else if (row.get() == -2147483646) {
                Vectors.write(partialVector, this.numNonZeroEntriesPath, ctx.getConfiguration(), true);
            } else {
                ctx.write((Object)row, (Object)new VectorWritable(partialVector));
            }
        }
    }

    private static class MergeVectorsCombiner
    extends Reducer<IntWritable, VectorWritable, IntWritable, VectorWritable> {
        private MergeVectorsCombiner() {
        }

        protected void reduce(IntWritable row, Iterable<VectorWritable> partialVectors, Reducer.Context ctx) throws IOException, InterruptedException {
            ctx.write((Object)row, (Object)new VectorWritable(Vectors.merge(partialVectors)));
        }
    }

    public static class VectorNormMapper
    extends Mapper<IntWritable, VectorWritable, IntWritable, VectorWritable> {
        private VectorSimilarityMeasure similarity;
        private Vector norms;
        private Vector nonZeroEntries;
        private Vector maxValues;
        private double threshold;
        private OpenIntIntHashMap observationsPerColumn;
        private int maxObservationsPerRow;
        private int maxObservationsPerColumn;
        private Random random;

        protected void setup(Mapper.Context ctx) throws IOException, InterruptedException {
            Configuration conf = ctx.getConfiguration();
            this.similarity = ClassUtils.instantiateAs(conf.get(SIMILARITY_CLASSNAME), VectorSimilarityMeasure.class);
            this.norms = new RandomAccessSparseVector(Integer.MAX_VALUE);
            this.nonZeroEntries = new RandomAccessSparseVector(Integer.MAX_VALUE);
            this.maxValues = new RandomAccessSparseVector(Integer.MAX_VALUE);
            this.threshold = Double.parseDouble(conf.get(THRESHOLD));
            this.observationsPerColumn = Vectors.readAsIntMap(new Path(conf.get(OBSERVATIONS_PER_COLUMN_PATH)), conf);
            this.maxObservationsPerRow = conf.getInt(MAX_OBSERVATIONS_PER_ROW, 500);
            this.maxObservationsPerColumn = conf.getInt(MAX_OBSERVATIONS_PER_COLUMN, 500);
            long seed = Long.parseLong(conf.get(RANDOM_SEED));
            this.random = seed == Long.MIN_VALUE ? RandomUtils.getRandom() : RandomUtils.getRandom(seed);
        }

        private Vector sampleDown(Vector rowVector, Mapper.Context ctx) {
            int observationsPerRow = rowVector.getNumNondefaultElements();
            double rowSampleRate = (double)Math.min(this.maxObservationsPerRow, observationsPerRow) / (double)observationsPerRow;
            Vector downsampledRow = rowVector.like();
            long usedObservations = 0L;
            long neglectedObservations = 0L;
            for (Vector.Element elem : rowVector.nonZeroes()) {
                int columnCount = this.observationsPerColumn.get(elem.index());
                double columnSampleRate = (double)Math.min(this.maxObservationsPerColumn, columnCount) / (double)columnCount;
                if (this.random.nextDouble() <= Math.min(rowSampleRate, columnSampleRate)) {
                    downsampledRow.setQuick(elem.index(), elem.get());
                    ++usedObservations;
                    continue;
                }
                ++neglectedObservations;
            }
            ctx.getCounter((Enum)Counters.USED_OBSERVATIONS).increment(usedObservations);
            ctx.getCounter((Enum)Counters.NEGLECTED_OBSERVATIONS).increment(neglectedObservations);
            return downsampledRow;
        }

        protected void map(IntWritable row, VectorWritable vectorWritable, Mapper.Context ctx) throws IOException, InterruptedException {
            Vector sampledRowVector = this.sampleDown(vectorWritable.get(), ctx);
            Vector rowVector = this.similarity.normalize(sampledRowVector);
            int numNonZeroEntries = 0;
            double maxValue = Double.MIN_VALUE;
            for (Vector.Element element : rowVector.nonZeroes()) {
                RandomAccessSparseVector partialColumnVector = new RandomAccessSparseVector(Integer.MAX_VALUE);
                partialColumnVector.setQuick(row.get(), element.get());
                ctx.write((Object)new IntWritable(element.index()), (Object)new VectorWritable(partialColumnVector));
                ++numNonZeroEntries;
                if (!(maxValue < element.get())) continue;
                maxValue = element.get();
            }
            if (this.threshold != Double.MIN_VALUE) {
                this.nonZeroEntries.setQuick(row.get(), numNonZeroEntries);
                this.maxValues.setQuick(row.get(), maxValue);
            }
            this.norms.setQuick(row.get(), this.similarity.norm(rowVector));
            ctx.getCounter((Enum)Counters.ROWS).increment(1L);
        }

        protected void cleanup(Mapper.Context ctx) throws IOException, InterruptedException {
            ctx.write((Object)new IntWritable(Integer.MIN_VALUE), (Object)new VectorWritable(this.norms));
            ctx.write((Object)new IntWritable(-2147483646), (Object)new VectorWritable(this.nonZeroEntries));
            ctx.write((Object)new IntWritable(-2147483647), (Object)new VectorWritable(this.maxValues));
        }
    }

    public static class SumObservationsReducer
    extends Reducer<NullWritable, VectorWritable, NullWritable, VectorWritable> {
        protected void reduce(NullWritable nullWritable, Iterable<VectorWritable> partialVectors, Reducer.Context ctx) throws IOException, InterruptedException {
            Vector counts = Vectors.sum(partialVectors.iterator());
            Vectors.write(counts, new Path(ctx.getConfiguration().get(OBSERVATIONS_PER_COLUMN_PATH)), ctx.getConfiguration());
        }
    }

    public static class CountObservationsMapper
    extends Mapper<IntWritable, VectorWritable, NullWritable, VectorWritable> {
        private Vector columnCounts = new RandomAccessSparseVector(Integer.MAX_VALUE);

        protected void map(IntWritable rowIndex, VectorWritable rowVectorWritable, Mapper.Context ctx) throws IOException, InterruptedException {
            Vector row = rowVectorWritable.get();
            for (Vector.Element elem : row.nonZeroes()) {
                this.columnCounts.setQuick(elem.index(), this.columnCounts.getQuick(elem.index()) + 1.0);
            }
        }

        protected void cleanup(Mapper.Context ctx) throws IOException, InterruptedException {
            ctx.write((Object)NullWritable.get(), (Object)new VectorWritable(this.columnCounts));
        }
    }

    /** Hadoop counters incremented by the mappers of this job. */
    static enum Counters {
        // Incremented once per input row by VectorNormMapper.map.
        ROWS,
        // Entries kept by the down-sampling in VectorNormMapper.
        USED_OBSERVATIONS,
        // Entries dropped by the down-sampling in VectorNormMapper.
        NEGLECTED_OBSERVATIONS,
        // Pairs aggregated by CooccurrencesMapper.
        COOCCURRENCES,
        // Pairs skipped by CooccurrencesMapper because the similarity measure rejected them.
        PRUNED_COOCCURRENCES;

    }
}

