/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.clustering.canopy;

import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
import java.io.Closeable;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.AbstractCluster;
import org.apache.mahout.clustering.canopy.Canopy;
import org.apache.mahout.clustering.canopy.CanopyClusterer;
import org.apache.mahout.clustering.canopy.CanopyMapper;
import org.apache.mahout.clustering.canopy.CanopyReducer;
import org.apache.mahout.clustering.classify.ClusterClassificationDriver;
import org.apache.mahout.clustering.classify.ClusterClassifier;
import org.apache.mahout.clustering.iterator.CanopyClusteringPolicy;
import org.apache.mahout.clustering.iterator.ClusterWritable;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command-line and programmatic driver for canopy clustering.
 *
 * <p>The driver first builds canopies from the input vectors, either in-process
 * ("sequential") or as a Hadoop MapReduce job, and writes them beneath
 * {@code <output>/clusters-0-final}. Optionally it then classifies the input
 * points against those canopies, writing the result beneath
 * {@code <output>/clusteredPoints}.
 */
public class CanopyDriver extends AbstractJob {
    /** Name of the sub-directory of the output path that receives the clustered points. */
    public static final String DEFAULT_CLUSTERED_POINTS_DIRECTORY = "clusteredPoints";
    private static final Logger log = LoggerFactory.getLogger(CanopyDriver.class);

    /**
     * Command-line entry point; delegates to {@link ToolRunner} with a fresh configuration.
     *
     * @param args command-line arguments, parsed by {@link #run(String[])}
     * @throws Exception if the tool fails
     */
    public static void main(String[] args) throws Exception {
        ToolRunner.run(new Configuration(), new CanopyDriver(), args);
    }

    /**
     * Parses the command-line options and runs the canopy job accordingly.
     *
     * <p>When {@code t3} / {@code t4} are not supplied they default to {@code t1} / {@code t2};
     * the cluster filter defaults to 0 and the outlier threshold to 0.0.
     *
     * @param args command-line arguments
     * @return 0 on completion, or -1 if the arguments could not be parsed
     * @throws Exception if option values are malformed or the clustering run fails
     */
    @Override
    public int run(String[] args) throws Exception {
        this.addInputOption();
        this.addOutputOption();
        this.addOption(DefaultOptionCreator.distanceMeasureOption().create());
        this.addOption(DefaultOptionCreator.t1Option().create());
        this.addOption(DefaultOptionCreator.t2Option().create());
        this.addOption(DefaultOptionCreator.t3Option().create());
        this.addOption(DefaultOptionCreator.t4Option().create());
        this.addOption(DefaultOptionCreator.clusterFilterOption().create());
        this.addOption(DefaultOptionCreator.overwriteOption().create());
        this.addOption(DefaultOptionCreator.clusteringOption().create());
        this.addOption(DefaultOptionCreator.methodOption().create());
        this.addOption(DefaultOptionCreator.outlierThresholdOption().create());
        if (this.parseArguments(args) == null) {
            return -1;
        }

        Path input = this.getInputPath();
        Path output = this.getOutputPath();
        Configuration conf = this.getConf();
        if (this.hasOption("overwrite")) {
            HadoopUtil.delete(conf, output);
        }

        String measureClass = this.getOption("distanceMeasure");
        double t1 = Double.parseDouble(this.getOption("t1"));
        double t2 = Double.parseDouble(this.getOption("t2"));
        // t3/t4 fall back to t1/t2 when not given explicitly.
        double t3 = this.hasOption("t3") ? Double.parseDouble(this.getOption("t3")) : t1;
        double t4 = this.hasOption("t4") ? Double.parseDouble(this.getOption("t4")) : t2;
        int clusterFilter = this.hasOption("clusterFilter")
            ? Integer.parseInt(this.getOption("clusterFilter"))
            : 0;
        boolean runClustering = this.hasOption("clustering");
        // Literal-first comparison so a missing option value yields false instead of an NPE.
        boolean runSequential = "sequential".equalsIgnoreCase(this.getOption("method"));
        DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
        double clusterClassificationThreshold = this.hasOption("outlierThreshold")
            ? Double.parseDouble(this.getOption("outlierThreshold"))
            : 0.0;

        CanopyDriver.run(conf, input, output, measure, t1, t2, t3, t4, clusterFilter, runClustering,
            clusterClassificationThreshold, runSequential);
        return 0;
    }

    /**
     * Builds the canopies and, if requested, clusters the input points against them.
     *
     * @param conf the Hadoop configuration to use
     * @param input path of the input vectors
     * @param output base output path
     * @param measure distance measure used to form canopies
     * @param t1 the T1 distance threshold
     * @param t2 the T2 distance threshold
     * @param t3 the T3 distance threshold (only forwarded to the MapReduce job)
     * @param t4 the T4 distance threshold (only forwarded to the MapReduce job)
     * @param clusterFilter observation-count filter applied to canopies before they are written
     * @param runClustering whether to classify the input points after building canopies
     * @param clusterClassificationThreshold threshold handed to the classification step
     * @param runSequential {@code true} to run in-process rather than as MapReduce
     */
    public static void run(Configuration conf, Path input, Path output, DistanceMeasure measure, double t1, double t2, double t3, double t4, int clusterFilter, boolean runClustering, double clusterClassificationThreshold, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        Path clustersOut = CanopyDriver.buildClusters(conf, input, output, measure, t1, t2, t3, t4, clusterFilter, runSequential);
        if (runClustering) {
            CanopyDriver.clusterData(conf, input, clustersOut, output, clusterClassificationThreshold, runSequential);
        }
    }

    /**
     * Convenience overload that uses {@code t1}/{@code t2} for T3/T4 and a cluster filter of 0.
     */
    public static void run(Configuration conf, Path input, Path output, DistanceMeasure measure, double t1, double t2, boolean runClustering, double clusterClassificationThreshold, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        CanopyDriver.run(conf, input, output, measure, t1, t2, t1, t2, 0, runClustering, clusterClassificationThreshold, runSequential);
    }

    /**
     * Convenience overload that additionally supplies a fresh {@link Configuration}.
     */
    public static void run(Path input, Path output, DistanceMeasure measure, double t1, double t2, boolean runClustering, double clusterClassificationThreshold, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        CanopyDriver.run(new Configuration(), input, output, measure, t1, t2, runClustering, clusterClassificationThreshold, runSequential);
    }

    /**
     * Builds canopies using {@code t1}/{@code t2} for T3/T4 as well.
     *
     * @return the directory the canopies were written to
     */
    public static Path buildClusters(Configuration conf, Path input, Path output, DistanceMeasure measure, double t1, double t2, int clusterFilter, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        return CanopyDriver.buildClusters(conf, input, output, measure, t1, t2, t1, t2, clusterFilter, runSequential);
    }

    /**
     * Builds canopies from the input vectors, sequentially or via MapReduce.
     *
     * <p>{@code t3} and {@code t4} are only used by the MapReduce implementation.
     *
     * @param conf the Hadoop configuration to use
     * @param input path of the input vectors
     * @param output base output path; canopies go to its {@code clusters-0-final} child
     * @param measure distance measure used to form canopies
     * @param clusterFilter observation-count filter applied to canopies before they are written
     * @param runSequential {@code true} to run in-process rather than as MapReduce
     * @return the directory the canopies were written to
     */
    public static Path buildClusters(Configuration conf, Path input, Path output, DistanceMeasure measure, double t1, double t2, double t3, double t4, int clusterFilter, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        log.info("Build Clusters Input: {} Out: {} Measure: {} t1: {} t2: {}", input, output, measure, t1, t2);
        if (runSequential) {
            return CanopyDriver.buildClustersSeq(conf, input, output, measure, t1, t2, clusterFilter);
        }
        return CanopyDriver.buildClustersMR(conf, input, output, measure, t1, t2, t3, t4, clusterFilter);
    }

    /**
     * Builds canopies in-process and writes them to
     * {@code <output>/clusters-0-final/part-r-00000} as (Text identifier, ClusterWritable) pairs.
     *
     * <p>Only canopies whose observation count is strictly greater than
     * {@code clusterFilter} are written.
     *
     * @param conf the caller's configuration; a fresh one is used when this is {@code null}
     * @return the directory containing the written canopies
     * @throws IOException if reading the input or writing the canopies fails
     */
    private static Path buildClustersSeq(Configuration conf, Path input, Path output, DistanceMeasure measure, double t1, double t2, int clusterFilter) throws IOException {
        // Honour the caller's configuration (file-system settings etc.) rather than silently
        // replacing it; fall back to defaults only when none was supplied.
        Configuration effectiveConf = conf != null ? conf : new Configuration();
        CanopyClusterer clusterer = new CanopyClusterer(measure, t1, t2);
        ArrayList<Canopy> canopies = Lists.newArrayList();
        FileSystem fs = FileSystem.get(input.toUri(), effectiveConf);

        // The explicit type argument is required: iterating the raw type would yield Object.
        Iterable<VectorWritable> vectors = new SequenceFileDirValueIterable<VectorWritable>(
            input, PathType.LIST, PathFilters.logsCRCFilter(), effectiveConf);
        for (VectorWritable vw : vectors) {
            clusterer.addPointToCanopies(vw.get(), canopies);
        }

        Path canopyOutputDir = new Path(output, "clusters-0-final");
        Path path = new Path(canopyOutputDir, "part-r-00000");
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, effectiveConf, path, Text.class, ClusterWritable.class);
        try {
            // A single ClusterWritable is reused as the value holder for every record.
            ClusterWritable clusterWritable = new ClusterWritable();
            for (Canopy canopy : canopies) {
                canopy.computeParameters();
                if (log.isDebugEnabled()) {
                    log.debug("Writing Canopy:{} center:{} numPoints:{} radius:{}", canopy.getIdentifier(), AbstractCluster.formatVector(canopy.getCenter(), null), canopy.getNumObservations(), AbstractCluster.formatVector(canopy.getRadius(), null));
                }
                if (canopy.getNumObservations() > clusterFilter) {
                    clusterWritable.setValue(canopy);
                    writer.append(new Text(canopy.getIdentifier()), clusterWritable);
                }
            }
        } finally {
            // swallowIOException=false: a failure to close is propagated to the caller.
            Closeables.close(writer, false);
        }
        return canopyOutputDir;
    }

    /**
     * Builds canopies with a MapReduce job ({@link CanopyMapper} / {@link CanopyReducer},
     * one reduce task) writing to {@code <output>/clusters-0-final}.
     *
     * <p>The measure class name, the four thresholds and the cluster filter are stored in
     * {@code conf} for the tasks to read; note that this mutates the supplied configuration.
     *
     * @return the directory containing the written canopies
     * @throws InterruptedException if the job is interrupted or does not complete successfully
     */
    private static Path buildClustersMR(Configuration conf, Path input, Path output, DistanceMeasure measure, double t1, double t2, double t3, double t4, int clusterFilter) throws IOException, InterruptedException, ClassNotFoundException {
        conf.set("org.apache.mahout.clustering.canopy.measure", measure.getClass().getName());
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(t1));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(t2));
        conf.set("org.apache.mahout.clustering.canopy.t3", String.valueOf(t3));
        conf.set("org.apache.mahout.clustering.canopy.t4", String.valueOf(t4));
        conf.set("org.apache.mahout.clustering.canopy.canopyFilter", String.valueOf(clusterFilter));

        Job job = new Job(conf, "Canopy Driver running buildClusters over input: " + input);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setMapperClass(CanopyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(VectorWritable.class);
        job.setReducerClass(CanopyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(ClusterWritable.class);
        job.setNumReduceTasks(1);
        job.setJarByClass(CanopyDriver.class);

        FileInputFormat.addInputPath(job, input);
        Path canopyOutputDir = new Path(output, "clusters-0-final");
        FileOutputFormat.setOutputPath(job, canopyOutputDir);
        if (!job.waitForCompletion(true)) {
            throw new InterruptedException("Canopy Job failed processing " + input);
        }
        return canopyOutputDir;
    }

    /**
     * Writes a {@link CanopyClusteringPolicy} alongside the canopies and then runs the
     * cluster classification driver over the points, writing to
     * {@code <output>/clusteredPoints}.
     *
     * @param points path of the input vectors to classify
     * @param canopies directory holding the previously built canopies
     * @param output base output path
     * @param clusterClassificationThreshold threshold handed to the classification driver
     * @param runSequential {@code true} to classify in-process rather than as MapReduce
     */
    private static void clusterData(Configuration conf, Path points, Path canopies, Path output, double clusterClassificationThreshold, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        ClusterClassifier.writePolicy(new CanopyClusteringPolicy(), canopies);
        Path clusteredPoints = new Path(output, DEFAULT_CLUSTERED_POINTS_DIRECTORY);
        ClusterClassificationDriver.run(conf, points, output, clusteredPoints, clusterClassificationThreshold, true, runSequential);
    }
}

