KM algorithm Hadoop

The current program runs only one iteration of the KMeans algorithm. Please revise it (in the main function) to implement iterative processing, paste your code here, and briefly describe how it works. Were you able to successfully compile and run your program (yes/no)?

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class KMeans {

 public static class KMMapper
      extends Mapper<Object, Text, IntWritable, Text>{

   private double [][] _centroids;
   private IntWritable cid = new IntWritable();

   public void setup(Mapper.Context context){
     Configuration conf = context.getConfiguration();
     String filename = conf.get(“Centroids-file”);
     _centroids = loadCentroids(filename, conf);
   }

   public void map(Object key, Text value, Context context
                   ) throws IOException, InterruptedException {
     double [] vec = parseVector(value.toString());
     cid.set(closest(vec));
     context.write(cid, value);
   }

   private int closest(double [] v){
     double mindist = dist(v, _centroids[0]);
     int label =0;
     for (int i=1; i<_centroids.length; i++){
       double t = dist(v, _centroids[i]);
       if (mindist>t){
         mindist = t;
         label = i;
       }
     }
     return label;
   }

 }

 public static class KMReducer
      extends Reducer<IntWritable, Text, IntWritable, Text> {
   // write output: cid \t centroid_vector
   private Text result = new Text();

   public void reduce(IntWritable key, Iterable<Text> vectors,
                      Context context
                      ) throws IOException, InterruptedException {
     double [] sum = null;
     int n=0;
     for (Text vec : vectors) {
       double [] v = parseVector(vec.toString());
       if (sum == null) sum = v;
       else
         for (int i = 0; i < v.length; i++)
           sum[i] += v[i];
       n ++;
     }
     String out = Double.toString(sum[0]/n);
     for (int i = 1; i < sum.length; i ++ ){
       out +=  “,” + Double.toString(sum[i]/n); // csv output
     }
     result.set(out);
     context.write(key, result);
   }
 }

 // compute square Euclidean distance between two vectors v1 and v2
 public static double dist(double [] v1, double [] v2){
   double sum=0;
   for (int i=0; i< v1.length; i++){
     double d = v1[i]-v2[i];
     sum += d*d;
   }
   return Math.sqrt(sum);
 }

 // check convergence condition
 // max{dist(c1[i], c2[i]), i=1..numClusters < threshold
 private boolean converge(double [][] c1, double [][] c2, double threshold){
   // c1 and c2 are two sets of centroids
   double maxv = 0;
   for (int i=0; i< c1.length; i++){
       double d= dist(c1[i], c2[i]);
       if (maxv<d)
           maxv = d;
   }

   if (maxv <threshold)
     return true;
   else
     return false;
   
 }

 public static double [][] loadCentroids(String filename, Configuration conf){

   double [][] centroids=null;
   Path p = new Path(filename);  // Path is used for opening the file.
   try{
     FileSystem fs = FileSystem.get(conf);//determines local or HDFS
     FSDataInputStream file = fs.open(p);
     byte[] bs = new byte[file.available()];
     file.read(bs);
     file.close();
     String [] lines = (new String(bs)).split(“\n”); //lines are separated by \n
     for (String line:lines)
       System.out.println(line);
     centroids = new double[lines.length][];
     for (int i = 0; i < lines.length; i++){
       // cid \t centroid
       String [] parts = lines[i].split(“\t”);
       int cid = Integer.parseInt(parts[0]);
       centroids[cid] = parseVector(parts[1]);
     }
   }catch(Exception e){
       //log.error(e);
       System.out.println(e);
   }
   return centroids;
 }  

 public static double [] parseVector(String s){
   String [] itr = s.split(“,”); // comma separated
   double [] v = new double[itr.length];
   for (int i = 0; i < itr.length; i++)
     v[i] = Double.parseDouble(itr[i]);
   
   return v;
 }
 
 public static void main(String[] args) throws Exception {
   
   // usage: hadoop jar km.jar hdfs://localhost:9000/user/your_home_directory/centroids data.hdfs output
   Configuration conf = new Configuration();
   conf.set(“Centroids-file”, args[0]);
   System.out.println(conf.get(“Centroids-file”));
   
   Job job = Job.getInstance(conf, “KMeans”);
   job.setJarByClass(KMeans.class);
   job.setMapperClass(KMMapper.class);
   //job.setCombinerClass(KMCombiner.class);
   job.setReducerClass(KMReducer.class);
   job.setOutputKeyClass(IntWritable.class);
   job.setOutputValueClass(Text.class);
   FileInputFormat.addInputPath(job, new Path(args[1]));
   FileOutputFormat.setOutputPath(job, new Path(args[2]));    
   System.exit(job.waitForCompletion(true) ? 0 : 1);
 }
}

Place your order
(550 words)

Approximate price: $22

Calculate the price of your order

550 words
We'll send you the first draft for approval by September 11, 2018 at 10:52 AM
Total price:
$26
The price is based on these factors:
Academic level
Number of pages
Urgency
Basic features
  • Free title page and bibliography
  • Unlimited revisions
  • Plagiarism-free guarantee
  • Money-back guarantee
  • 24/7 support
On-demand options
  • Writer’s samples
  • Part-by-part delivery
  • Overnight delivery
  • Copies of used sources
  • Expert Proofreading
Paper format
  • 275 words per page
  • 12 pt Arial/Times New Roman
  • Double line spacing
  • Any citation style (APA, MLA, Chicago/Turabian, Harvard)

Our Guarantees

Money-back Guarantee

You have to be 100% sure of the quality of your product to give a money-back guarantee. This describes us perfectly. Make sure that this guarantee is totally transparent.

Read more

Zero-plagiarism Guarantee

Each paper is composed from scratch, according to your instructions. It is then checked by our plagiarism-detection software. There is no gap where plagiarism could squeeze in.

Read more

Free-revision Policy

Thanks to our free revisions, there is no way for you to be unsatisfied. We will work on your paper until you are completely happy with the result.

Read more

Privacy Policy

Your email is safe, as we store it according to international data protection rules. Your bank details are secure, as we use only reliable payment systems.

Read more

Fair-cooperation Guarantee

By sending us your money, you buy the service we provide. Check out our terms and conditions if you prefer business talks to be laid out in official language.

Read more