小风儿 posted on 2019-01-31 06:59:54

Writing a simple WordCount word-frequency program with Spark

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;

import scala.Tuple2;

public class WordCountLocal {
    public static void main(String[] args) {
        // Run Spark locally in a single JVM; no cluster is required.
        SparkConf conf = new SparkConf().setAppName("WordCountLocal").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Read the local input file and split each line into words.
        // (Spark 1.x API: FlatMapFunction.call returns an Iterable;
        //  on Spark 2.x+ it must return an Iterator instead.)
        JavaRDD<String> words = sc.textFile("C://words.txt").flatMap(new FlatMapFunction<String, String>() {
            @Override
            public Iterable<String> call(String line) throws Exception {
                return Arrays.asList(line.split(" "));
            }
        });

        // Map each word to a (word, 1) pair.
        JavaPairRDD<String, Integer> mapToPair = words.mapToPair(new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String word) throws Exception {
                return new Tuple2<String, Integer>(word, 1);
            }
        });

        // Sum the counts for each word.
        JavaPairRDD<String, Integer> result = mapToPair.reduceByKey(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer v1, Integer v2) throws Exception {
                return v1 + v2;
            }
        });

        // Print each word and its total count.
        result.foreach(new VoidFunction<Tuple2<String, Integer>>() {
            @Override
            public void call(Tuple2<String, Integer> wordCount) throws Exception {
                System.out.println(wordCount._1 + " appears " + wordCount._2 + " times!");
            }
        });

        sc.close();
    }
}
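
For reference, the same pipeline reads much more compactly with Java 8 lambdas. This is a minimal sketch, assuming Spark 2.x or later (where flatMap expects an Iterator) and Java 8+; the class name WordCountLambda and the input path are illustrative, not from the original post.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class WordCountLambda {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("WordCountLambda").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Same steps as above: split lines into words, pair each word with 1,
        // sum per key. Spark 2.x flatMap expects an Iterator, hence .iterator().
        JavaPairRDD<String, Integer> counts = sc.textFile("C://words.txt")   // assumed local input path
                .flatMap(line -> Arrays.asList(line.split(" ")).iterator())
                .mapToPair(word -> new Tuple2<>(word, 1))
                .reduceByKey((v1, v2) -> v1 + v2);

        counts.foreach(wordCount ->
                System.out.println(wordCount._1 + " appears " + wordCount._2 + " times!"));

        sc.close();
    }
}

With an input file containing, say, "hello world hello", either version would print lines like "hello appears 2 times!" and "world appears 1 times!" (the order of the output is not guaranteed).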