lbdbzj110 发表于 2019-1-31 06:37:36

Spark:一个高效的分布式计算系统

import spark.SparkContext
import SparkContext._

/**
 * Word-count job: reads a text file from HDFS, counts the occurrences of each
 * whitespace-separated word, and writes the `(word, count)` pairs back to HDFS.
 *
 * Expected arguments:
 *  - `args(0)`: master address handed to the `SparkContext`
 *  - `args(1)`: input path, appended to the HDFS base URI
 *  - `args(2)`: output path, appended to the HDFS base URI
 */
object WordCount {

  /**
   * Program entry point.
   *
   * Prints a usage message and exits with status 1 when fewer than three
   * arguments are supplied, since all three are read below.
   *
   * @param args command-line arguments: master, input path, output path
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 3) {
      println("usage is org.test.WordCount <master> <input> <output>")
      sys.exit(1)
    }

    println("the args: ")
    args.foreach(println)

    val hdfsPath = "hdfs://hadoop1:8020"

    // Create the SparkContext; args(0) is the appMaster address passed in by YARN.
    val sc = new SparkContext(args(0), "WrodCount",
      System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_TEST_JAR")))

    // Always release the context, even if the job fails part-way through.
    try {
      val textFile = sc.textFile(hdfsPath + args(1))

      val result = textFile
        .flatMap(line => line.split("\\s+"))
        // Blank lines and leading whitespace produce "" tokens; they are not words.
        .filter(word => word.nonEmpty)
        .map(word => (word, 1))
        .reduceByKey(_ + _)

      result.saveAsTextFile(hdfsPath + args(2))
    } finally {
      sc.stop()
    }
  }
}


页: [1]
查看完整版本: Spark:一个高效的分布式计算系统