tonwei139 发表于 2019-1-30 11:12:46

spark分组取 topN

  使用 Spark(Scala)实现分组取 TopN:按班级分组,取每组分数最高的前 N 条。
  原始输入文件(每行格式为“班级 分数”,以空格分隔):
  class1 33
  class2 56
  class1 87
  class2 77
  class1 76
  class2 88
  class1 95
  class1 74
  class2 85
  class2 67
  class2 77
  class1 99
  class1 59
  class2 60
  import org.apache.spark.SparkConf
  import org.apache.spark.SparkContext
  import org.apache.commons.collections.map.Flat3Map
  

  /**
   * Group-wise top-N example.
   *
   * Reads a text file whose lines look like `<class> <score>` (space separated),
   * groups the scores by class and prints, for every class, its highest scores.
   */
  object GroupTopN {

    /** Input file used when no path is supplied on the command line. */
    private val DefaultInput = "C://Users//Administrator//Desktop//spark//groupTopN.txt"

    /**
     * Runs the job on a local Spark master and prints one line per class.
     *
     * @param args optional arguments; `args(0)`, when present, overrides the
     *             default input path
     */
    def main(args: Array[String]): Unit = {
      // Kept as a local so the closure below captures a plain Int.
      val topN = 4
      val inputPath = args.headOption.getOrElse(DefaultInput)

      val conf = new SparkConf()
        .setAppName("TopN")
        .setMaster("local")
      val sc = new SparkContext(conf)
      try {
        val lines = sc.textFile(inputPath, 1)

        // Parse "<class> <score>"; lines with fewer than two fields (e.g. a
        // trailing blank line) are skipped instead of failing on the missing
        // field. A non-numeric score still fails loudly via toInt.
        val scoresByClass = lines.flatMap { line =>
          line.split(" ") match {
            case Array(cls, score, _*) => Some((cls, score.toInt))
            case _                     => None
          }
        }.groupByKey()

        // Sort descending so that take(topN) yields the highest scores;
        // an ascending sort would return the lowest ones.
        val result = scoresByClass.map { case (cls, scores) =>
          (cls, scores.toList.sorted(Ordering.Int.reverse).take(topN))
        }

        result.foreach { case (cls, top) => println(s"$cls$top") }
      } finally {
        // Always release the local Spark context, even when the job fails.
        sc.stop()
      }
    }
  }
  




页: [1]
查看完整版本: spark分组取 topN