简单的java Hadoop MapReduce程序(计算平均成绩)从打包到提交及运行

angela · 发表于 2017-12-17 07:08:43

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

　　public>
　　public static>　　Mapper<LongWritable, Text, Text, IntWritable> {
　　// 实现map函数
　　public void map(LongWritable key, Text value, Context context)
　　throws IOException, InterruptedException {
　　// 将输入的纯文本文件的数据转化成String
　　String line = value.toString();
　　// 将输入的数据首先按行进行分割
　　StringTokenizer tokenizerArticle = new StringTokenizer(line, "\n");
　　// 分别对每一行进行处理
　　while (tokenizerArticle.hasMoreElements()) {
　　// 每行按空格划分
　　StringTokenizer tokenizerLine = new StringTokenizer(tokenizerArticle.nextToken());
　　String strName = tokenizerLine.nextToken();// 学生姓名部分
　　String strScore = tokenizerLine.nextToken();// 成绩部分
　　Text name = new Text(strName);
　　int scoreInt = Integer.parseInt(strScore);
　　// 输出姓名和成绩
　　context.write(name, new IntWritable(scoreInt));
　　}
　　}
　　}

　　public static>　　Reducer<Text, IntWritable, Text, IntWritable> {
　　// 实现reduce函数
　　public void reduce(Text key, Iterable<IntWritable> values,
　　Context context) throws IOException, InterruptedException {
　　int sum = 0;
　　int count = 0;
　　Iterator<IntWritable> iterator = values.iterator();
　　while (iterator.hasNext()) {
　　sum += iterator.next().get();// 计算总分
　　count++;// 统计总的科目数
　　}
　　int average = (int) sum / count;// 计算平均成绩
　　context.write(key, new IntWritable(average));
　　}
　　}
　　public static void main(String[] args) throws Exception {
　　Configuration conf = new Configuration();
　　// "localhost:9000" 需要根据实际情况设置一下
　　conf.set("mapred.job.tracker", "localhost:9000");
　　// 一个hdfs文件系统中的输入目录及输出目录
　　String[] ioArgs = new String[] { "input/score", "output" };
　　String[] otherArgs = new GenericOptionsParser(conf, ioArgs).getRemainingArgs();
　　if (otherArgs.length != 2) {
　　System.err.println("Usage: Score Average <in> <out>");
　　System.exit(2);
　　}
　　Job job = new Job(conf, "Score Average");
　　job.setJarByClass(Score.class);
　　// 设置Map、Combine和Reduce处理类
　　job.setMapperClass(Map.class);
　　job.setCombinerClass(Reduce.class);
　　job.setReducerClass(Reduce.class);
　　// 设置输出类型
　　job.setOutputKeyClass(Text.class);
　　job.setOutputValueClass(IntWritable.class);
　　// 将输入的数据集分割成小数据块splites，提供一个RecordReder的实现
　　job.setInputFormatClass(TextInputFormat.class);
　　// 提供一个RecordWriter的实现，负责数据输出
　　job.setOutputFormatClass(TextOutputFormat.class);
　　// 设置输入和输出目录
　　FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
　　FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
　　System.exit(job.waitForCompletion(true) ? 0 : 1);
　　}
　　}

账号		自动登录	找回密码
密码			立即注册

大疆运维招人啦，

C++ :try 语句块和异常处理

C++的多态

Red Hat RHCE 8 (EX294) Cert Guide

Java/C++ 区别：看完这一篇，就够用！

别再用过时库了！这 13 个顶级 C++ 库才是

c++ size_t 和 int 的区别

[经验分享] 简单的java Hadoop MapReduce程序(计算平均成绩)从打包到提交及运行

浏览过的版块

扫码加入运维网微信交流群