Hadoop 学习一

cixiren 发表于 2018-10-30 13:44:30

package cn.edu.ytu.botao.wordcount;　　

　　
import java.io.IOException;
　　
import java.util.StringTokenizer;
　　

　　
import org.apache.hadoop.io.IntWritable;
　　
import org.apache.hadoop.io.Text;
　　
import org.apache.hadoop.mapreduce.Mapper;
　　
import org.apache.hadoop.mapreduce.Reducer;
　　
/**
　　
* 单词计数
　　
* @author botao
　　
*
　　
*/
　　
public class WordCount {
　　/**
　　* map 过程
　　* 继承Mapper接口设置map的输入类型为
　　* 输出类型为
　　*/
　　public static class TokenizerMapper extends Mapper{
　　//one 表示单词出现一次
　　private final static IntWritable one=new IntWritable(1);
　　//word 用于存储切下的单词
　　private Text word=new Text();
　　/**
　　* 重写map()方法
　　*/
　　@Override
　　protected void map(Object key, Text value, Context context)
　　throws IOException, InterruptedException {
　　// TODO Auto-generated method stub
　　//super.map(key, value, context);
　　//对value(要计数的文件进行单词切割) 进行切分
　　StringTokenizer tokenizer=new StringTokenizer(value.toString());
　　//将切割后的单词取出输出
　　while (tokenizer.hasMoreTokens()) {
　　word.set(tokenizer.nextToken());
　　//map 输出格式
　　context.write(word, one);
　　}
　　}
　　/**
　　* reduce 过程
　　* 继承Reducer接口设置输入类型为(该输入类型正为 mapper 的输出类型)
　　* 输出类型为：
　　*/
　　public static class SumReducer extends Reducer{
　　//numResult 记录单词的频数
　　private IntWritable numResult=new IntWritable();
　　/**
　　* 重写reduce()方法
　　*/
　　@Override
　　protected void reduce(Text key, Iterable values,
　　Context context)
　　throws IOException, InterruptedException {
　　// TODO Auto-generated method stub
　　intsum=0;
　　//对获取的计算value的和
　　for (IntWritable val : values) {
　　sum+=val.get();
　　}
　　//将频数存放到numResult 中
　　numResult.set(sum);
　　//收集结果
　　context.write(key, numResult);
　　}
　　}
　　}
　　
}

页: [1]

运维网's Archiver

Hadoop 学习一