hadoop的wordcount入门例子

她人 · 发表于 2014-3-11 10:15:18

package com.asp;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCount {

static class TokenizerMapper extends
         Mapper<Object, Text, Text, IntWritable> {

      private final static IntWritable one = new IntWritable(1);
      private Text word = new Text();

      public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
         StringTokenizer itr = new StringTokenizer(value.toString());
         while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, one);
         }
      }
}

static class IntSumReducer extends
         Reducer<Text, IntWritable, Text, IntWritable> {
      private IntWritable result = new IntWritable();

      public void reduce(Text key, Iterable<IntWritable> values,
            Context context) throws IOException, InterruptedException {
         int sum = 0;
         for (IntWritable val : values) {
            sum += val.get();
         }
         result.set(sum);
         context.write(key, result);
      }
}
public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
      if (otherArgs.length != 2) {
         System.err.println("Usage: simple kpi1 <in> <out>");
         System.exit(2);
      }
      Job job = new Job(conf, "word count");
      job.setJarByClass(WordCount.class);// 主类
      job.setMapperClass(TokenizerMapper.class);// mapper
      job.setReducerClass(IntSumReducer.class);// reducer
      job.setOutputKeyClass(Text.class);// 设置作业输出数据的关键类
      job.setOutputValueClass(IntWritable.class);// 设置作业输出值类
      FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
      FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
      System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
打成jar  执行 hadoop jar hbase20140310.jar  com.asp.WorldCount  /test/1.txt /test/output

账号		自动登录	找回密码
密码			立即注册

zabbix3.4中文手册，官网完整COPY（2019042

最新rhel8官方手册三本PDF

winhex数据恢复教程（非常巨大，内容丰富）

KMSpico10.2.0 免费激活Win10/Office2016（

zabbix3.4.1安装部署+微信推送信息+大屏显

VMware vcenter+vSphere 6.5 U2共享

CentOS6.5下redis-3.2.6的安装与配置

[经验分享] hadoop的wordcount入门例子

扫码加入运维网微信交流群