Java开发的MapReduce程序如何在Hadoop中运行

y23335793 · 发表于 2017-12-18 06:56:17

package com.ksy.hadoop;　　

　　

import java.io.IOException;　　

import java.util.StringTokenizer;　　

　　

import org.apache.hadoop.conf.Configuration;　　

import org.apache.hadoop.fs.Path;　　

import org.apache.hadoop.io.IntWritable;　　

import org.apache.hadoop.io.Text;　　

import org.apache.hadoop.mapreduce.Job;　　

import org.apache.hadoop.mapreduce.Mapper;　　

import org.apache.hadoop.mapreduce.Reducer;　　

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;　　

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;　　

import org.apache.hadoop.util.GenericOptionsParser;　　

　　

/**　　* 该例子为网上经典例子统计单词出现次数
　　*
*/
　　
public>
　　public static>　　

　　private final static IntWritable one = new IntWritable(1);
　　private Text word = new Text();
　　

　　/**
　　* key 偏移量包括了回车所占的字符数(Windows和Linux环境会不同)
　　* value 一行数据
　　* context存储新Map的对象
　　*/
　　public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
　　StringTokenizer itr = new StringTokenizer(value.toString());
　　while (itr.hasMoreTokens()) {
　　word.set(itr.nextToken());
　　context.write(word, one);
　　}
　　}
　　}
　　

　　public static>　　private IntWritable result = new IntWritable();
　　

　　/**
　　* key 为Map中的key，hadoop会把相同key的内容合并为一个list，该list就为values。
　　* context为存放结果的对象
　　*/
　　public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException,
　　InterruptedException {
　　int sum = 0;
　　for (IntWritable val : values) {
　　sum += val.get();
　　}
　　result.set(sum);
　　context.write(key, result);
　　}
　　}
　　

　　public static void main(String[] args) throws Exception {
　　Configuration conf = new Configuration();
　　String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
　　if (otherArgs.length != 2) {
　　System.err.println("Usage: wordcount <in> <out>");
　　System.exit(2);
　　}
　　Job job = new Job(conf, "word count");
　　job.setJarByClass(WordCount.class);
　　job.setMapperClass(TokenizerMapper.class);
　　job.setCombinerClass(IntSumReducer.class);
　　job.setReducerClass(IntSumReducer.class);
　　job.setOutputKeyClass(Text.class);
　　job.setOutputValueClass(IntWritable.class);
　　FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
　　FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
　　System.exit(job.waitForCompletion(true) ? 0 : 1);
　　}
　　
}

账号		自动登录	找回密码
密码			立即注册

大疆运维招人啦，

C++ :try 语句块和异常处理

C++的多态

Red Hat RHCE 8 (EX294) Cert Guide

Java/C++ 区别：看完这一篇，就够用！

别再用过时库了！这 13 个顶级 C++ 库才是

c++ size_t 和 int 的区别

[经验分享] Java开发的mapreduce如何在hadoop中运行

浏览过的版块

扫码加入运维网微信交流群