/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.examples;

import java.io.IOException;
import java.util.StringTokenizer;

// Hadoop framework imports.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

33 // 主类
34 public>
35
36 // Mapper类
37 public static>
38
39 // new一个值为1的整数对象
40 private final static IntWritable one = new IntWritable(1);
41 // new一个空的Text对象
42 private Text word = new Text();
43
44 // 实现map函数
45 public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
46
47 // 创建value的字符串迭代器
48 StringTokenizer itr = new StringTokenizer(value.toString());
49
50 // 对数据进行再次分割并输出map结果。初始格式为<字节偏移量,单词> 目标格式为<单词,频率>
51 while (itr.hasMoreTokens()) {
52 word.set(itr.nextToken());
53 context.write(word, one);
54 }
55 }
56 }
57
58 // Reducer类
59 public static>
60
61 // new一个值为空的整数对象
62 private IntWritable result = new IntWritable();
63
64 // 实现reduce函数
65 public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
66
67 int sum = 0;
68 for (IntWritable val : values) {
69 sum += val.get();
70 }
71
72 // 得到本次计算的单词的频数
73 result.set(sum);
74
75 // 输出reduce结果
76 context.write(key, result);
77 }
78 }
79
80 // 主函数
81 public static void main(String[] args) throws Exception {
82
83 // 获取配置参数
84 Configuration conf = new Configuration();
85 String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
86
87 // 检查命令语法
88 if (otherArgs.length != 2) {
89 System.err.println("Usage: wordcount <in> <out>");
90 System.exit(2);
91 }
92
93 // 定义作业对象
94 Job job = new Job(conf, "word count");
95 // 注册分布式类
96 job.setJarByClass(WordCount.class);
97 // 注册Mapper类
98 job.setMapperClass(TokenizerMapper.class);
99 // 注册合并类
100 job.setCombinerClass(IntSumReducer.class);
101 // 注册Reducer类
102 job.setReducerClass(IntSumReducer.class);
103 // 注册输出格式类
104 job.setOutputKeyClass(Text.class);
105 job.setOutputValueClass(IntWritable.class);
106 // 设置输入输出路径
107 FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
108 FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
109
110 // 运行程序
111 System.exit(job.waitForCompletion(true) ? 0 : 1);
112 }
113 } |