[hadoop]Windows下eclipse导入hadoop源码，编译WordCount，打包jar

lgyx · 发表于 2015-7-12 07:03:10

　　hadoop版本为hadoop1.2.1
　　eclipse版本为eclipse-standard-kepler-SR2-win32-x86_64
　　WordCount.java为hadoop-1.2.1\src\examples\org\apache\hadoop\examples\WordCount.java

1 /**
2  *  Licensed under the Apache License, Version 2.0 (the "License");
3  *  you may not use this file except in compliance with the License.
4  *  You may obtain a copy of the License at
5  *
6  *    http://www.apache.org/licenses/LICENSE-2.0
7  *
8  *  Unless required by applicable law or agreed to in writing, software
9  *  distributed under the License is distributed on an "AS IS" BASIS,
10  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  *  See the License for the specific language governing permissions and
12  *  limitations under the License.
13  */
14
15
16 package org.apache.hadoop.examples;
17
18 import java.io.IOException;
19 import java.util.StringTokenizer;
20
21 import org.apache.hadoop.conf.Configuration;
22 import org.apache.hadoop.fs.Path;
23 import org.apache.hadoop.io.IntWritable;
24 import org.apache.hadoop.io.Text;
25 import org.apache.hadoop.mapreduce.Job;
26 import org.apache.hadoop.mapreduce.Mapper;
27 import org.apache.hadoop.mapreduce.Reducer;
28 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
29 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
30 import org.apache.hadoop.util.GenericOptionsParser;
31
32 public class WordCount {
33
34 public static class TokenizerMapper
35       extends Mapper<Object, Text, Text, IntWritable>{
36
37    private final static IntWritable one = new IntWritable(1);
38    private Text word = new Text();
39
40    public void map(Object key, Text value, Context context
41                   ) throws IOException, InterruptedException {
42    StringTokenizer itr = new StringTokenizer(value.toString());
43    while (itr.hasMoreTokens()) {
44       word.set(itr.nextToken());
45       context.write(word, one);
46    }
47    }
48 }
49
50 public static class IntSumReducer
51       extends Reducer<Text,IntWritable,Text,IntWritable> {
52    private IntWritable result = new IntWritable();
53
54    public void reduce(Text key, Iterable<IntWritable> values,
55                      Context context
56                      ) throws IOException, InterruptedException {
57    int sum = 0;
58    for (IntWritable val : values) {
59       sum += val.get();
60    }
61    result.set(sum);
62    context.write(key, result);
63    }
64 }
65
66 public static void main(String[] args) throws Exception {
67    Configuration conf = new Configuration();
68    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
69    if (otherArgs.length != 2) {
70    System.err.println("Usage: wordcount <in> <out>");
71    System.exit(2);
72    }
73    Job job = new Job(conf, "word count");
74    job.setJarByClass(WordCount.class);
75    job.setMapperClass(TokenizerMapper.class);
76    job.setCombinerClass(IntSumReducer.class);
77    job.setReducerClass(IntSumReducer.class);
78    job.setOutputKeyClass(Text.class);
79    job.setOutputValueClass(IntWritable.class);
80    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
81    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
82    System.exit(job.waitForCompletion(true) ? 0 : 1);
83 }
84 }
View Code　　
　　在eclipse中新建java project，project名为WordCount
　　在project中新建class，类名为WordCount
　　再将上述代码覆盖eclipse中的WordCount.java
　　并将页首的package改为wordcount，修改后的源码如下

1 package wordcount;
2
3 import java.io.IOException;
4 import java.util.StringTokenizer;
5
6 import org.apache.hadoop.conf.Configuration;
7 import org.apache.hadoop.fs.Path;
8 import org.apache.hadoop.io.IntWritable;
9 import org.apache.hadoop.io.Text;
10 import org.apache.hadoop.mapreduce.Job;
11 import org.apache.hadoop.mapreduce.Mapper;
12 import org.apache.hadoop.mapreduce.Reducer;
13 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
14 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
15 import org.apache.hadoop.util.GenericOptionsParser;
16
17 public class WordCount {
18
19 public static class TokenizerMapper
20       extends Mapper<Object, Text, Text, IntWritable>{
21
22    private final static IntWritable one = new IntWritable(1);
23    private Text word = new Text();
24
25    public void map(Object key, Text value, Context context
26                   ) throws IOException, InterruptedException {
27    StringTokenizer itr = new StringTokenizer(value.toString());
28    while (itr.hasMoreTokens()) {
29       word.set(itr.nextToken());
30       context.write(word, one);
31    }
32    }
33 }
34
35 public static class IntSumReducer
36       extends Reducer<Text,IntWritable,Text,IntWritable> {
37    private IntWritable result = new IntWritable();
38
39    public void reduce(Text key, Iterable<IntWritable> values,
40                      Context context
41                      ) throws IOException, InterruptedException {
42    int sum = 0;
43    for (IntWritable val : values) {
44       sum += val.get();
45    }
46    result.set(sum);
47    context.write(key, result);
48    }
49 }
50
51 public static void main(String[] args) throws Exception {
52    Configuration conf = new Configuration();
53    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
54    if (otherArgs.length != 2) {
55    System.err.println("Usage: wordcount <in> <out>");
56    System.exit(2);
57    }
58    Job job = new Job(conf, "word count");
59    job.setJarByClass(WordCount.class);
60    job.setMapperClass(TokenizerMapper.class);
61    job.setCombinerClass(IntSumReducer.class);
62    job.setReducerClass(IntSumReducer.class);
63    job.setOutputKeyClass(Text.class);
64    job.setOutputValueClass(IntWritable.class);
65    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
66    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
67    System.exit(job.waitForCompletion(true) ? 0 : 1);
68
69 }
70 }
View Code　　

1 import org.apache.hadoop.conf.Configuration;
2 import org.apache.hadoop.fs.Path;
3 import org.apache.hadoop.io.IntWritable;
4 import org.apache.hadoop.io.Text;
5 import org.apache.hadoop.mapreduce.Job;
6 import org.apache.hadoop.mapreduce.Mapper;
7 import org.apache.hadoop.mapreduce.Reducer;
8 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
9 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
10 import org.apache.hadoop.util.GenericOptionsParser;
　　可以看到源码import了好几个hadoop自定义类，非JDK环境自带的类，所以需要把这些依赖包导入eclipse中，不然编译器如何能找到这些类呢，得明确让编译器知道这些类所在位置。
　　这时候编译并运行一下，会发现有如下错误

Exception in thread "main" java.lang.Error: Unresolved compilation problems:
The import org.apache.commons cannot be resolved
The import org.apache.commons cannot be resolved
The import org.codehaus cannot be resolved
The import org.codehaus cannot be resolved
Log cannot be resolved to a type
LogFactory cannot be resolved
Log cannot be resolved to a type
Log cannot be resolved to a type
Log cannot be resolved to a type
Log cannot be resolved to a type
Log cannot be resolved to a type
Log cannot be resolved to a type
Log cannot be resolved to a type
Log cannot be resolved to a type
Log cannot be resolved to a type
Log cannot be resolved to a type
Log cannot be resolved to a type
Log cannot be resolved to a type
Log cannot be resolved to a type
Log cannot be resolved to a type
Log cannot be resolved to a type
Log cannot be resolved to a type
Log cannot be resolved to a type
Log cannot be resolved to a type
Log cannot be resolved to a type
JsonFactory cannot be resolved to a type
JsonFactory cannot be resolved to a type
JsonGenerator cannot be resolved to a type
at org.apache.hadoop.conf.Configuration.<clinit>(Configuration.java:60)
at wordcount.WordCount.main(WordCount.java:52)
　　原因是缺少依赖的jar库文件，再把缺少的jar库文件添加入库即可。
　　使用Add External JARs添加hadoop1.2.1\lib目录下所有jar文件。

　　再一次编译并运行，成功

　　最后打包成为jar文件
　　file->export
　　

　　其中，WordCount.jar不是强求与类名相同，可以改为其他，譬如CountWord.jar,关系不大，然后点击Finish即可。
　　之后就可以在hadoop上运行了。运行WordCount详解可以参考Hadoop集群（第6期）_WordCount运行详解

1 hadoop jar WordCount.jar WordCount input output
　　注意上述代码中是没有

1 package org.apache.hadoop.examples;
　　倘若使用了package，那么jar文件中的类是带包层次的，不能再用hadoop jar WordCount.jar WordCount input output运行，需要写出主类的全限定类名（包名.类名，例如前文的package wordcount;对应wordcount.WordCount）；对于package org.apache.hadoop.examples;，运行命令改为

hadoop jar WordCount.jar org.apache.hadoop.examples.WordCount input output
　　关于这里打包的内容，在[hadoop]命令行编译并运行hadoop例子WordCount有讲述

本文基于知识共享署名-非商业性使用 3.0 许可协议进行许可。欢迎转载、演绎，但是必须保留本文的署名林羽飞扬,若需咨询，请给我发信

账号		自动登录	找回密码
密码			立即注册

Centos6.5×64安装配置openmeetings3.0.3详

大疆运维招人啦，

C++ :try 语句块和异常处理

C++的多态

Red Hat RHCE 8 (EX294) Cert Guide

Java/C++ 区别：看完这一篇，就够用！

别再用过时库了！这 13 个顶级 C++ 库才是

[经验分享] [hadoop]Windows下eclipse导入hadoop源码，编译WordCount，打包jar

扫码加入运维网微信交流群