HADOOP 调试MR job
调试 MR job 最好在单机环境中进行,这样可以降低问题的复杂度。
一 推荐在 eclipse 下进行调试,所以先安装 hadoop-eclipse 插件。注意需要对插件的 jar 进行如下修改:
1: 向 lib 文件夹加入 依赖的jar包。
2 : 修改 meta-inf 文件
二 在eclipse中新建MR project,编写适当的逻辑,右键以run on hadoop 启动WordCount 的类,在此之前需要在工程的classpath下的hadoop配置文件中增加
<property>
  <name>mapred.child.java.opts</name>
  <value>-Xmx2000m -Xdebug -Xrunjdwp:transport=dt_socket,address=7788,server=y,suspend=y</value>
</property>
<property>
  <name>mapred.tasktracker.map.tasks.maximum</name>
  <value>1</value>
  <description>tasktracker的map任务上限</description>
</property>
<property>
  <name>mapred.tasktracker.reduce.tasks.maximum</name>
  <value>1</value>
  <description>tasktracker的reduce任务上限</description>
</property>
<property>
  <name>mapred.task.timeout</name>
  <value>100000000</value>
</property>
这些配置指定了tasktracker启动jvm运行task时的参数。
三 因为我们的MR工程没有被打包,所以要有一个自打包程序(此段程序转载于网上)
[*]import java.io.File;
[*]
[*]import java.io.FileInputStream;
[*]
[*]import java.io.FileOutputStream;
[*]
[*]import java.io.IOException;
[*]
[*]import java.net.MalformedURLException;
[*]
[*]import java.net.URL;
[*]
[*]import java.net.URLClassLoader;
[*]
[*]import java.util.LinkedList;
[*]
[*]import java.util.List;
[*]
[*]import java.util.jar.JarEntry;
[*]
[*]import java.util.jar.JarOutputStream;
[*]
[*]import java.util.jar.Manifest;
[*]
[*]
[*]
/**
 * Utility for running a job from an IDE where the compiled classes have not
 * been packaged: it can bundle a directory tree into a temporary jar and build
 * a class loader over additional, explicitly registered classpath URLs.
 *
 * <p>All state is static and unsynchronized; the class is intended to be
 * called from a single launcher thread.
 */
public class EJob {

    /** Size in bytes of the buffer used when copying a file into the jar. */
    private static final int BUFFER_SIZE = 1024;

    /** Extra classpath entries registered through {@link #addClasspath(String)}. */
    private static final List<URL> classPath = new LinkedList<URL>();

    /**
     * Registers an additional classpath entry.
     *
     * @param path a URL in string form (for example {@code file:/opt/conf/});
     *             a malformed value is reported on standard error and ignored
     */
    public static void addClasspath(String path) {
        try {
            classPath.add(new URL(path));
        } catch (MalformedURLException e) {
            // Best effort by design: a bad entry is reported but must not
            // abort the launcher, so callers need not handle an exception.
            e.printStackTrace();
        }
    }

    /**
     * Builds a class loader that searches every registered classpath URL.
     *
     * <p>The parent is the current thread's context class loader, falling back
     * to the loader of this class and finally to the system class loader.
     *
     * @return a new {@link URLClassLoader} over the registered URLs
     */
    public static ClassLoader getClassLoader() {
        ClassLoader parent = Thread.currentThread().getContextClassLoader();
        if (parent == null) {
            parent = EJob.class.getClassLoader();
        }
        if (parent == null) {
            parent = ClassLoader.getSystemClassLoader();
        }
        return new URLClassLoader(classPath.toArray(new URL[0]), parent);
    }

    /**
     * Packages the file tree under {@code root} into a temporary jar located in
     * the directory named by the {@code java.io.tmpdir} system property. The
     * jar is deleted by a shutdown hook when the JVM exits.
     *
     * @param root path of the directory (typically the compiler output
     *             directory) whose contents become the jar entries
     * @return the created jar file, or {@code null} if {@code root} does not exist
     * @throws IOException if the jar cannot be created or a file cannot be read
     */
    public static File createTempJar(String root) throws IOException {
        File rootFile = new File(root);
        if (!rootFile.exists()) {
            return null;
        }

        Manifest manifest = new Manifest();
        manifest.getMainAttributes().putValue("Manifest-Version", "1.0");

        final File jarFile = File.createTempFile("EJob-", ".jar",
                new File(System.getProperty("java.io.tmpdir")));

        Runtime.getRuntime().addShutdownHook(new Thread() {
            @Override
            public void run() {
                jarFile.delete();
            }
        });

        JarOutputStream out = new JarOutputStream(new FileOutputStream(jarFile), manifest);
        try {
            createTempJarInner(out, rootFile, "");
            out.flush();
        } finally {
            // Close even when copying fails so the file handle is not leaked.
            out.close();
        }
        return jarFile;
    }

    /**
     * Recursively adds {@code f} to the jar. Directories contribute only their
     * descendants; each regular file becomes one entry named {@code base}.
     *
     * @param out  the jar stream being written
     * @param f    the file or directory to add
     * @param base the entry name for {@code f}, using {@code /} as separator;
     *             empty for the root directory
     * @throws IOException if a directory cannot be listed or a file cannot be copied
     */
    private static void createTempJarInner(JarOutputStream out, File f,
            String base) throws IOException {
        if (f.isDirectory()) {
            File[] children = f.listFiles();
            if (children == null) {
                // listFiles() returns null on an I/O error or missing permission.
                throw new IOException("Unable to list directory: " + f);
            }
            String prefix = base.length() > 0 ? base + "/" : base;
            for (File child : children) {
                createTempJarInner(out, child, prefix + child.getName());
            }
        } else {
            out.putNextEntry(new JarEntry(base));
            FileInputStream in = new FileInputStream(f);
            try {
                byte[] buffer = new byte[BUFFER_SIZE];
                int n;
                while ((n = in.read(buffer)) != -1) {
                    out.write(buffer, 0, n);
                }
            } finally {
                in.close();
            }
            out.closeEntry();
        }
    }

}
最后在WordCount的main函数中增加
// Package everything under "bin" into a temporary jar; EJob.createTempJar returns null when that path does not exist.
[*]File jarFile = EJob.createTempJar("bin");
// Build a class loader over the URLs registered via EJob.addClasspath and install it as this thread's context class loader.
[*]ClassLoader classLoader = EJob.getClassLoader();
[*]Thread.currentThread().setContextClassLoader(classLoader);
[*]
// NOTE(review): presumably points the job at the freshly built jar; jarFile is null here if "bin" is missing, so confirm the working directory.
[*]((JobConf)job.getConfiguration()).setJar(jarFile.toString());
这样在eclipse建立远程调试就可以连接上7788端口了
页:
[1]