|
package com.lucl.hadoop.mapreduce.part;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
/**
*
* @author luchunli
* @description 自定义OutputFormat
*/
public class ProtocolOutputFormat extends TextOutputFormat<Text, Text> {

    /**
     * RecordWriter that demultiplexes records into one output file per
     * distinct key, named "&lt;key&gt;.txt", inside the task's work directory.
     */
    protected static class ProtocolRecordWriter extends RecordWriter<Text, Text> {
        // All bytes are written as UTF-8; StandardCharsets.UTF_8 is guaranteed
        // to exist, so no UnsupportedEncodingException handling is needed.
        private static final byte[] NEWLINE = "\n".getBytes(StandardCharsets.UTF_8);
        private static final byte[] TAB = "\t".getBytes(StandardCharsets.UTF_8);

        protected TaskAttemptContext context = null;
        // One open stream per key. Keyed by the key's String form because the
        // framework reuses the same mutable Text instance across write() calls;
        // using the Text object itself as a map key would corrupt the map.
        protected HashMap<String, DataOutputStream> recordStream = null;
        protected Path workPath = null;

        public ProtocolRecordWriter() {
        }

        public ProtocolRecordWriter(TaskAttemptContext context, Path workPath) {
            this.context = context;
            this.workPath = workPath;
            recordStream = new HashMap<String, DataOutputStream>();
        }

        /**
         * Appends "key\tvalue\n" (or just the non-null half) to the file
         * dedicated to this key, opening the file on first use.
         *
         * @param key   record key; a null key maps to the file "null.txt"
         * @param value record value; may be null
         * @throws IOException if the per-key file cannot be created or written
         */
        @Override
        public void write(Text key, Text value) throws IOException, InterruptedException {
            boolean nullKey = key == null;
            boolean nullValue = value == null;
            if (nullKey && nullValue) {
                return;
            }
            // String.valueOf(null Text) == "null", matching the original
            // "null.txt" file name produced by string concatenation.
            String keyName = String.valueOf(key);
            DataOutputStream out = recordStream.get(keyName);
            if (null == out) {
                Path file = new Path(workPath, keyName + ".txt");
                // overwrite=false: fail rather than clobber an existing file
                out = file.getFileSystem(this.context.getConfiguration()).create(file, false);
                recordStream.put(keyName, out);
            }
            if (!nullKey) {
                out.write(key.getBytes(), 0, key.getLength());
            }
            if (!(nullKey || nullValue)) {
                out.write(TAB);
            }
            if (!nullValue) {
                out.write(value.getBytes(), 0, value.getLength());
            }
            out.write(NEWLINE);
        }

        /**
         * Closes every per-key stream. All streams are attempted even if one
         * close fails; the first failure is rethrown afterwards so no stream
         * is leaked.
         */
        @Override
        public void close(TaskAttemptContext context) throws IOException,
                InterruptedException {
            IOException firstFailure = null;
            for (DataOutputStream out : recordStream.values()) {
                try {
                    out.close();
                } catch (IOException ioe) {
                    if (firstFailure == null) {
                        firstFailure = ioe;
                    }
                }
            }
            recordStream.clear();
            recordStream = null;
            if (firstFailure != null) {
                throw firstFailure;
            }
        }
    }

    @Override
    public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        Path workPath = this.getTaskOutputPath(context);
        return new ProtocolRecordWriter(context, workPath);
    }

    /**
     * Resolves the directory this task attempt should write into: the
     * committer's temporary work path when a {@link FileOutputCommitter} is in
     * use (output is promoted on commit), otherwise the job's configured
     * output directory.
     *
     * @throws IOException if no output path is configured for the job
     */
    private Path getTaskOutputPath(TaskAttemptContext context) throws IOException {
        OutputCommitter committer = super.getOutputCommitter(context);
        if (committer instanceof FileOutputCommitter) {
            // Directory the task should write results into.
            return ((FileOutputCommitter) committer).getWorkPath();
        }
        // No file committer: fall back to the job-level output directory.
        Path outputPath = getOutputPath(context);
        if (null == outputPath) {
            throw new IOException("Undefined job output-path.");
        }
        return outputPath;
    }
}
|
|
|