disl 发表于 2017-2-28 09:15:24

Hadoop2.2.0(yarn)编译部署手册

  Created on 2014-3-30
URL    : http://www.cnblogs.com/zhxfl/p/3633919.html
@author: zhxfl


  Hadoop-2.2编译
  由于Hadoop-2.2只发布了32位的版本,所以如果是在64位操作系统上,需要重新编译
  安装maven
  安装maven,查找其安装目录



sudo apt-get install maven
find /usr -name "*maven*"
  根据其安装目录添加环境变量



export M2_HOME=/usr/shared/maven
export PATH=$PATH:$M2_HOME/bin
export MAVEN_OPTS="-Xms256m -Xmx512m"
  安装google protobuf



wget https://protobuf.googlecode.com/files/protobuf-2.5.0.tar.gz
tar -xzf protobuf-2.5.0.tar.gz && cd protobuf-2.5.0
./configure --prefix=/usr/local/protobuf
make && sudo make install
sudo vim /etc/ld.so.conf   [添加/usr/local/protobuf/lib]
sudo ldconfig
  安装Cmake



sudo apt-get install cmake
  安装依赖库



sudo apt-get install libglib2.0-dev libssl-dev
  pom.xml 中有个bug,添加下面patch即可
  参考 https://issues.apache.org/jira/browse/HADOOP-10110



Index: hadoop-common-project/hadoop-auth/pom.xml
===================================================================
--- hadoop-common-project/hadoop-auth/pom.xml   (revision 1543124)
+++ hadoop-common-project/hadoop-auth/pom.xml   (working copy)
@@ -54,6 +54,11 @@
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
+      <artifactId>jetty-util</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
<artifactId>jetty</artifactId>
<scope>test</scope>
</dependency>
  开始编译:



mvn package -Pdist,native -DskipTests -Dtar
  常见错误
   Failed to execute goal org.apache.maven.plugins:maven-antrun-plugin:1.6:run (make) on project hadoop-hdfs: An Ant BuildException has occured: exec returned: 1 ->

  安装libglib2.0-dev
  
Failed to execute goal org.apache.maven.plugins:maven-antrun-plugin:1.6:run
(make) on project hadoop-pipes: An Ant BuildException has occured: exec
returned: 1 ->

  安装libssl-dev
  
/home/yarn/hadoop-2.2.0-src/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/client/AuthenticatorTestCase.java:
cannot access org.mortbay.component.AbstractLifeCycle
  参考 https://issues.apache.org/jira/browse/HADOOP-10110
  最后在目录~/hadoop-2.2.0-src/hadoop-dist/target中有一个hadoop-2.2.0的目录就是编译出来的版本
  
  Hadoop-2.2环境配置
  添加用户
  每个节点都添加yarn用户
  添加用户
  sudo adduser yarn
  把用户添加到hadoop组中(如果你没有一个hadoop的组,需要新建这个组)
  sudo gpasswd -a yarn hadoop
  给yarn用户添加sudo权限
  sudo vim /etc/sudoers
  添加如下语句
  yarn ALL=(ALL:ALL) ALL
  ssh配置
  针对master
  sudo apt-get install openssh-server
  ssh-keygen(一直按enter即可)
  在~/.ssh目录下,有一个id_rsa(私钥),一个id_rsa.pub(公钥)
  cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
  输入ssh localhost确认是否需要输入密码
  拷贝authorized_keys到slave1~slave3节点
  scp authorized_keys yarn@slave1:~/.ssh/
  scp authorized_keys yarn@slave2:~/.ssh/
  scp authorized_keys yarn@slave3:~/.ssh/
  export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64/
  针对slaves
  都要执行ssh-keygen
  最后在master节点上使用ssh slave1等测试
  其他
  每个节点上面都需要添加如下ip
  vim /etc/hosts
  219.219.216.48    master
  219.219.216.47    slave1
  219.219.216.45    slave2
  219.219.216.46    slave3
  参考
  http://dongxicheng.org/mapreduce-nextgen/hadoop-yarn-install/
  配置文件
  hadoop-env.sh
  添加JAVA_HOME环境变量



export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64/
  core-site.xml



<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:8020</value>
<final>true</final>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/yarn/hadoop-files/tmp</value>
</property>
</configuration>
  hdfs-site.xml



<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>/home/yarn/hadoop-files/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/home/yarn/hadoop-files/dfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/yarn/hadoop-files/tmp/</value>
<description>A base for other temporary directories.</description>
</property>
</configuration>
  以上配置中的目录路径请根据你实际的环境修改。
  mapred-site.xml



<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapred.child.java.opts</name>
<value>-Xmx1024m</value>
</property>
</configuration>
  yarn-site.xml



<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>master:8033</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>
<property>
<name>yarn.web-proxy.address</name>
<value>master:8888</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/home/yarn/hadoop-2.2.0/hadoop-files/hadoop-local-dirs/</value>
<final>true</final>
</property>
</configuration>
  其他可能需要设置的环境变量



export M2_HOME=/usr/share/maven
export PATH=$PATH:$M2_HOME/bin:~/hadoop-2.2.0/bin
export MAVEN_OPTS="-Xms2048m -Xmx2048m"
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64/
export HADOOP_HOME="/home/yarn/hadoop-2.2.0"
export HADOOP_PREFIX="/home/yarn/hadoop-2.2.0"
export YARN_HOME=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop/
export YARN_CONF_DIR=$HADOOP_CONF_DIR
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
export SCALA_HOME=/usr/share/scala/
export PATH=$SCALA_HOME/bin/:$PATH
  测试
  改变文件用户组属性



./hdfs dfs -mkdir /yarn
./hdfs dfs -chgrp -R yarn /yarn
  这样yarn文件夹就属于用户yarn的了
  编译和提交任务错误参考:
  http://www.cnblogs.com/lucius/p/3435296.html
页: [1]
查看完整版本: Hadoop2.2.0(yarn)编译部署手册