<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Do not modify this file directly. Instead, copy entries that you -->
<!-- wish to modify from this file into core-site.xml and change them -->
<!-- there. If core-site.xml does not already exist, create it. -->
<!-- NOTE(review): the three comment lines above use the wording of a -->
<!-- defaults file. If this document is itself core-site.xml, then it -->
<!-- is the intended place for site-specific overrides. Please confirm. -->
<configuration>

  <!-- Default filesystem that clients and daemons connect to. -->
  <property>
    <name>fs.default.name</name>
    <value>hdfs://hadoop-1:9000/</value>
    <description>The name of the default file system, given as a URI. Here it is the HDFS NameNode on host hadoop-1, port 9000.</description>
  </property>

  <!-- NOTE(review): dfs.replication is an HDFS property and is -->
  <!-- conventionally kept in hdfs-site.xml; it is left here unchanged. -->
  <property>
    <name>dfs.replication</name>
    <value>3</value>
    <description>Default block replication: the number of replicas kept for each block unless another value is given when the file is created.</description>
  </property>

  <property>
    <name>fs.inmemory.size.mb</name>
    <value>10</value>
    <description>Larger amount of memory allocated for the in-memory file-system used to merge map-outputs at the reduces.</description>
  </property>

  <property>
    <name>io.sort.factor</name>
    <value>10</value>
    <description>More streams merged at once while sorting files.</description>
  </property>

  <property>
    <name>io.sort.mb</name>
    <value>10</value>
    <description>Higher memory-limit while sorting data.</description>
  </property>

  <!-- 131072 bytes = 128 KB. -->
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
    <description>Size of read/write buffer used in SequenceFiles.</description>
  </property>

  <!-- The user.name variable in the value is substituted when the -->
  <!-- configuration is loaded, giving each user a separate directory. -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/hadoop/storage1/tmp/hadoop-${user.name}</value>
    <description>A base for other temporary directories.</description>
  </property>

</configuration>
上面的配置内容与HDFS的基本属性相关，一般在系统运行过程中比较固定的配置都放在这里。如果某些属性需要根据实际应用的变化而调整，可以配置到hdfs-site.xml文件中，下面会解释。
6、配置hdfs-site.xml文件
配置文件 /usr/local/hadoop-0.20.2/conf/hdfs-site.xml（可使用 vim 编辑）的内容如下所示：
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- HDFS settings: NameNode and DataNode storage locations, block -->
<!-- size and NameNode RPC handler threads. -->
<configuration>

  <!-- Two comma-separated directories are listed for the NameNode. -->
  <property>
    <name>dfs.name.dir</name>
    <value>/home/hadoop/storage1/name/a,/home/hadoop/storage1/name/b</value>
    <description>Path on the local filesystem where the NameNode stores the namespace and transactions logs persistently.</description>
  </property>

  <!-- Three comma-separated directories are listed for block storage. -->
  <property>
    <name>dfs.data.dir</name>
    <value>/home/hadoop/storage1/data/a,/home/hadoop/storage1/data/b,/home/hadoop/storage1/data/c</value>
    <description>Comma separated list of paths on the local filesystem of a DataNode where it should store its blocks.</description>
  </property>

  <!-- 67108864 bytes = 64 * 1024 * 1024 = 64 MB. -->
  <property>
    <name>dfs.block.size</name>
    <value>67108864</value>
    <description>HDFS blocksize of 64MB for large file-systems.</description>
  </property>

  <property>
    <name>dfs.namenode.handler.count</name>
    <value>10</value>
    <description>More NameNode server threads to handle RPCs from large number of DataNodes.</description>
  </property>

</configuration>
该配置文件配置与HDFS相关的属性，而且这些属性可能在使用计算过程中（如进行MapReduce计算）需要变化，如数据存储目录等等。如果hdfs-site.xml和core-site.xml中同时配置了某个属性，则hdfs-site.xml中的配置会覆盖掉core-site.xml中配置的属性。
下面是与MapReduce相关的配置（从属性名称看，应为mapred-site.xml文件的内容），如下所示：
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- MapReduce settings: JobTracker address, system and local -->
<!-- directories, per-TaskTracker task slots, child JVM options, -->
<!-- queues and ACLs. -->
<configuration>

  <!-- The JobTracker is addressed as a plain host:port pair. It is an -->
  <!-- RPC endpoint, not a filesystem, so no hdfs:// scheme is used. -->
  <property>
    <name>mapred.job.tracker</name>
    <value>hadoop-1:9001</value>
    <description>Host or IP and port of JobTracker.</description>
  </property>

  <property>
    <name>mapred.system.dir</name>
    <value>/home/hadoop/storage1/mapred/system</value>
    <description>Path on the HDFS where the MapReduce framework stores system files. Note: This is in the default filesystem (HDFS) and must be accessible from both the server and client machines.</description>
  </property>

  <property>
    <name>mapred.local.dir</name>
    <value>/home/hadoop/storage1/mapred/local</value>
    <description>Comma-separated list of paths on the local filesystem where temporary MapReduce data is written. Note: Multiple paths help spread disk i/o.</description>
  </property>

  <property>
    <name>mapred.tasktracker.map.tasks.maximum</name>
    <value>10</value>
    <description>The maximum number of Map tasks, which are run simultaneously on a given TaskTracker, individually. Note: Defaults to 2 maps, but vary it depending on your hardware.</description>
  </property>

  <property>
    <name>mapred.tasktracker.reduce.tasks.maximum</name>
    <value>2</value>
    <description>The maximum number of Reduce tasks, which are run simultaneously on a given TaskTracker, individually. Note: Defaults to 2 reduces, but vary it depending on your hardware.</description>
  </property>

  <property>
    <name>mapred.reduce.parallel.copies</name>
    <value>5</value>
    <description>Higher number of parallel copies run by reduces to fetch outputs from very large number of maps.</description>
  </property>

  <!-- NOTE(review): confirm that the deployed Hadoop release reads the -->
  <!-- separate map and reduce child options below; some releases only -->
  <!-- read the combined mapred.child.java.opts property. -->
  <property>
    <name>mapred.map.child.java.opts</name>
    <value>-Xmx1024M</value>
    <description>Larger heap-size for child jvms of maps.</description>
  </property>

  <!-- NOTE(review): the value below uses the Xms flag (initial heap), -->
  <!-- while the description and the map setting above refer to the -->
  <!-- heap limit (Xmx). Value left unchanged; confirm the intent. -->
  <property>
    <name>mapred.reduce.child.java.opts</name>
    <value>-Xms300M</value>
    <description>Larger heap-size for child jvms of reduces.</description>
  </property>

  <property>
    <name>tasktracker.http.threads</name>
    <value>5</value>
    <description>More worker threads for the TaskTracker's http server. The http server is used by reduces to fetch intermediate map-outputs.</description>
  </property>

  <property>
    <name>mapred.queue.names</name>
    <value>default</value>
    <description>Comma separated list of queues to which jobs can be submitted. Note: The MapReduce system always supports at least one queue with the name as default. Hence, this parameter's value should always contain the string default. Some job schedulers supported in Hadoop, like the Capacity Scheduler(http://hadoop.apache.org/common/docs/stable/capacity_scheduler.html), support multiple queues. If such a scheduler is being used, the list of configured queue names must be specified here. Once queues are defined, users can submit jobs to a queue using the property name mapred.job.queue.name in the job configuration. There could be a separate configuration file for configuring properties of these queues that is managed by the scheduler. Refer to the documentation of the scheduler for information on the same.</description>
  </property>

  <property>
    <name>mapred.acls.enabled</name>
    <value>false</value>
    <description>Boolean, specifying whether checks for queue ACLs and job ACLs are to be done for authorizing users for doing queue operations and job operations. Note: If true, queue ACLs are checked while submitting and administering jobs and job ACLs are checked for authorizing view and modification of jobs. Queue ACLs are specified using the configuration parameters of the form mapred.queue.queue-name.acl-name, defined below under mapred-queue-acls.xml. Job ACLs are described at Job Authorization(http://hadoop.apache.org/common/docs/stable/mapred_tutorial.html#Job+Authorization).</description>
  </property>

</configuration>