Tuesday, September 23, 2014

Configuration Files

mapred-site.xml

<configuration>
<property>
  <name>mapred.tasktracker.reduce.tasks.maximum</name>
  <value>9</value>
</property>
<property>
  <name>mapred.tasktracker.map.tasks.maximum</name>
  <value>24</value>
</property>
<property>
  <name>mapred.map.tasks.speculative.execution</name>
  <value>false</value>
</property>
<property>
  <name>mapred.reduce.tasks.speculative.execution</name>
  <value>false</value>
</property>
<property>
  <name>mapreduce.tasktracker.group</name>
  <value>root</value>
  <description>Expert: Group to which TaskTracker belongs. If
  LinuxTaskController is configured via mapreduce.tasktracker.taskcontroller,
  the group owner of the task-controller binary '$HADOOP_HOME/bin/platform/bin/task-controller'
  should be the same as this group.
  </description>
</property>
<property>
  <name>mapred.local.dir</name>
  <value>/tmp/mapr-hadoop/mapred/local</value>
  <description>The local directory where MapReduce stores job JAR and XML files and
  creates work dirs for tasks. MapR Hadoop uses a local volume for map outputs.
  </description>
</property>
<property>
  <name>jobtracker.thrift.address</name>
  <value>0.0.0.0:9290</value>
</property>
<property>
  <name>mapred.jobtracker.plugins</name>
  <value>org.apache.hadoop.thriftfs.ThriftJobTrackerPlugin</value>
  <description>Comma-separated list of jobtracker plug-ins to be activated.</description>
</property>
<property>
  <name>mapred.map.child.java.opts</name>
  <value>-Xmx2604m</value>
</property>
<property>
  <name>mapred.reduce.child.java.opts</name>
  <value>-Xmx3385m</value>
</property>

</configuration>
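
The slot counts and the child JVM heap sizes above have to be sized together: with every slot busy, this node commits roughly 24 x 2604 MB of map heap plus 9 x 3385 MB of reduce heap, about 93 GB, before the TaskTracker, MFS, and OS overhead. A minimal sketch of that arithmetic, assuming nothing beyond a bash shell on the node (the numbers are simply the values configured above):

# Rough worst-case heap commitment if every configured slot runs at once.
# Values are copied from the mapred-site.xml above; compare the total
# against the node's physical RAM before raising slot counts or -Xmx.
MAP_SLOTS=24;    MAP_XMX_MB=2604
REDUCE_SLOTS=9;  REDUCE_XMX_MB=3385
TOTAL_MB=$(( MAP_SLOTS * MAP_XMX_MB + REDUCE_SLOTS * REDUCE_XMX_MB ))
echo "worst-case task heap: ${TOTAL_MB} MB"        # 92961 MB, ~91 GB
free -m | awk '/^Mem:/ {print "physical RAM:          " $2 " MB"}'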
---------------------------------------------------------------------------------

core-site.xml

<configuration>
<!-- mytable1 => /user/aaa/mytable1 -->
<!-- mytable2 => /user/bbb/mynewtable -->
<!-- yourtable => /tables_dir/yourtable -->
<!-- theirtable => /tables_dir/theirtable -->
<!--
  <property>
    <name>hbase.table.namespace.mappings</name>
    <value>mytable1:/user/aaa/,mytable2:/user/bbb/mynewtable,*:/tables_dir</value>
  </property>
-->
<property>
  <name>hadoop.proxyuser.mapr.hosts</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.mapr.groups</name>
  <value>*</value>
</property>
</configuration>
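
The commented-out hbase.table.namespace.mappings entry acts as a prefix-to-path translation for MapR-DB table names, and the two proxyuser wildcards let the mapr user impersonate any user, from any host (the usual setup for services such as Hue that submit work on behalf of other users). Purely as an illustration of the mapping comments above, not MapR's actual lookup code, the resolution behaves roughly like this:

# Illustrative only: how a table name would resolve under the mappings
# listed in the comments above (hypothetical helper, not a MapR API).
resolve_table() {
  case "$1" in
    mytable1) echo /user/aaa/mytable1 ;;     # prefix mapping 'mytable1:/user/aaa/'
    mytable2) echo /user/bbb/mynewtable ;;   # full rename 'mytable2:/user/bbb/mynewtable'
    *)        echo /tables_dir/"$1" ;;       # catch-all '*:/tables_dir'
  esac
}
resolve_table yourtable     # -> /tables_dir/yourtable
resolve_table theirtable    # -> /tables_dir/theirtable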
-----------------------------------------------------------------------------------------------
hadoop-metrics.properties

# Configuration of the "dfs" context for null
dfs.class=org.apache.hadoop.metrics.spi.NullContext

# Configuration of the "dfs" context for file
#dfs.class=org.apache.hadoop.metrics.file.FileContext
#dfs.period=10
#dfs.fileName=/tmp/dfsmetrics.log

# Configuration of the "dfs" context for ganglia
# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter)
# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
dfs.period=10
dfs.servers=c010s.sys.net:8649


# Configuration of the "mapred" context for default
mapred.class=org.apache.hadoop.metrics.spi.MapRDefaultContext
mapred.period=30


# Configuration of the "mapred" context for file
#mapred.class=org.apache.hadoop.metrics.file.FileContext
#mapred.period=10
#mapred.fileName=/tmp/mrmetrics.log

# Configuration of the "mapred" context for ganglia
# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter)
# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext
mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
mapred.period=10
mapred.servers=010s.sys.net:8649


# Configuration of the "jvm" context for null
#jvm.class=org.apache.hadoop.metrics.spi.NullContext

# Configuration of the "jvm" context for file
#jvm.class=org.apache.hadoop.metrics.file.FileContext
#jvm.period=10
#jvm.fileName=/tmp/jvmmetrics.log

# Configuration of the "jvm" context for ganglia
jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
jvm.period=10
jvm.servers=ebdp-ch2-c010s.sys.net:8649

## Add HBase, RPC, and UGI contexts

ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
ugi.period=10
ugi.servers=ebdp-ch2-c010s.sys.net:8649

hbase.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
hbase.period=10
hbase.servers=ebdp-ch2-c010s.sys.net:8649

rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
rpc.period=10
rpc.servers=ebdp-ch2-c010s.sys.net:8649

# Configuration of the "ugi" context for null
#ugi.class=org.apache.hadoop.metrics.spi.NullContext


# Configuration of the "fairscheduler" context for null
#fairscheduler.class=org.apache.hadoop.metrics.spi.NullContext

# Configuration of the "fairscheduler" context for file
#fairscheduler.class=org.apache.hadoop.metrics.file.FileContext
#fairscheduler.period=10
#fairscheduler.fileName=/tmp/fairschedulermetrics.log

# Configuration of the "fairscheduler" context for ganglia
fairscheduler.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
fairscheduler.period=10
fairscheduler.servers=ebdp-ch2-c010s.sys.net:8649

maprmepredvariant.class=com.mapr.job.mngmnt.hadoop.metrics.MaprRPCContext
maprmepredvariant.period=60
maprmapred.class=com.mapr.job.mngmnt.hadoop.metrics.MaprRPCContextFinal
maprmapred.period=60
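
Note that the dfs and mapred contexts each carry two active class assignments (the earlier NullContext and MapRDefaultContext lines were never commented out). Since the file is read as standard Java properties, the last assignment for a key wins, so the GangliaContext31 lines are the effective ones. A quick way to confirm the gmond these contexts point at is reachable and receiving data, assuming gmond is on its default TCP port 8649 and nc is installed:

# gmond answers a TCP connection with an XML dump of every metric it
# currently holds, so a non-zero count here means data is arriving.
nc ebdp-ch2-c010s.sys.net 8649 | grep -c '<METRIC '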
-------------------------------------------------------------------------------

hadoop-env.sh

if [[ ! ${HADOOP_ENV_SH_SOURCED} ]]; then

# Set Hadoop-specific environment variables here.

# The only required environment variable is JAVA_HOME.  All others are
# optional.  When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.

# The java implementation to use.  Required.
# export JAVA_HOME=/usr/lib/j2sdk1.5-sun

# Extra Java CLASSPATH elements.  Optional.
# export HADOOP_CLASSPATH="<extra_entries>:$HADOOP_CLASSPATH"

# The maximum amount of heap to use, in MB. Default is 1000.
# export HADOOP_HEAPSIZE=2000

# Extra Java runtime options.  Empty by default.
# export HADOOP_OPTS=-server

# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
export HADOOP_JOBTRACKER_OPTS="-Dmapr.library.flatclass -Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
#export HADOOP_TASKTRACKER_OPTS=""
export HADOOP_TASKTRACKER_ROOT_LOGGER="INFO,DRFA"
# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
# export HADOOP_CLIENT_OPTS

# Extra ssh options.  Empty by default.
# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"

# Where log files are stored.  $HADOOP_HOME/logs by default.
# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
# File naming remote slave hosts.  $HADOOP_HOME/conf/slaves by default.
# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves

# host:path where hadoop code should be rsync'd from.  Unset by default.
# export HADOOP_MASTER=master:/home/$USER/src/hadoop

# Seconds to sleep between slave commands.  Unset by default.  This
# can be useful in large clusters, where, e.g., slave rsyncs can
# otherwise arrive faster than the master can service them.
# export HADOOP_SLAVE_SLEEP=0.1

# The directory where pid files are stored. /tmp by default.
# export HADOOP_PID_DIR=/var/hadoop/pids

# A string representing this instance of hadoop. $USER by default.
# export HADOOP_IDENT_STRING=$USER

# The scheduling priority for daemon processes.  See 'man nice'.
# export HADOOP_NICENESS=10

fi

export HADOOP_ENV_SH_SOURCED=true
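
The HADOOP_ENV_SH_SOURCED guard around the file keeps the *_OPTS variables from accumulating duplicate flags when the script is sourced more than once in the same shell. One way to confirm the daemons actually picked up the jmxremote flag, a sketch assuming the JDK's jps is on the PATH and you run it as the daemon user:

# jps -v prints each JVM's arguments; the JobTracker and TaskTracker
# should show the -Dcom.sun.management.jmxremote appended by this file.
jps -v | grep -E 'JobTracker|TaskTracker' | grep -o 'com.sun.management.jmxremote' | sort | uniq -c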
------------------------------------------------------------------------------------------------

