initial revision.
@@ -0,0 +1,195 @@ | ||
1 | +<?xml version="1.0"?> | |
2 | +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> | |
3 | + | |
4 | +<!-- Put site-specific property overrides in this file. --> | |
5 | + | |
6 | +<configuration> | |
7 | + <property> | |
8 | + <name>this.secondary.namenode.fqdn</name> | |
9 | + <value>localhost</value> | |
10 | + <!-- <value>${this.cluster.name}-cn.${this.domain}</value> --> | |
11 | + </property> | |
12 | + | |
13 | + <property> | |
14 | + <name>dfs.name.dir</name> | |
15 | + <value>/grid/vol/0/var/lib/${user.name}/name</value> | |
16 | + <!-- <value>/grid/vol/0/var/lib/${user.name}/name,/export/home/${user.name}/var/lib/name</value> --> | |
17 | + </property> | |
18 | + <property> | |
19 | + <name>dfs.data.dir</name> | |
20 | + <value>/grid/vol/0/var/lib/${user.name}/data</value> | |
21 | + <!-- <value>/grid/vol/0/var/lib/${user.name}/data,/grid/vol/1/var/lib/${user.name}/data</value> --> | |
22 | + </property> | |
23 | + <property> | |
24 | + <name>dfs.replication</name> | |
25 | + <value>1</value> | |
26 | + <!-- <value>3</value> --> | |
27 | + </property> | |
28 | + | |
29 | + <property> | |
30 | + <name>dfs.hosts</name> | |
31 | + <value>/grid/usr/hadoop/conf/hosts.include</value> | |
32 | + <description> | |
33 | + Names a file that contains a list of hosts that are permitted to connect to the namenode. | |
34 | + The full pathname of the file must be specified. If the value is empty, all hosts are permitted. | |
35 | + </description> | |
36 | + </property> | |
37 | + <property> | |
38 | + <name>dfs.hosts.exclude</name> | |
39 | + <value>/grid/usr/hadoop/conf/hosts.exclude</value> | |
40 | + <description> | |
41 | + Names a file that contains a list of hosts that are not permitted to connect to the namenode. | |
42 | + The full pathname of the file must be specified. If the value is empty, no hosts are excluded. | |
43 | + </description> | |
44 | + </property> | |
45 | + | |
46 | + <property> | |
47 | + <name>dfs.cluster.administrators</name> | |
48 | + <value> hdfs</value> | |
49 | + </property> | |
50 | + <property> | |
51 | + <name>dfs.http.port</name> | |
52 | + <value>50070</value> | |
53 | + <description>The http port where namenode binds</description> | |
54 | + </property> | |
55 | + <property> | |
56 | + <name>dfs.http.address</name> | |
57 | + <value>${this.namenode.fqdn}:${dfs.http.port}</value> | |
58 | + <description>The http address where namenode binds</description> | |
59 | + </property> | |
60 | + <!-- for KSSL (NOT RECOMMENDED) --> | |
61 | + <property> | |
62 | + <name>hadoop.security.use-weak-http-crypto</name> | |
63 | + <value>false</value> | |
64 | + </property> | |
65 | + <property> | |
66 | + <name>dfs.namenode.kerberos.principal</name> | |
67 | + <value>hdfs/_HOST@${this.realm}</value> | |
68 | + <!-- _HOST is replaced with the fs.default.name's host name --> | |
69 | + <!-- <value>hdfs/${this.namenode.fqdn}@${this.realm}</value> --> | |
70 | + <description>Kerberos principal name for the NameNode</description> | |
71 | + </property> | |
72 | + <property> | |
73 | + <name>dfs.namenode.kerberos.https.principal</name> | |
74 | + <value>host/${this.namenode.fqdn}@${this.realm}</value> | |
75 | + <!-- <value>host/_HOST@${this.realm}</value> v1.0.4: NG! --> | |
76 | + <description> | |
77 | + The Kerberos principal for the host that the NameNode runs on. | |
78 | + </description> | |
79 | + </property> | |
80 | + <property> | |
81 | + <name>dfs.namenode.keytab.file</name> | |
82 | + <value>${this.keytab.dir}/nn.keytab</value> | |
83 | + <description> | |
84 | + Combined keytab file containing the namenode service and host | |
85 | + principals. | |
86 | + </description> | |
87 | + </property> | |
88 | + <property> | |
89 | + <name>dfs.secondary.http.port</name> | |
90 | + <value>50090</value> | |
91 | + <description>The http port where secondary namenode binds</description> | |
92 | + </property> | |
93 | + <property> | |
94 | + <name>dfs.secondary.http.address</name> | |
95 | + <value>${this.secondary.namenode.fqdn}:${dfs.secondary.http.port}</value> | |
96 | + <description>The http address where secondary namenode binds</description> | |
97 | + </property> | |
98 | + <property> | |
99 | + <name>dfs.secondary.namenode.kerberos.principal</name> | |
100 | + <value>hdfs/${this.secondary.namenode.fqdn}@${this.realm}</value> | |
101 | + <!-- <value>hdfs/_HOST@${this.realm}</value> v1.0.4: NG! --> | |
102 | + <description> | |
103 | + Kerberos principal name for the secondary NameNode. | |
104 | + </description> | |
105 | + </property> | |
106 | + <property> | |
107 | + <name>dfs.secondary.namenode.kerberos.https.principal</name> | |
108 | + <value>host/${this.secondary.namenode.fqdn}@${this.realm}</value> | |
109 | + <!-- <value>host/_HOST@${this.realm}</value> v1.0.4: NG! --> | |
110 | + <description> | |
111 | + The Kerberos principal for the host that the secondary NameNode | |
112 | + runs on. | |
113 | + </description> | |
114 | + </property> | |
115 | + <property> | |
116 | + <name>dfs.secondary.namenode.keytab.file</name> | |
117 | + <value>${this.keytab.dir}/cn.keytab</value> | |
118 | + <description> | |
119 | + Combined keytab file containing the namenode service and host | |
120 | + principals. | |
121 | + </description> | |
122 | + </property> | |
123 | + <property> | |
124 | + <name>dfs.block.access.token.enable</name> | |
125 | + <value>true</value> | |
126 | + <description> | |
127 | + If "true", access tokens are used as capabilities for accessing | |
128 | + datanodes. | |
129 | + If "false", no access tokens are checked on accessing datanodes. | |
130 | + </description> | |
131 | + </property> | |
132 | + <property> | |
133 | + <name>dfs.datanode.kerberos.principal</name> | |
134 | + <value>hdfs/localhost@${this.realm}</value> | |
135 | + <!-- <value>hdfs/_HOST@${this.realm}</value> --> | |
136 | + <description> | |
137 | + The Kerberos principal that the DataNode runs as. "_HOST" is | |
138 | + replaced by the real host name. | |
139 | + </description> | |
140 | + </property> | |
141 | + <property> | |
142 | + <name>dfs.datanode.keytab.file</name> | |
143 | + <value>${this.keytab.dir}/dn.keytab</value> | |
144 | + <description> | |
145 | + The filename of the keytab file for the DataNode. | |
146 | + </description> | |
147 | + </property> | |
148 | + | |
149 | + <property> | |
150 | + <name>dfs.datanode.address</name> | |
151 | + <value>0.0.0.0:1004</value> | |
152 | + </property> | |
153 | + <property> | |
154 | + <name>dfs.datanode.http.address</name> | |
155 | + <value>0.0.0.0:1006</value> | |
156 | + </property> | |
157 | + | |
158 | + <property> | |
159 | + <name>dfs.namenode.http-address</name> | |
160 | + <value>${this.namenode.fqdn}:50070</value> | |
161 | + </property> | |
162 | + <property> | |
163 | + <name>dfs.namenode.secondary.http-address</name> | |
164 | + <value>${this.secondary.namenode.fqdn}:50090</value> | |
165 | + </property> | |
166 | + <property> | |
167 | + <name>dfs.web.authentication.kerberos.principal</name> | |
168 | + <value>HTTP/_HOST@${this.realm}</value> | |
169 | + </property> | |
170 | + <property> | |
171 | + <name>dfs.web.authentication.kerberos.keytab</name> | |
172 | + <value>${this.keytab.dir}/HTTP.keytab</value> | |
173 | + </property> | |
174 | + <property> | |
175 | + <name>dfs.namenode.kerberos.internal.spnego.principal</name> | |
176 | + <value>${dfs.web.authentication.kerberos.principal}</value> | |
177 | + <!-- <value>HTTP/_HOST@${this.realm}</value> --> | |
178 | + <!-- _HOST is replaced with dfs.namenode.http-address's host name. --> | |
179 | + </property> | |
180 | + <property> | |
181 | + <name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name> | |
182 | + <value>HTTP/${this.secondary.namenode.fqdn}@${this.realm}</value> | |
183 | + <!-- <value>${dfs.web.authentication.kerberos.principal}</value> --> | |
184 | + <!-- <value>HTTP/_HOST@${this.realm}</value> --> | |
185 | + <!-- _HOST is NOT replaced with dfs.namenode.secondary.http-address's host name?! --> | |
186 | + </property> | |
187 | + <property> | |
188 | + <name>dfs.support.append</name> | |
189 | + <value>true</value> | |
190 | + </property> | |
191 | + <property> | |
192 | + <name>dfs.datanode.max.xcievers</name> | |
193 | + <value>4096</value> | |
194 | + </property> | |
195 | +</configuration> |
@@ -0,0 +1,144 @@ | ||
1 | +<?xml version="1.0"?> | |
2 | +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> | |
3 | + | |
4 | +<!-- Put site-specific property overrides in this file. --> | |
5 | + | |
6 | +<configuration> | |
7 | + <property> | |
8 | + <name>this.cluster.name</name> | |
9 | + <value>localhost</value> | |
10 | + <!-- <value>pleiades</value> --> | |
11 | + </property> | |
12 | + <property> | |
13 | + <name>this.domain</name> | |
14 | + <value>localhost</value> | |
15 | + <!-- <value>grid.example.com</value> --> | |
16 | + </property> | |
17 | + <property> | |
18 | + <name>this.realm</name> | |
19 | + <value>LOCALDOMAIN</value> | |
20 | + <!-- <value>GRID.EXAMPLE.COM</value> --> | |
21 | + </property> | |
22 | + <property> | |
23 | + <name>this.keytab.dir</name> | |
24 | + <value>/grid/etc/keytabs/localhost</value> | |
25 | + </property> | |
26 | + <property> | |
27 | + <name>this.namenode.fqdn</name> | |
28 | + <value>localhost</value> | |
29 | + <!-- <value>${this.cluster.name}-nn.${this.domain}</value> --> | |
30 | + </property> | |
31 | + | |
32 | + <property> | |
33 | + <name>fs.default.name</name> | |
34 | + <value>hdfs://${this.namenode.fqdn}:9000</value> | |
35 | + </property> | |
36 | + <property> | |
37 | + <name>hadoop.tmp.dir</name> | |
38 | + <value>/tmp/hadoop-${user.name}</value> | |
39 | + </property> | |
40 | + <property> | |
41 | + <name>fs.checkpoint.dir</name> | |
42 | + <value>/grid/vol/0/var/lib/${user.name}/checkpoint</value> | |
43 | + <!-- <value>/grid/vol/0/var/lib/${user.name}/checkpoint,/export/home/${user.name}/var/lib/checkpoint</value> --> | |
44 | + </property> | |
45 | + | |
46 | + <property> | |
47 | + <name>hadoop.security.authentication</name> | |
48 | + <value>kerberos</value> | |
49 | + <description> | |
50 | + Set the authentication for the cluster. Valid values are: simple or | |
51 | + kerberos. | |
52 | + </description> | |
53 | + </property> | |
54 | + <property> | |
55 | + <name>hadoop.security.authorization</name> | |
56 | + <value>true</value> | |
57 | + <description> | |
58 | + Enable authorization for different protocols. | |
59 | + </description> | |
60 | + </property> | |
61 | + <property> | |
62 | + <name>hadoop.security.auth_to_local</name> | |
63 | + <value> | |
64 | + RULE:[2:$1@$0](.*@${this.realm})s/@.*// | |
65 | + RULE:[1:$1@$0](.*@${this.realm})s/@.*// | |
66 | + RULE:[2:$1@$0](hdfs@.*${this.realm})s/.*/hdfs/ | |
67 | + RULE:[2:$1@$0](mapred@.*${this.realm})s/.*/mapred/ | |
68 | + DEFAULT</value> | |
69 | + </property> | |
70 | + <property> | |
71 | + <name>hadoop.security.group.mapping</name> | |
72 | + <value>org.apache.hadoop.security.JniBasedUnixGroupsMapping</value> | |
73 | + </property> | |
74 | + <property> | |
75 | + <name>hadoop.security.groups.cache.secs</name> | |
76 | + <value>14400</value> | |
77 | + </property> | |
78 | + <property> | |
79 | + <name>hadoop.kerberos.kinit.command</name> | |
80 | + <value>/usr/bin/kinit</value> | |
81 | + </property> | |
82 | + | |
83 | + <property> | |
84 | + <name>hadoop.http.filter.initializers</name> | |
85 | + <value>org.apache.hadoop.security.AuthenticationFilterInitializer</value> | |
86 | + <!-- <value>org.apache.hadoop.http.lib.StaticUserWebFilter</value> --> | |
87 | + <description>The name of a class that initializes an input filter for Jetty. | |
88 | + This filter will always return Dr.Who as the web user when the servlets | |
89 | + query for the authenticated user </description> | |
90 | + </property> | |
91 | + <property> | |
92 | + <name>hadoop.http.authentication.signature.secret.file</name> | |
93 | + <value>/grid/etc/hadoop-http-auth-signature-secret</value> | |
94 | + </property> | |
95 | + <property> | |
96 | + <name>hadoop.http.authentication.cookie.domain</name> | |
97 | + <value>${this.domain}</value> | |
98 | + </property> | |
99 | + <property> | |
100 | + <name>hadoop.http.authentication.type</name> | |
101 | + <value>simple</value> | |
102 | + <description>Defines authentication used for the HTTP web-consoles. | |
103 | + The supported values are: simple | kerberos | #AUTHENTICATION_HANDLER_CLASSNAME#. | |
104 | +      The default value is simple.</description> | |
105 | + </property> | |
106 | + <property> | |
107 | + <name>hadoop.http.authentication.kerberos.principal</name> | |
108 | + <value>HTTP/localhost@${this.realm}</value> | |
109 | + <!-- <value>HTTP/_HOST@${this.realm}</value> | |
110 | + _HOST N/A!: v1.0, v1.1, HDP1.2; OK: v2.0, CDH3, CDH4 --> | |
111 | + <!-- for v1.0, v1.1, HDP1.2 | |
112 | + <value>HTTP/${this.fqdn}@${this.realm}</value> | |
113 | + "this".fqdn must be set in the conf/hadoop-env.sh | |
114 | + or each setting | |
115 | + <value>HTTP/${this.cluster.name}-nn.${this.domain}@${this.realm}</value> | |
116 | + <value>HTTP/${this.cluster.name}-cn.${this.domain}@${this.realm}</value> | |
117 | + <value>HTTP/${this.cluster.name}-jt.${this.domain}@${this.realm}</value> | |
118 | + <value>HTTP/${this.cluster.name}-jh.${this.domain}@${this.realm}</value> | |
119 | + <value>HTTP/dn00000.${this.domain}@${this.realm}</value> | |
120 | + <value>HTTP/dn00001.${this.domain}@${this.realm}</value> | |
121 | + --> | |
122 | + </property> | |
123 | + <property> | |
124 | + <name>hadoop.http.authentication.kerberos.keytab</name> | |
125 | + <value>${this.keytab.dir}/HTTP.keytab</value> | |
126 | + </property> | |
127 | + | |
128 | + <property> | |
129 | + <name>hadoop.proxyuser.oozie.hosts</name> | |
130 | + <value>localhost</value> | |
131 | + </property> | |
132 | + <property> | |
133 | + <name>hadoop.proxyuser.oozie.groups</name> | |
134 | + <value>hadoopers</value> | |
135 | + </property> | |
136 | + <property> | |
137 | + <name>hadoop.proxyuser.httpfs.hosts</name> | |
138 | + <value>localhost</value> | |
139 | + </property> | |
140 | + <property> | |
141 | + <name>hadoop.proxyuser.httpfs.groups</name> | |
142 | + <value>hadoopers</value> | |
143 | + </property> | |
144 | +</configuration> |
@@ -0,0 +1,12 @@ | ||
1 | +<?xml version="1.0"?> | |
2 | + | |
3 | +<!-- | |
4 | + This file contains pool and user allocations for the Fair Scheduler. | |
5 | + Its format is explained in the Fair Scheduler documentation at | |
6 | + http://hadoop.apache.org/common/docs/r0.20.205.0/fair_scheduler.html. | |
7 | + The documentation also includes a sample config file. | |
8 | +--> | |
9 | + | |
10 | +<allocations> | |
11 | + | |
12 | +</allocations> |
@@ -0,0 +1,106 @@ | ||
1 | +<?xml version="1.0"?> | |
2 | +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> | |
3 | + | |
4 | +<!-- Put site-specific property overrides in this file. --> | |
5 | + | |
6 | +<configuration> | |
7 | + <property> | |
8 | + <name>security.client.protocol.acl</name> | |
9 | + <value>*</value> | |
10 | + <description>ACL for ClientProtocol, which is used by user code | |
11 | + via the DistributedFileSystem. | |
12 | + The ACL is a comma-separated list of user and group names. The user and | |
13 | + group list is separated by a blank. For e.g. "alice,bob users,wheel". | |
14 | + A special value of "*" means all users are allowed.</description> | |
15 | + </property> | |
16 | + | |
17 | + <property> | |
18 | + <name>security.client.datanode.protocol.acl</name> | |
19 | + <value>*</value> | |
20 | + <description>ACL for ClientDatanodeProtocol, the client-to-datanode protocol | |
21 | + for block recovery. | |
22 | + The ACL is a comma-separated list of user and group names. The user and | |
23 | + group list is separated by a blank. For e.g. "alice,bob users,wheel". | |
24 | + A special value of "*" means all users are allowed.</description> | |
25 | + </property> | |
26 | + | |
27 | + <property> | |
28 | + <name>security.datanode.protocol.acl</name> | |
29 | + <value>*</value> | |
30 | + <description>ACL for DatanodeProtocol, which is used by datanodes to | |
31 | + communicate with the namenode. | |
32 | + The ACL is a comma-separated list of user and group names. The user and | |
33 | + group list is separated by a blank. For e.g. "alice,bob users,wheel". | |
34 | + A special value of "*" means all users are allowed.</description> | |
35 | + </property> | |
36 | + | |
37 | + <property> | |
38 | + <name>security.inter.datanode.protocol.acl</name> | |
39 | + <value>*</value> | |
40 | + <description>ACL for InterDatanodeProtocol, the inter-datanode protocol | |
41 | + for updating generation timestamp. | |
42 | + The ACL is a comma-separated list of user and group names. The user and | |
43 | + group list is separated by a blank. For e.g. "alice,bob users,wheel". | |
44 | + A special value of "*" means all users are allowed.</description> | |
45 | + </property> | |
46 | + | |
47 | + <property> | |
48 | + <name>security.namenode.protocol.acl</name> | |
49 | + <value>*</value> | |
50 | + <description>ACL for NamenodeProtocol, the protocol used by the secondary | |
51 | + namenode to communicate with the namenode. | |
52 | + The ACL is a comma-separated list of user and group names. The user and | |
53 | + group list is separated by a blank. For e.g. "alice,bob users,wheel". | |
54 | + A special value of "*" means all users are allowed.</description> | |
55 | + </property> | |
56 | + | |
57 | + <property> | |
58 | + <name>security.inter.tracker.protocol.acl</name> | |
59 | + <value>*</value> | |
60 | + <description>ACL for InterTrackerProtocol, used by the tasktrackers to | |
61 | + communicate with the jobtracker. | |
62 | + The ACL is a comma-separated list of user and group names. The user and | |
63 | + group list is separated by a blank. For e.g. "alice,bob users,wheel". | |
64 | + A special value of "*" means all users are allowed.</description> | |
65 | + </property> | |
66 | + | |
67 | + <property> | |
68 | + <name>security.job.submission.protocol.acl</name> | |
69 | + <value>*</value> | |
70 | + <description>ACL for JobSubmissionProtocol, used by job clients to | |
71 | +    communicate with the jobtracker for job submission, querying job status etc. | |
72 | + The ACL is a comma-separated list of user and group names. The user and | |
73 | + group list is separated by a blank. For e.g. "alice,bob users,wheel". | |
74 | + A special value of "*" means all users are allowed.</description> | |
75 | + </property> | |
76 | + | |
77 | + <property> | |
78 | + <name>security.task.umbilical.protocol.acl</name> | |
79 | + <value>*</value> | |
80 | + <description>ACL for TaskUmbilicalProtocol, used by the map and reduce | |
81 | + tasks to communicate with the parent tasktracker. | |
82 | + The ACL is a comma-separated list of user and group names. The user and | |
83 | + group list is separated by a blank. For e.g. "alice,bob users,wheel". | |
84 | + A special value of "*" means all users are allowed.</description> | |
85 | + </property> | |
86 | + | |
87 | + <property> | |
88 | + <name>security.refresh.policy.protocol.acl</name> | |
89 | + <value>hdfs,mapred hadoop</value> | |
90 | + <description>ACL for RefreshAuthorizationPolicyProtocol, used by the | |
91 | + dfsadmin and mradmin commands to refresh the security policy in-effect. | |
92 | + The ACL is a comma-separated list of user and group names. The user and | |
93 | + group list is separated by a blank. For e.g. "alice,bob users,wheel". | |
94 | + A special value of "*" means all users are allowed.</description> | |
95 | + </property> | |
96 | + | |
97 | + <property> | |
98 | + <name>security.admin.operations.protocol.acl</name> | |
99 | + <value>hdfs,mapred hadoop</value> | |
100 | + <description>ACL for AdminOperationsProtocol, used by the mradmins commands | |
101 | + to refresh queues and nodes at JobTracker. The ACL is a comma-separated list of | |
102 | + user and group names. The user and group list is separated by a blank. | |
103 | + For e.g. "alice,bob users,wheel". A special value of "*" means all users are | |
104 | + allowed.</description> | |
105 | + </property> | |
106 | +</configuration> |
@@ -0,0 +1,178 @@ | ||
1 | +<?xml version="1.0"?> | |
2 | + | |
3 | +<!-- This is the configuration file for the resource manager in Hadoop. --> | |
4 | +<!-- You can configure various scheduling parameters related to queues. --> | |
5 | +<!-- The properties for a queue follow a naming convention,such as, --> | |
6 | +<!-- mapred.capacity-scheduler.queue.<queue-name>.property-name. --> | |
7 | + | |
8 | +<configuration> | |
9 | + | |
10 | + <property> | |
11 | + <name>mapred.capacity-scheduler.maximum-system-jobs</name> | |
12 | + <value>3000</value> | |
13 | + <description>Maximum number of jobs in the system which can be initialized, | |
14 | + concurrently, by the CapacityScheduler. | |
15 | + </description> | |
16 | + </property> | |
17 | + | |
18 | + <property> | |
19 | + <name>mapred.capacity-scheduler.queue.default.capacity</name> | |
20 | + <value>100</value> | |
21 | + <description>Percentage of the number of slots in the cluster that are | |
22 | + to be available for jobs in this queue. | |
23 | + </description> | |
24 | + </property> | |
25 | + | |
26 | + <property> | |
27 | + <name>mapred.capacity-scheduler.queue.default.maximum-capacity</name> | |
28 | + <value>-1</value> | |
29 | + <description> | |
30 | + maximum-capacity defines a limit beyond which a queue cannot use the capacity of the cluster. | |
31 | + This provides a means to limit how much excess capacity a queue can use. By default, there is no limit. | |
32 | + The maximum-capacity of a queue can only be greater than or equal to its minimum capacity. | |
33 | + Default value of -1 implies a queue can use complete capacity of the cluster. | |
34 | + | |
35 | +      This property could be used to curtail certain jobs which are long running in nature from occupying more than a | |
36 | + certain percentage of the cluster, which in the absence of pre-emption, could lead to capacity guarantees of | |
37 | + other queues being affected. | |
38 | + | |
39 | + One important thing to note is that maximum-capacity is a percentage , so based on the cluster's capacity | |
40 | + the max capacity would change. So if large no of nodes or racks get added to the cluster , max Capacity in | |
41 | + absolute terms would increase accordingly. | |
42 | + </description> | |
43 | + </property> | |
44 | + | |
45 | + <property> | |
46 | + <name>mapred.capacity-scheduler.queue.default.supports-priority</name> | |
47 | + <value>false</value> | |
48 | + <description>If true, priorities of jobs will be taken into | |
49 | + account in scheduling decisions. | |
50 | + </description> | |
51 | + </property> | |
52 | + | |
53 | + <property> | |
54 | + <name>mapred.capacity-scheduler.queue.default.minimum-user-limit-percent</name> | |
55 | + <value>100</value> | |
56 | + <description> Each queue enforces a limit on the percentage of resources | |
57 | + allocated to a user at any given time, if there is competition for them. | |
58 | + This user limit can vary between a minimum and maximum value. The former | |
59 | + depends on the number of users who have submitted jobs, and the latter is | |
60 | + set to this property value. For example, suppose the value of this | |
61 | + property is 25. If two users have submitted jobs to a queue, no single | |
62 | + user can use more than 50% of the queue resources. If a third user submits | |
63 | + a job, no single user can use more than 33% of the queue resources. With 4 | |
64 | + or more users, no user can use more than 25% of the queue's resources. A | |
65 | + value of 100 implies no user limits are imposed. | |
66 | + </description> | |
67 | + </property> | |
68 | + | |
69 | + <property> | |
70 | + <name>mapred.capacity-scheduler.queue.default.user-limit-factor</name> | |
71 | + <value>1</value> | |
72 | + <description>The multiple of the queue capacity which can be configured to | |
73 | + allow a single user to acquire more slots. | |
74 | + </description> | |
75 | + </property> | |
76 | + | |
77 | + <property> | |
78 | + <name>mapred.capacity-scheduler.queue.default.maximum-initialized-active-tasks</name> | |
79 | + <value>200000</value> | |
80 | + <description>The maximum number of tasks, across all jobs in the queue, | |
81 | + which can be initialized concurrently. Once the queue's jobs exceed this | |
82 | + limit they will be queued on disk. | |
83 | + </description> | |
84 | + </property> | |
85 | + | |
86 | + <property> | |
87 | + <name>mapred.capacity-scheduler.queue.default.maximum-initialized-active-tasks-per-user</name> | |
88 | + <value>100000</value> | |
89 | +    <description>The maximum number of tasks per-user, across all of the | |
90 | + user's jobs in the queue, which can be initialized concurrently. Once the | |
91 | + user's jobs exceed this limit they will be queued on disk. | |
92 | + </description> | |
93 | + </property> | |
94 | + | |
95 | + <property> | |
96 | + <name>mapred.capacity-scheduler.queue.default.init-accept-jobs-factor</name> | |
97 | + <value>10</value> | |
98 | +    <description>The multiple of (maximum-system-jobs * queue-capacity) used to | |
99 | + determine the number of jobs which are accepted by the scheduler. | |
100 | + </description> | |
101 | + </property> | |
102 | + | |
103 | + <!-- The default configuration settings for the capacity task scheduler --> | |
104 | + <!-- The default values would be applied to all the queues which don't have --> | |
105 | + <!-- the appropriate property for the particular queue --> | |
106 | + <property> | |
107 | + <name>mapred.capacity-scheduler.default-supports-priority</name> | |
108 | + <value>false</value> | |
109 | + <description>If true, priorities of jobs will be taken into | |
110 | + account in scheduling decisions by default in a job queue. | |
111 | + </description> | |
112 | + </property> | |
113 | + | |
114 | + <property> | |
115 | + <name>mapred.capacity-scheduler.default-minimum-user-limit-percent</name> | |
116 | + <value>100</value> | |
117 | + <description>The percentage of the resources limited to a particular user | |
118 | + for the job queue at any given point of time by default. | |
119 | + </description> | |
120 | + </property> | |
121 | + | |
122 | + | |
123 | + <property> | |
124 | + <name>mapred.capacity-scheduler.default-user-limit-factor</name> | |
125 | + <value>1</value> | |
126 | + <description>The default multiple of queue-capacity which is used to | |
127 | + determine the amount of slots a single user can consume concurrently. | |
128 | + </description> | |
129 | + </property> | |
130 | + | |
131 | + <property> | |
132 | + <name>mapred.capacity-scheduler.default-maximum-active-tasks-per-queue</name> | |
133 | + <value>200000</value> | |
134 | + <description>The default maximum number of tasks, across all jobs in the | |
135 | + queue, which can be initialized concurrently. Once the queue's jobs exceed | |
136 | + this limit they will be queued on disk. | |
137 | + </description> | |
138 | + </property> | |
139 | + | |
140 | + <property> | |
141 | + <name>mapred.capacity-scheduler.default-maximum-active-tasks-per-user</name> | |
142 | + <value>100000</value> | |
143 | +    <description>The default maximum number of tasks per-user, across all of | |
144 | + the user's jobs in the queue, which can be initialized concurrently. Once | |
145 | + the user's jobs exceed this limit they will be queued on disk. | |
146 | + </description> | |
147 | + </property> | |
148 | + | |
149 | + <property> | |
150 | + <name>mapred.capacity-scheduler.default-init-accept-jobs-factor</name> | |
151 | + <value>10</value> | |
152 | +    <description>The default multiple of (maximum-system-jobs * queue-capacity) | |
153 | + used to determine the number of jobs which are accepted by the scheduler. | |
154 | + </description> | |
155 | + </property> | |
156 | + | |
157 | + <!-- Capacity scheduler Job Initialization configuration parameters --> | |
158 | + <property> | |
159 | + <name>mapred.capacity-scheduler.init-poll-interval</name> | |
160 | + <value>5000</value> | |
161 | +    <description>The amount of time in milliseconds which is used to poll | |
162 | + the job queues for jobs to initialize. | |
163 | + </description> | |
164 | + </property> | |
165 | + <property> | |
166 | + <name>mapred.capacity-scheduler.init-worker-threads</name> | |
167 | + <value>5</value> | |
168 | + <description>Number of worker threads which would be used by | |
169 | + Initialization poller to initialize jobs in a set of queue. | |
170 | + If number mentioned in property is equal to number of job queues | |
171 | + then a single thread would initialize jobs in a queue. If lesser | |
172 | + then a thread would get a set of queues assigned. If the number | |
173 | + is greater then number of threads would be equal to number of | |
174 | + job queues. | |
175 | + </description> | |
176 | + </property> | |
177 | + | |
178 | +</configuration> |
@@ -0,0 +1,82 @@ | ||
1 | +# Set Hadoop-specific environment variables here. | |
2 | + | |
3 | + | |
4 | +export JAVA_HOME=/usr/local/jvm/java-6-ora | |
5 | +# The directory where pid files are stored. /tmp by default. | |
6 | +export HADOOP_PID_DIR=/grid/vol/0/var/run/${USER} | |
7 | +# Where log files are stored. $HADOOP_HOME/logs by default. | |
8 | +export HADOOP_LOG_DIR=/grid/vol/0/var/log/${USER} | |
9 | + | |
10 | +export HADOOP_SECURE_DN_USER=hdfs | |
11 | +export HADOOP_SECURE_DN_PID_DIR=/grid/vol/0/var/run/${HADOOP_SECURE_DN_USER} | |
12 | +export HADOOP_SECURE_DN_LOG_DIR=/grid/vol/0/var/log/${HADOOP_SECURE_DN_USER} | |
13 | + | |
14 | +# Extra Java CLASSPATH elements. Optional. | |
15 | +if [ x"$HADOOP_CLASSPATH" = x ]; then | |
16 | + export HADOOP_CLASSPATH="/usr/share/java/commons-daemon.jar" | |
17 | +else | |
18 | + # for Hive and HCatalog | |
19 | + export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:/usr/share/java/commons-daemon.jar" | |
20 | +fi | |
21 | +export HADOOP_USER_CLASSPATH_FIRST=true | |
22 | + | |
23 | + | |
24 | + | |
25 | +# The only required environment variable is JAVA_HOME. All others are | |
26 | +# optional. When running a distributed configuration it is best to | |
27 | +# set JAVA_HOME in this file, so that it is correctly defined on | |
28 | +# remote nodes. | |
29 | + | |
30 | +# The java implementation to use. Required. | |
31 | +# export JAVA_HOME=/usr/lib/j2sdk1.5-sun | |
32 | + | |
33 | +# Extra Java CLASSPATH elements. Optional. | |
34 | +# export HADOOP_CLASSPATH= | |
35 | + | |
36 | +# The maximum amount of heap to use, in MB. Default is 1000. | |
37 | +# export HADOOP_HEAPSIZE=2000 | |
38 | + | |
39 | +# Extra Java runtime options. Empty by default. | |
40 | +# if [ "$HADOOP_OPTS" == "" ]; then export HADOOP_OPTS=-server; else HADOOP_OPTS+=" -server"; fi | |
41 | + | |
42 | +# Command specific options appended to HADOOP_OPTS when specified | |
43 | +export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS" | |
44 | +export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS" | |
45 | +export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS" | |
46 | +export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS" | |
47 | +export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS" | |
48 | +# export HADOOP_TASKTRACKER_OPTS= | |
49 | +# The following applies to multiple commands (fs, dfs, fsck, distcp etc) | |
50 | +# export HADOOP_CLIENT_OPTS | |
51 | + | |
52 | +# Extra ssh options. Empty by default. | |
53 | +# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR" | |
54 | + | |
55 | +# Where log files are stored. $HADOOP_HOME/logs by default. | |
56 | +# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs | |
57 | + | |
58 | +# File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default. | |
59 | +# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves | |
60 | + | |
61 | +# host:path where hadoop code should be rsync'd from. Unset by default. | |
62 | +# export HADOOP_MASTER=master:/home/$USER/src/hadoop | |
63 | + | |
64 | +# Seconds to sleep between slave commands. Unset by default. This | |
65 | +# can be useful in large clusters, where, e.g., slave rsyncs can | |
66 | +# otherwise arrive faster than the master can service them. | |
67 | +# export HADOOP_SLAVE_SLEEP=0.1 | |
68 | + | |
69 | +# The directory where pid files are stored. /tmp by default. | |
70 | +# NOTE: this should be set to a directory that can only be written to by | |
71 | +# the users that are going to run the hadoop daemons. Otherwise there is | |
72 | +# the potential for a symlink attack. | |
73 | +# export HADOOP_PID_DIR=/var/hadoop/pids | |
74 | + | |
75 | +# A string representing this instance of hadoop. $USER by default. | |
76 | +# export HADOOP_IDENT_STRING=$USER | |
77 | + | |
78 | +# The scheduling priority for daemon processes. See 'man nice'. | |
79 | +# export HADOOP_NICENESS=10 | |
80 | + | |
81 | + | |
82 | + |
@@ -0,0 +1,49 @@ | ||
1 | +<?xml version="1.0"?> | |
2 | +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> | |
3 | + | |
4 | +<!-- This is a template file for queue acls configuration properties --> | |
5 | + | |
6 | +<configuration> | |
7 | + | |
8 | +<property> | |
9 | + <name>mapred.queue.default.acl-submit-job</name> | |
10 | + <value>*</value> | |
11 | + <description> Comma separated list of user and group names that are allowed | |
12 | + to submit jobs to the 'default' queue. The user list and the group list | |
13 | +    are separated by a blank, e.g. user1,user2 group1,group2. | |
14 | + If set to the special value '*', it means all users are allowed to | |
15 | +    submit jobs. If set to ' ' (i.e. a single space), no user will be allowed to submit | |
16 | + jobs. | |
17 | + | |
18 | + It is only used if authorization is enabled in Map/Reduce by setting the | |
19 | + configuration property mapred.acls.enabled to true. | |
20 | + | |
21 | + Irrespective of this ACL configuration, the user who started the cluster and | |
22 | + cluster administrators configured via | |
23 | + mapreduce.cluster.administrators can submit jobs. | |
24 | + </description> | |
25 | +</property> | |
26 | + | |
27 | +<property> | |
28 | + <name>mapred.queue.default.acl-administer-jobs</name> | |
29 | + <value> hadoop,gridops</value> | |
30 | + <description> Comma separated list of user and group names that are allowed | |
31 | + to view job details, kill jobs or modify job's priority for all the jobs | |
32 | + in the 'default' queue. The user list and the group list | |
33 | +    are separated by a blank, e.g. user1,user2 group1,group2. | |
34 | + If set to the special value '*', it means all users are allowed to do | |
35 | +    this operation. If set to ' ' (i.e. a single space), no user will be allowed to do | |
36 | + this operation. | |
37 | + | |
38 | + It is only used if authorization is enabled in Map/Reduce by setting the | |
39 | + configuration property mapred.acls.enabled to true. | |
40 | + | |
41 | + Irrespective of this ACL configuration, the user who started the cluster and | |
42 | + cluster administrators configured via | |
43 | + mapreduce.cluster.administrators can do the above operations on all the jobs | |
44 | + in all the queues. The job owner can do all the above operations on his/her | |
45 | + job irrespective of this ACL configuration. | |
46 | + </description> | |
47 | +</property> | |
48 | + | |
49 | +</configuration> |
@@ -0,0 +1,162 @@ | ||
1 | +<?xml version="1.0"?> | |
2 | +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> | |
3 | + | |
4 | +<!-- Put site-specific property overrides in this file. --> | |
5 | + | |
6 | +<configuration> | |
7 | + <property> | |
8 | + <name>this.jobtracker.fqdn</name> | |
9 | + <value>localhost</value> | |
10 | + <!-- <value>${this.cluster.name}-jt.${this.domain}</value> --> | |
11 | + </property> | |
12 | + <property> | |
13 | + <name>this.jobhistory.fqdn</name> | |
14 | + <value>localhost</value> | |
15 | + <!-- <value>${this.cluster.name}-jt.${this.domain}</value> --> | |
16 | + <!-- <value>${this.cluster.name}-jh.${this.domain}</value> --> | |
17 | + </property> | |
18 | + | |
19 | + <property> | |
20 | + <name>mapred.job.tracker</name> | |
21 | + <value>${this.jobtracker.fqdn}:9001</value> | |
22 | + </property> | |
23 | + <property> | |
24 | + <name>mapred.system.dir</name> | |
25 | + <value>/grid/vol/0/var/lib/${user.name}/system</value> | |
26 | + </property> | |
27 | + <property> | |
28 | + <name>mapreduce.jobtracker.staging.root.dir</name> | |
29 | + <value>/user</value> | |
30 | + </property> | |
31 | + | |
32 | + <property> | |
33 | + <name>mapred.job.tracker.persist.jobstatus.active</name> | |
34 | + <value>true</value> | |
35 | + <description>Indicates if persistency of job status information is | |
36 | + active or not. (default: false) | |
37 | + </description> | |
38 | + </property> | |
39 | + <property> | |
40 | + <name>mapred.job.tracker.persist.jobstatus.hours</name> | |
41 | + <value>3</value> | |
42 | + <description>The number of hours job status information is persisted in DFS. | |
43 | +    The job status information will be available after it drops out of the memory | |
44 | + queue and between jobtracker restarts. With a zero value the job status | |
45 | + information is not persisted at all in DFS. (default: 0) | |
46 | + </description> | |
47 | + </property> | |
48 | + <property> | |
49 | + <name>mapred.job.tracker.persist.jobstatus.dir</name> | |
50 | + <value>/grid/vol/0/var/lib/mapred/jobstatus</value> | |
51 | + <description>The directory where the job status information is persisted | |
52 | +    in a file system to be available after it drops out of the memory queue and | |
53 | + between jobtracker restarts. (default: /jobtracker/jobsInfo) | |
54 | + </description> | |
55 | + </property> | |
56 | + | |
57 | + <property> | |
58 | + <name>hadoop.job.history.location</name> | |
59 | + <value>file:///grid/vol/0/var/lib/mapred/history</value> | |
60 | +    <description>Note: an hdfs:// URI is NOT usable for this property; a local file:// path is required.</description> | |
61 | + </property> | |
62 | + <property> | |
63 | + <name>mapred.job.tracker.history.completed.location</name> | |
64 | + <value>hdfs:///grid/vol/0/var/lib/mapred/history/done</value> | |
65 | + </property> | |
66 | + | |
67 | + <property> | |
68 | + <name>mapred.local.dir</name> | |
69 | + <value>/grid/vol/0/var/lib/mapred/local</value> | |
70 | + <!-- <value>/grid/vol/0/var/lib/mapred/local,/grid/vol/1/var/lib/mapred/local</value> --> | |
71 | + <description> | |
72 | + The local directory where MapReduce stores intermediate data files. | |
73 | + May be a comma-separated list of directories on different devices in order to spread disk i/o. | |
74 | + Directories that do not exist are ignored. | |
75 | + </description> | |
76 | + </property> | |
77 | + <property> | |
78 | + <name>mapred.temp.dir</name> | |
79 | + <value>/grid/vol/0/tmp/${user.name}/mapred</value> | |
80 | + <!-- <value>/grid/vol/0/tmp/${user.name}/mapred,/grid/vol/1/tmp/${user.name}/mapred</value> --> | |
81 | + <description> | |
82 | + A shared directory for temporary files. | |
83 | + </description> | |
84 | + </property> | |
85 | + | |
86 | + <property> | |
87 | + <name>mapred.hosts</name> | |
88 | + <value>/grid/usr/hadoop/conf/hosts.include</value> | |
89 | + <description> | |
90 | + Names a file that contains the list of nodes that may connect to the jobtracker. | |
91 | + If the value is empty, all hosts are permitted. | |
92 | + </description> | |
93 | + </property> | |
94 | + <property> | |
95 | + <name>mapred.hosts.exclude</name> | |
96 | + <value>/grid/usr/hadoop/conf/hosts.exclude</value> | |
97 | + <description> | |
98 | + Names a file that contains the list of hosts that should be excluded by the jobtracker. | |
99 | + If the value is empty, no hosts are excluded. | |
100 | + </description> | |
101 | + </property> | |
102 | + | |
103 | + <property> | |
104 | + <name>mapred.jobtracker.taskScheduler</name> | |
105 | + <value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value> | |
106 | + </property> | |
107 | + | |
108 | + <property> | |
109 | + <name>mapred.acls.enabled</name> | |
110 | + <value>true</value> | |
111 | + </property> | |
112 | + <property> | |
113 | + <name>mapreduce.cluster.administrators</name> | |
114 | + <value> mapred</value> | |
115 | + </property> | |
116 | + <property> | |
117 | + <name>mapreduce.jobtracker.kerberos.principal</name> | |
118 | + <value>mapred/_HOST@${this.realm}</value> | |
119 | + <!-- _HOST is replaced with the mapred.job.tracker's host name --> | |
120 | + <!-- <value>mapred/${this.jobtracker.fqdn}@${this.realm}</value> --> | |
121 | + </property> | |
122 | + <property> | |
123 | + <name>mapreduce.jobtracker.keytab.file</name> | |
124 | + <value>${this.keytab.dir}/jt.keytab</value> | |
125 | + </property> | |
126 | + <property> | |
127 | + <name>mapreduce.tasktracker.kerberos.principal</name> | |
128 | + <value>mapred/localhost@${this.realm}</value> | |
129 | + <!-- <value>mapred/_HOST@${this.realm}</value> --> | |
130 | + </property> | |
131 | + <property> | |
132 | + <name>mapreduce.tasktracker.keytab.file</name> | |
133 | + <value>${this.keytab.dir}/tt.keytab</value> | |
134 | + </property> | |
135 | + | |
136 | + <property> | |
137 | + <name>mapred.task.tracker.task-controller</name> | |
138 | + <value>org.apache.hadoop.mapred.LinuxTaskController</value> | |
139 | + </property> | |
140 | + <property> | |
141 | + <name>mapreduce.tasktracker.group</name> | |
142 | + <value>mapred</value> | |
143 | + </property> | |
144 | + | |
145 | + <property> | |
146 | + <name>mapreduce.history.server.embedded</name> | |
147 | + <value>true</value> | |
148 | + <!-- <value>false</value> --> | |
149 | + </property> | |
150 | + <property> | |
151 | + <name>mapreduce.history.server.http.address</name> | |
152 | + <value>${this.jobhistory.fqdn}:19888</value> | |
153 | + </property> | |
154 | + <property> | |
155 | + <name>mapreduce.jobhistory.kerberos.principal</name> | |
156 | + <value>mapred/${this.jobhistory.fqdn}@${this.realm}</value> | |
157 | + </property> | |
158 | + <property> | |
159 | + <name>mapreduce.jobhistory.keytab.file</name> | |
160 | + <value>${this.keytab.dir}/jh.keytab</value> | |
161 | + </property> | |
162 | +</configuration> |
@@ -0,0 +1,37 @@ | ||
1 | +<?xml version="1.0"?> | |
2 | +<!-- | |
3 | + Licensed under the Apache License, Version 2.0 (the "License"); | |
4 | + you may not use this file except in compliance with the License. | |
5 | + You may obtain a copy of the License at | |
6 | + | |
7 | + http://www.apache.org/licenses/LICENSE-2.0 | |
8 | + | |
9 | + Unless required by applicable law or agreed to in writing, software | |
10 | + distributed under the License is distributed on an "AS IS" BASIS, | |
11 | + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | + See the License for the specific language governing permissions and | |
13 | + limitations under the License. | |
14 | +--> | |
15 | +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> | |
16 | +<xsl:output method="html"/> | |
17 | +<xsl:template match="configuration"> | |
18 | +<html> | |
19 | +<body> | |
20 | +<table border="1"> | |
21 | +<tr> | |
22 | + <td>name</td> | |
23 | + <td>value</td> | |
24 | + <td>description</td> | |
25 | +</tr> | |
26 | +<xsl:for-each select="property"> | |
27 | +<tr> | |
28 | + <td><a name="{name}"><xsl:value-of select="name"/></a></td> | |
29 | + <td><xsl:value-of select="value"/></td> | |
30 | + <td><xsl:value-of select="description"/></td> | |
31 | +</tr> | |
32 | +</xsl:for-each> | |
33 | +</table> | |
34 | +</body> | |
35 | +</html> | |
36 | +</xsl:template> | |
37 | +</xsl:stylesheet> |