I have a hacked together class that somebody else wrote based on some 
wikimedia puppet scripts. I've been asked to set the JAVA_HEAP_MAX if the 
host is a resource manager.

I'm trying to set the variable yarn_env_java_heap_max if the host is an RM. 
Is it possible to set the parameter as below? Do I have to create a 
separate role just to set this parameter? Or am I better off checking whether 
the host is an RM in the yarn-site.sh.erb template itself? 

        if ($::hostname in $rm_hosts){
        yarn_env_java_heap_max => '-Xmx4000M', 
}

I am accessing the variable in the yarn-site.sh.erb template as follows:

<%# Emit the heap override only when the Puppet class set the parameter;
    class parameters are exposed to ERB as @-prefixed instance variables,
    so the bare name on the old line 2 would not resolve. A shell variable
    assignment also must not have spaces around '='. -%>
<% if @yarn_env_java_heap_max -%>
JAVA_HEAP_MAX=<%= @yarn_env_java_heap_max %>
<% end -%>




the role class


# role/wh/hadoop.pp
# borrowed from https://git.wikimedia.org/tree/operations%2Fpuppet.git

# Role classes for Hadoop nodes.
#
# Usage:
#
# To install only hadoop client packages and configs:
#   include role::wh::hadoop
#
# To install a Hadoop Master (NameNode + ResourceManager, etc.):
#   include role::wh::hadoop::master
#
# To install a Hadoop Worker (DataNode + NodeManager + etc.):
#   include role::wh::hadoop::worker
#

# Entry point for all Hadoop nodes: selects the environment-specific
# config class (dev/qa/prod) from $common::data::env and wraps each
# include in an anchor pair so the role is fully contained.
class role::wh::hadoop::client {
  case $common::data::env {
    'dev': {
      include role::wh::hadoop::dev
      anchor { 'cdh5_hadoop_dev_first': } ->
      Class['role::wh::hadoop::dev'] ->
      anchor { 'cdh5_hadoop_dev_last': }
    }
    'qa': {
      include role::wh::hadoop::qa
      anchor { 'cdh5_hadoop_qa_first': } ->
      Class['role::wh::hadoop::qa'] ->
      anchor { 'cdh5_hadoop_qa_last': }
    }
    'prod': {
      include role::wh::hadoop::production
      anchor { 'cdh5_hadoop_production_first': } ->
      Class['role::wh::hadoop::production'] ->
      anchor { 'cdh5_hadoop_production_last': }
    }
    default: {
      # Unknown environment: refuse to compile rather than guess.
      fail("Unrecognized environment type for hadoop")
    }
  }
}

# JournalNode role: runs only the HDFS JournalNode daemon on top of the
# common client configuration.
class role::wh::hadoop::journalnode inherits role::wh::hadoop::client {
    motd::register { 'Hadoop Journal Node': }

    class { 'cdh5::hadoop::journalnode': }

    anchor { 'cdh5_hadoop_journalnode_first': } ->
    Class['cdh5::hadoop::journalnode'] ->
    anchor { 'cdh5_hadoop_journalnode_last': }
}

# Master role: NameNode + ResourceManager + HistoryServer via
# cdh5::hadoop::master, on top of the common client configuration.
class role::wh::hadoop::master inherits role::wh::hadoop::client {
    motd::register { 'Hadoop Master (NameNode, ResourceManager & HistoryServer)': }

    # The name dir and shared-edits dir must exist before the daemons start.
    system::mkdirs { '/var/lib/hadoop-hdfs/cache/hdfs/dfs/name': }
    system::mkdirs { '/var/lib/hadoop-hdfs/journalEdits': }

    class { 'cdh5::hadoop::master': }

    anchor { 'cdh5_hadoop_master_first': } ->
    Class['cdh5::hadoop::master'] ->
    anchor { 'cdh5_hadoop_master_last': }
}

# Worker role: DataNode + NodeManager via cdh5::hadoop::worker, on top of
# the common client configuration.
class role::wh::hadoop::worker inherits role::wh::hadoop::client {
    motd::register { 'Hadoop Worker (DataNode & NodeManager)': }

    class { 'cdh5::hadoop::worker': }

    anchor { 'cdh5_hadoop_worker_first': } ->
    Class['cdh5::hadoop::worker'] ->
    anchor { 'cdh5_hadoop_worker_last': }
}

# Standby role: standby NameNode plus a ResourceManager, on top of the
# common client configuration.
class role::wh::hadoop::standby inherits role::wh::hadoop::client {
    motd::register { 'Hadoop Standby NameNode': }

    # The name dir and shared-edits dir must exist before the daemons start.
    system::mkdirs { '/var/lib/hadoop-hdfs/cache/hdfs/dfs/name': }
    system::mkdirs { '/var/lib/hadoop-hdfs/journalEdits': }

    class { 'cdh5::hadoop::namenode': }
    class { 'cdh5::hadoop::resourcemanager': }

    anchor { 'cdh5_wh_hadoop_standby_first': } ->
    Class['cdh5::hadoop::namenode'] ->
    Class['cdh5::hadoop::resourcemanager'] ->
    anchor { 'cdh5_wh_hadoop_standby_last': }
}

# Post-install steps for the primary NameNode (format/bootstrap tasks
# delegated to the cdh5 module).
class role::wh::hadoop::primary::postinstall inherits role::wh::hadoop::client {
    class { 'cdh5::hadoop::namenode::primarypostinstall': }

    anchor { 'cdh5_wh_hadoop_primarypostinstall_first': } ->
    Class['cdh5::hadoop::namenode::primarypostinstall'] ->
    anchor { 'cdh5_wh_hadoop_primarypostinstall_last': }
}

# Post-install steps for the standby NameNode (bootstrapStandby tasks
# delegated to the cdh5 module).
class role::wh::hadoop::standby::postinstall inherits role::wh::hadoop::client {
    class { 'cdh5::hadoop::namenode::standbypostinstall': }

    anchor { 'cdh5_wh_hadoop_standbypostinstall_first': } ->
    Class['cdh5::hadoop::namenode::standbypostinstall'] ->
    anchor { 'cdh5_wh_hadoop_standbypostinstall_last': }
}


### The following classes should not be included directly.
### You should either include role::wh::hadoop::client,
### or role::wh::hadoop::worker or
### role::wh::hadoop::master.

# Production Hadoop cluster configuration. Should not be included directly;
# use role::wh::hadoop::client (or ::master / ::worker).
class role::wh::hadoop::production {

    # Hosts running the YARN ResourceManager. Hoisted into a variable so it
    # can be passed to cdh5::hadoop AND consulted to decide whether this
    # node needs an explicit JAVA_HEAP_MAX.
    $rm_hosts = [
        'us3sm2nn010r07.comp.prod.local',
        'us3sm2nn011r08.comp.prod.local',
    ]

    # Puppet does not allow an `if` statement inside a resource/class
    # parameter list, so compute the value up front and pass the variable.
    # $rm_hosts holds FQDNs, so compare against $::fqdn — the short
    # $::hostname fact would never match. Non-RM hosts get undef, which the
    # template's `<% if @yarn_env_java_heap_max -%>` guard omits entirely
    # (the cdh5::hadoop default then applies).
    if $::fqdn in $rm_hosts {
        $yarn_env_java_heap_max = '-Xmx4000M'
    } else {
        $yarn_env_java_heap_max = undef
    }

    class { 'cdh5::hadoop':
        namenode_hosts                           => [
            'us3sm2nn010r07.comp.prod.local',
            'us3sm2nn011r08.comp.prod.local',
        ],
        rm_hosts                                 => $rm_hosts,
        dfs_name_dir                             => [['/var/lib/hadoop-hdfs/cache/hdfs/dfs/name', '/nfs/namedir']],
        config_directory                         => '/etc/hadoop/conf',
        # Logical name of the Hadoop cluster (HDFS HA nameservice).
        nameservice_id                           => 'whprod',
        journalnode_hosts                        => [
            'us3sm2zk010r07.comp.prod.local',
            'us3sm2zk011r08.comp.prod.local',
            'us3sm2zk012r09.comp.prod.local',
        ],
        dfs_journalnode_edits_dir                => '/var/lib/hadoop-hdfs/journalEdits',
        datanode_mounts                          => [
            '/data1', '/data2', '/data3', '/data4', '/data5', '/data6',
            '/data7', '/data8', '/data9', '/data10', '/data11',
        ],
        dfs_data_path                            => 'dfs',
        dfs_block_size                           => 268435456,  # 256 MB

        # Turn on Snappy compression by default for maps and final outputs
        mapreduce_intermediate_compression       => true,
        mapreduce_intermediate_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec',
        mapreduce_output_compression             => true,
        mapreduce_output_compression_codec       => 'org.apache.hadoop.io.compress.SnappyCodec',
        mapreduce_output_compression_type        => 'BLOCK',
        #mapreduce_map_tasks_maximum              => ($::processorcount - 2) / 2,
        #mapreduce_reduce_tasks_maximum           => ($::processorcount - 2) / 2,
        #mapreduce_job_reuse_jvm_num_tasks        => 1,
        #mapreduce_map_memory_mb                  => 1536,
        #mapreduce_reduce_memory_mb               => 3072,
        #mapreduce_map_java_opts                  => '-Xmx1024M',
        #mapreduce_reduce_java_opts               => '-Xmx2560M',
        #mapreduce_reduce_shuffle_parallelcopies  => 10,
        #mapreduce_task_io_sort_mb                => 200,
        #mapreduce_task_io_sort_factor            => 10,
        yarn_env_java_heap_max                   => $yarn_env_java_heap_max,
        yarn_nodemanager_resource_memory_mb      => 40960,
        yarn_resourcemanager_scheduler_class     => 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler',
        net_topology_script_template             => 'hadoop/getRackID.py-prod',
    }

    anchor { 'cdh5_hadoop_first': } ->
    Class['cdh5::hadoop'] ->
    anchor { 'cdh5_hadoop_last': }

    file { "$::cdh5::hadoop::config_directory/capacity-scheduler.xml":
        content => template('hadoop/capacity-scheduler.xml-prod'),
        require => Class['cdh5::hadoop'],
    }

}

# QA Hadoop cluster configuration. Should not be included directly; use
# role::wh::hadoop::client (or ::master / ::worker).
class role::wh::hadoop::qa {

    class { 'cdh5::hadoop':
        namenode_hosts                           => [
            'us3sm2hbqa03r09.comp.prod.local',
            'us3sm2hbqa04r07.comp.prod.local',
        ],
        rm_hosts                                 => [
            'us3sm2hbqa03r09.comp.prod.local',
            'us3sm2hbqa04r07.comp.prod.local',
        ],
        dfs_name_dir                             => [['/var/lib/hadoop-hdfs/cache/hdfs/dfs/name']],
        config_directory                         => '/etc/hadoop/conf',
        # Logical name of the Hadoop cluster (HDFS HA nameservice).
        nameservice_id                           => 'whqa',
        journalnode_hosts                        => [
            'us3sm2hbqa03r09.comp.prod.local',
            'us3sm2hbqa04r07.comp.prod.local',
            'us3sm2hbqa05r08.comp.prod.local',
        ],
        dfs_journalnode_edits_dir                => '/var/lib/hadoop-hdfs/journalEdits',
        datanode_mounts                          => [
            '/data1',
            '/data2',
        ],
        dfs_data_path                            => 'dfs',
        dfs_block_size                           => 268435456,  # 256 MB

        # Turn on Snappy compression by default for maps and final outputs
        mapreduce_intermediate_compression       => true,
        mapreduce_intermediate_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec',
        mapreduce_output_compression             => true,
        mapreduce_output_compression_codec       => 'org.apache.hadoop.io.compress.SnappyCodec',
        mapreduce_output_compression_type        => 'BLOCK',
        yarn_nodemanager_resource_memory_mb      => 24576,
        yarn_resourcemanager_max_completed_applications => 500,
        yarn_resourcemanager_scheduler_class     => 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler',
        net_topology_script_template             => 'hadoop/getRackID.py-qa',
    }

    anchor { 'cdh5_wh_hadoop_qa_first': } ->
    Class['cdh5::hadoop'] ->
    anchor { 'cdh5_wh_hadoop_qa_last': }

    file { "$::cdh5::hadoop::config_directory/capacity-scheduler.xml":
        content => template('hadoop/capacity-scheduler.xml-qa'),
        require => Class['cdh5::hadoop'],
    }
}

# Single-node development cluster: this host acts as NameNode,
# ResourceManager and JournalNode all at once ($::fqdn everywhere).
# Should not be included directly; use role::wh::hadoop::client.
class role::wh::hadoop::dev {

    class { 'cdh5::hadoop':
        namenode_hosts                           => [$::fqdn],
        rm_hosts                                 => [$::fqdn],
        dfs_name_dir                             => [['/var/lib/hadoop-hdfs/cache/hdfs/dfs/name']],
        config_directory                         => '/etc/hadoop/conf',
        # nameservice_id                           => 'whdev',
        journalnode_hosts                        => [$::fqdn],
        dfs_journalnode_edits_dir                => '/var/lib/hadoop-hdfs/journalEdits',
        datanode_mounts                          => [
                                                      '/data1',
                                                      '/data2'
                                                    ],
        dfs_data_path                            => 'dfs',
        dfs_block_size                           => 67108864,  # 64 MB (previous comment said 256 MB, but 67108864 bytes is 64 MB)
        
        # Turn on Snappy compression by default for maps and final outputs
        mapreduce_intermediate_compression       => true,
        mapreduce_intermediate_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec',
        mapreduce_output_compression             => true,
        mapreduce_output_compression_codec       => 'org.apache.hadoop.io.compress.SnappyCodec',
        mapreduce_output_compression_type        => 'BLOCK',
        mapreduce_map_tasks_maximum              => 2,
        mapreduce_reduce_tasks_maximum           => 2,
        yarn_nodemanager_resource_memory_mb      => 4096,
        yarn_resourcemanager_scheduler_class     => 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler',
}


    anchor { 'cdh5_hadoop_first': } -> Class['cdh5::hadoop'] -> anchor { 'cdh5_hadoop_last': }

    # NOTE(review): dev renders the *-qa capacity-scheduler template —
    # confirm this is intentional and not a copy/paste leftover.
    file { "$::cdh5::hadoop::config_directory/capacity-scheduler.xml":
        content => template('hadoop/capacity-scheduler.xml-qa'),
        require => Class['cdh5::hadoop'],
    }
}


Note that the main hadoop class has a yarn_env_java_heap_max parameter that 
I added, which also is given a default value.

-- 
You received this message because you are subscribed to the Google Groups 
"Puppet Users" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/puppet-users/9d134b61-191b-407c-aabf-9b02c512de37%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

Reply via email to