This looks good. A couple of in-line nits, but the biggest question is whether this query command is meant to be top-level (cloud-init query) or whether it is going to stay behind devel (cloud-init devel query).
Diff comments: > diff --git a/bash_completion/cloud-init b/bash_completion/cloud-init > index 6d01bf3..ad71be7 100644 > --- a/bash_completion/cloud-init > +++ b/bash_completion/cloud-init > @@ -62,6 +62,8 @@ _cloudinit_complete() > net-convert) > COMPREPLY=($(compgen -W "--help --network-data --kind > --directory --output-kind" -- $cur_word)) > ;; > + query) > + COMPREPLY=($(compgen -W "--help --instance-data > --user-data --vendor-data --debug" -- $cur_word));; Is this going to be a 'devel' sub command or a new top-level? If it's top-level, then you need to modify the subcmds list at the top and then add query to the 2) case switch. > render) > COMPREPLY=($(compgen -W "--help --instance-data --debug" > -- $cur_word));; > schema) > diff --git a/cloudinit/cmd/query.py b/cloudinit/cmd/query.py > new file mode 100644 > index 0000000..c6a6dc8 > --- /dev/null > +++ b/cloudinit/cmd/query.py > @@ -0,0 +1,155 @@ > +# This file is part of cloud-init. See LICENSE file for license information. If this is under devel, should the module be under cmd/devel/? > + > +"""Query standardized instance metadata from the command line.""" > + > +import argparse > +import os > +import six > +import sys > + > +from cloudinit.handlers.jinja_template import ( > + convert_jinja_instance_data, render_jinja_payload) > +from cloudinit.cmd.devel import addLogHandlerCLI, read_cfg_paths > +from cloudinit import log > +from cloudinit.sources import ( > + INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE) > +from cloudinit import util > + > +NAME = 'query' > +LOG = log.getLogger(NAME) > + > + > +def get_parser(parser=None): > + """Build or extend an arg parser for query utility. > + > + @param parser: Optional existing ArgumentParser instance representing the > + query subcommand which will be extended to support the args of > + this utility. > + > + @returns: ArgumentParser with proper argument configuration. 
> + """ > + if not parser: > + parser = argparse.ArgumentParser( > + prog=NAME, description='Query cloud-init instance data') > + parser.add_argument( > + '-d', '--debug', action='store_true', default=False, > + help='Add verbose messages during template render') > + parser.add_argument( > + '-i', '--instance-data', type=str, > + help=('Path to instance-data.json file. Default is > /run/cloud-init/%s' > + % INSTANCE_JSON_FILE)) > + parser.add_argument( > + '-l', '--list-keys', action='store_true', default=False, > + help=('List query keys available at the provided instance-data' > + ' <varname>.')) > + parser.add_argument( > + '-u', '--user-data', type=str, > + help=('Path to user-data file. Default is' > + ' /var/lib/cloud/instance/user-data.txt')) > + parser.add_argument( > + '-v', '--vendor-data', type=str, > + help=('Path to vendor-data file. Default is' > + ' /var/lib/cloud/instance/vendor-data.txt')) > + parser.add_argument( > + 'varname', type=str, nargs='?', > + help=('A dot-delimited instance data variable to query from' > + ' instance-data query. For example: v2.local_hostname')) > + parser.add_argument( > + '-a', '--all', action='store_true', default=False, dest='dump_all', > + help='Dump all available instance-data') > + parser.add_argument( > + '-f', '--format', type=str, dest='format', > + help=('Optionally specify a custom output format string. Any' > + ' instance-data variable can be specified between double-curly' > + ' braces. 
For example -f "{{ v2.cloud_name }}"')) > + return parser > + > + > +def handle_args(name, args): > + """Handle calls to 'cloud-init query' as a subcommand.""" > + paths = None > + addLogHandlerCLI(LOG, log.DEBUG if args.debug else log.WARNING) > + if not any([args.list_keys, args.varname, args.format, args.dump_all]): > + LOG.error( > + 'Expected one of the options: --all, --format,' > + ' --list-keys or varname') > + get_parser().print_help() > + return 1 > + > + if not all([args.instance_data, args.user_data, args.vendor_data]): > + paths = read_cfg_paths() > + if not args.instance_data: > + if os.getuid() == 0: > + default_json_fn = INSTANCE_JSON_SENSITIVE_FILE > + else: > + default_json_fn = INSTANCE_JSON_FILE # World readable Does it make sense to warn on non-root that we're redacting some of the data? > + instance_data_fn = os.path.join(paths.run_dir, default_json_fn) > + else: > + instance_data_fn = args.instance_data > + if not args.user_data: > + user_data_fn = paths.get_ipath('userdata') > + else: > + user_data_fn = args.user_data > + if not args.vendor_data: > + vendor_data_fn = paths.get_ipath('vendordata') > + else: > + vendor_data_fn = args.vendor_data > + > + try: > + with open(instance_data_fn) as stream: > + instance_json = stream.read() > + except IOError: > + LOG.error('Missing instance-data.json file: %s', instance_data_fn) > + return 1 util.load_file() ? > + > + instance_data = util.load_json(instance_json) > + if os.getuid() != 0: shouldn't we store os.getuid() since we reference this in multiple places? 
> + instance_data['userdata'] = ( > + '<%s> file:%s' % (REDACT_SENSITIVE_VALUE, user_data_fn)) > + instance_data['vendordata'] = ( > + '<%s> file:%s' % (REDACT_SENSITIVE_VALUE, vendor_data_fn)) > + else: > + instance_data['userdata'] = util.load_file(user_data_fn) > + instance_data['vendordata'] = util.load_file(vendor_data_fn) > + if args.format: > + payload = '## template: jinja\n{fmt}'.format(fmt=args.format) > + rendered_payload = render_jinja_payload( > + payload=payload, payload_fn='query commandline', > + instance_data=instance_data, > + debug=True if args.debug else False) > + if rendered_payload: > + print(rendered_payload) > + return 0 > + return 1 > + > + response = convert_jinja_instance_data(instance_data) > + if args.varname: > + try: > + for var in args.varname.split('.'): > + response = response[var] > + except KeyError: > + LOG.error('Undefined instance-data key %s', args.varname) > + return 1 > + if args.list_keys: > + if not isinstance(response, dict): > + LOG.error("--list-keys provided but '%s' is not a dict", var) > + return 1 > + response = '\n'.join(sorted(response.keys())) > + elif args.list_keys: > + response = '\n'.join(sorted(response.keys())) > + if not isinstance(response, six.string_types): > + response = util.json_dumps(response) > + print(response) > + return 0 > + > + > +def main(): > + """Tool to query specific instance-data values.""" > + parser = get_parser() > + sys.exit(handle_args(NAME, parser.parse_args())) > + > + > +if __name__ == '__main__': > + main() > + > +# vi: ts=4 expandtab > diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py > index a775f1a..a0bd763 100644 > --- a/cloudinit/sources/__init__.py > +++ b/cloudinit/sources/__init__.py > @@ -152,12 +184,19 @@ class DataSource(object): > > def _get_standardized_metadata(self): > """Return a dictionary of standardized metadata keys.""" > - return {'v1': { > - 'local-hostname': self.get_hostname(), > - 'instance-id': self.get_instance_id(), > - 
'cloud-name': self.cloud_name, > - 'region': self.region, > - 'availability-zone': self.availability_zone}} > + local_hostname = self.get_hostname() > + instance_id = self.get_instance_id() > + availability_zone = self.availability_zone > + return { > + 'v1': { > + 'availability-zone': availability_zone, Should we document what any new additions to this format should look like? IIRC, we want to use _ instead of - so we can jinja things, right? > + 'availability_zone': availability_zone, > + 'cloud-name': self.cloud_name, > + 'instance-id': instance_id, > + 'instance_id': instance_id, > + 'local-hostname': local_hostname, > + 'local_hostname': local_hostname, > + 'region': self.region}} > > def clear_cached_attrs(self, attr_defaults=()): > """Reset any cached metadata attributes to datasource defaults. > diff --git a/doc/rtd/topics/capabilities.rst b/doc/rtd/topics/capabilities.rst > index 2d8e253..0d8b894 100644 > --- a/doc/rtd/topics/capabilities.rst > +++ b/doc/rtd/topics/capabilities.rst > @@ -18,7 +18,7 @@ User configurability > > User-data can be given by the user at instance launch time. See > :ref:`user_data_formats` for acceptable user-data content. > - > + whitespace ? > > This is done via the ``--user-data`` or ``--user-data-file`` argument to > ec2-run-instances for example. > @@ -53,10 +53,9 @@ system: > > % cloud-init --help > usage: cloud-init [-h] [--version] [--file FILES] > - > [--debug] [--force] > - > {init,modules,single,dhclient-hook,features,analyze,devel,collect-logs,clean,status} > - ... > + > {init,modules,single,query,dhclient-hook,features,analyze,devel,collect-logs,clean,status} this puts query at top-level? which is it? > + ... > > optional arguments: > -h, --help show this help message and exit > diff --git a/doc/rtd/topics/instancedata.rst b/doc/rtd/topics/instancedata.rst > new file mode 100644 > index 0000000..3e25748 > --- /dev/null > +++ b/doc/rtd/topics/instancedata.rst > @@ -0,0 +1,290 @@ > +.. 
_instance_metadata: > + > +***************** > +Instance Metadata > +***************** > + > +What is a instance data? > +======================== > + > +Instance data is the collection of all configuration data that cloud-init > +processes to configure the instance. This configuration typically > +comes from any number of sources: > + > +* cloud-provided metadata services (aka metadata) > +* custom config-drive attached to the instance > +* cloud-config seed files in the booted cloud image or distribution > +* vendordata provided from files or cloud metadata services (overrides > + metadata) overrides defaults? overrides metadata may confuse users thinking this will override user-data (which is part of the metadata if provided). I wouldn't say override; maybe vendor/platform specific defaults. > +* user-data provided at instance creation (overrides vendordata and metadata) > + > +Each cloud provider presents unique configuration metadata in different > +formats to the instance. Cloud-init provides a cache of any crawled metadata > +as well as a versioned set of standardized instance data keys which it makes > +available on all platforms. > + > +Cloud-init produces a simple json object in > +``/run/cloud-init/instance-data.json`` which represents standardized and > +versioned representation of the metadata it consumes during initial boot. 
The > +intent is to provide the following benefits to users or scripts on any system > +deployed with cloud-init: > + > +* simple static object to query to obtain a cloud's metadata s/cloud's/instance's/ > +* speed: avoid costly network transactions for metadata that is already > cached > + on the filesytem > +* reduce need to recrawl metadata services for static metadata that is > already > + cached > +* leverage cloud-init's best practices for crawling cloud-metadata services > +* avoid rolling unique metadata crawlers on each cloud platform to get > + metadata configuration values > + > +Cloud-init stores any instance data processed in the following files: > + > +* ``/run/cloud-init/instance-data.json``: world-readable json containing > + standardized keys, sensitive keys redacted Do we have an easy way to show (or do we document) which keys we consider sensitive? > +* ``/run/cloud-init/instance-data-sensitive.json``: root-readable sensitive > + json blob unredacted root-readable unredacted json blob > +* ``/var/lib/cloud/instance/user-data.txt``: root-readable sensitive raw > + userdata > +* ``/var/lib/cloud/instance/vendor-data.txt``: root-readable sensitive raw > + vendordata > + > +Cloud-init redacts any security sensitive content from instance-data.json, > +stores ``/run/cloud-init/instance-data.json`` as a world-readable json file. > +Because user-data and vendor-data can contain passwords both of these files > +are readonly for *root* as well. The *root* user can also read > +``/run/cloud-init/instance-data-sensitive.json`` which is all instance data > +from instance-data.json as well as unredacted sensitive content. > + > + > +Format of instance-data.json > +============================ > + > +The instance-data.json and instance-data-sensitive.json files are well-formed I suggest bolding the file names ``instance-data.json`` and ``instance-data-sensitive.json``. > +JSON and record the set of keys and values for any metadata processed by > +cloud-init. 
Cloud-init standardizes the format for this content so that it > +can be generalized across different cloud platforms. > + > +There are three basic top-level keys: > + > +* **base64_encoded_keys**: A list of forward-slash delimited key paths into > + the instance-data.json object whose value is base64encoded for json > + compatibility. Values at these paths should be decoded to get the original > + value. > + > +* **ds**: Datasource-specific metadata crawled for the specific cloud > + platform. It should closely represent the structure of the cloud metadata > + crawled. The structure of content and details provided are entirely > + cloud-dependent. Mileage will vary depending on what the cloud exposes. > + The content exposed under the 'ds' key is currently **experimental** and > + expected to change slightly in the upcoming cloud-init release. > + > +* **v1**: Standardized cloud-init metadata keys, these keys are guaranteed to > + exist on all cloud platforms. They will also retain their current behavior > + and format and will be carried forward even if cloud-init introduces a new > + version of standardized keys with **v2**. > + > +The standardized keys present: > + > ++----------------------+-----------------------------------------------+---------------------------+ > +| Key path | Description | > Examples | > ++======================+===============================================+===========================+ > +| v1.cloud-name | The name of the cloud provided by metadata | > aws, openstack, azure, | > +| | key 'cloud-name' or the cloud-init datasource | > configdrive, nocloud, | > +| | name which was discovered. | > configdrive, etc. 
| you've configdrive twice, maybe put in OVF > ++----------------------+-----------------------------------------------+---------------------------+ > +| v1.instance_id | Unique instance_id allocated by the cloud | > i-<somehash> | > ++----------------------+-----------------------------------------------+---------------------------+ > +| v1.local_hostname | The internal or local hostname of the system | > ip-10-41-41-70, | > +| | | > <user-provided-hostname> | > ++----------------------+-----------------------------------------------+---------------------------+ > +| v1.region | The physical region/datacenter in which the | > us-east-2 | > +| | instance is deployed | > | > ++----------------------+-----------------------------------------------+---------------------------+ > +| v1.availability_zone | The physical availability zone in which the | > us-east-2b, nova, null | > +| | instance is deployed | > | > ++----------------------+-----------------------------------------------+---------------------------+ > + > + > +Below is an example of ``/run/cloud-init/instance_data.json`` on an EC2 > +instance: > + > +.. 
sourcecode:: json > + > + { > + "base64_encoded_keys": [], > + "ds": { > + "meta_data": { > + "ami-id": "ami-014e1416b628b0cbf", > + "ami-launch-index": "0", > + "ami-manifest-path": "(unknown)", > + "block-device-mapping": { > + "ami": "/dev/sda1", > + "ephemeral0": "sdb", > + "ephemeral1": "sdc", > + "root": "/dev/sda1" > + }, > + "hostname": "ip-10-41-41-70.us-east-2.compute.internal", > + "instance-action": "none", > + "instance-id": "i-04fa31cfc55aa7976", > + "instance-type": "t2.micro", > + "local-hostname": "ip-10-41-41-70.us-east-2.compute.internal", > + "local-ipv4": "10.41.41.70", > + "mac": "06:b6:92:dd:9d:24", > + "metrics": { > + "vhostmd": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" > + }, > + "network": { > + "interfaces": { > + "macs": { > + "06:b6:92:dd:9d:24": { > + "device-number": "0", > + "interface-id": "eni-08c0c9fdb99b6e6f4", > + "ipv4-associations": { > + "18.224.22.43": "10.41.41.70" > + }, > + "local-hostname": "ip-10-41-41-70.us-east-2.compute.internal", > + "local-ipv4s": "10.41.41.70", > + "mac": "06:b6:92:dd:9d:24", > + "owner-id": "437526006925", > + "public-hostname": "ec2-18-224-22-43.us-east-2.compute.amazonaws.com", > + "public-ipv4s": "18.224.22.43", > + "security-group-ids": "sg-828247e9", > + "security-groups": "Cloud-init integration test secgroup", > + "subnet-id": "subnet-282f3053", > + "subnet-ipv4-cidr-block": "10.41.41.0/24", > + "subnet-ipv6-cidr-blocks": "2600:1f16:b80:ad00::/64", > + "vpc-id": "vpc-252ef24d", > + "vpc-ipv4-cidr-block": "10.41.0.0/16", > + "vpc-ipv4-cidr-blocks": "10.41.0.0/16", > + "vpc-ipv6-cidr-blocks": "2600:1f16:b80:ad00::/56" > + } > + } > + } > + }, > + "placement": { > + "availability-zone": "us-east-2b" > + }, > + "profile": "default-hvm", > + "public-hostname": "ec2-18-224-22-43.us-east-2.compute.amazonaws.com", > + "public-ipv4": "18.224.22.43", > + "public-keys": { > + "cloud-init-integration": [ > + "ssh-rsa > + > 
AAAAB3NzaC1yc2EAAAADAQABAAABAQDSL7uWGj8cgWyIOaspgKdVy0cKJ+UTjfv7jBOjG2H/GN8bJVXy72XAvnhM0dUM+CCs8FOf0YlPX+Frvz2hKInrmRhZVwRSL129PasD12MlI3l44u6IwS1o/W86Q+tkQYEljtqDOo0a+cOsaZkvUNzUyEXUwz/lmYa6G4hMKZH4NBj7nbAAF96wsMCoyNwbWryBnDYUr6wMbjRR1J9Pw7Xh7WRC73wy4Va2YuOgbD3V/5ZrFPLbWZW/7TFXVrql04QVbyei4aiFR5n//GvoqwQDNe58LmbzX/xvxyKJYdny2zXmdAhMxbrpFQsfpkJ9E/H5w0yOdSvnWbUoG5xNGoOB > + cloud-init-integration" > + ] > + }, > + "reservation-id": "r-06ab75e9346f54333", > + "security-groups": "Cloud-init integration test secgroup", > + "services": { > + "domain": "amazonaws.com", > + "partition": "aws" > + } > + } > + }, > + "v1": { > + "availability-zone": "us-east-2b", > + "availability_zone": "us-east-2b", > + "cloud-name": "aws", > + "instance-id": "i-04fa31cfc55aa7976", > + "instance_id": "i-04fa31cfc55aa7976", > + "local-hostname": "ip-10-41-41-70", > + "local_hostname": "ip-10-41-41-70", > + "region": "us-east-2" > + } > + } > + > + > +Using instance-data > +=================== > + > +As of cloud-init v. 18.4, any variables present in > +``/run/cloud-init/instance-data.json`` can be used in: > + > +* User-data scripts > +* Cloud config data > +* Command line interface via **cloud-init query** or > + **cloud-init devel render** > + > +Many clouds allow users to provide user-data to an instance at > +the time the instance is launched. Cloud-init supports a number of > +:ref:`user_data_formats`. > + > +Both user-data scripts and **#cloud-config** data support jinja template > +rendering. > +When the first line of the provided user-data begins with, > +**## template: jinja** cloud-init will use jinja to render that file. > +Any instance-data.json variables are surfaced as dot-delimited jinja template what about instance-data-sensitive.json variables/values? > +variables. > + > + > +Below are some examples of providing these types of user-data: > + > +* Cloud config calling home with the ec2 public hostname and > avaliability-zone > + > +.. 
code-block:: shell-session > + > + ## template: jinja > + #cloud-config > + runcmd: > + - echo 'EC2 public hostname allocated to instance: {{ > + ds.meta_data.public_hostname }}' > /tmp/instance_metadata > + - echo 'EC2 avaiability zone: {{ v1.availability_zone }}' >> > + /tmp/instance_metadata > + - curl -X POST -d '{"hostname": "{{ds.meta_data.public_hostname }}", > + "availability-zone": "{{ v1.availability_zone }}"}' > + https://example.com.com com.com ? > + > +* Custom user-data script performing different operations based on region > + > +.. code-block:: shell-session > + > + ## template: jinja > + #!/bin/bash > + {% if v1.region == 'us-east-2' -%} > + echo 'Installing custom proxies for {{ v1.region }} > + sudo apt-get install my-xtra-fast-stack sudo apt install > + {%- endif %} > + ... > + > +.. note:: > + Trying to reference jinja variables that don't exist in > + instance-data.json will result in warnings in ``/var/log/cloud-init.log`` > + and the following string in your rendered user-data: > + ``CI_MISSING_JINJA_VAR/<your_varname>``. > + > +Cloud-init also surfaces a commandline tool **cloud-init query** which can > +assist developers or scripts with obtaining instance metadata easily. See > +:ref:`cli_query` for more information. > + > +To cut down on keystrokes on the command line, cloud-init also provides > +top-level key aliases for any standardized ``v#`` keys present. The preceding > +``v1`` is not required of ``v1.var_name`` These aliases will represent the > +value of the highest versioned standard key. For example, ``cloud_name`` > +value will be ``v2.cloud_name`` if both ``v1`` and ``v2`` keys are present in > +instance-data.json. > +The **query** command also publishes ``userdata`` and ``vendordata`` keys to > +the root user which will contain the decoded user and vendor data provided to > +this instance. Non-root users referencing userdata or vendordata keys will > +see only redacted values. > + > +.. 
code-block:: shell-session > + > + # List all top-level instance-data keys available > + % cloud-init query --list-keys > + > + # Find your EC2 ami-id > + % cloud-init query ds.metadata.ami_id > + > + # Format your cloud_name and region using jinja template syntax > + % cloud-init query --format 'cloud: {{ v1.cloud_name }} myregion: {{ > + % v1.region }}' > + > +.. note:: > + To save time designing a user-data template for a specific cloud's > + instance-data.json, use the 'render' cloud-init command on an > + instance booted on your favorite cloud. See :ref:`cli_devel` for more > + information. > + > +.. vi: textwidth=78 -- https://code.launchpad.net/~chad.smith/cloud-init/+git/cloud-init/+merge/354891 Your team cloud-init commiters is requested to review the proposed merge of ~chad.smith/cloud-init:feature/cli-cloudinit-query into cloud-init:master. _______________________________________________ Mailing list: https://launchpad.net/~cloud-init-dev Post to : cloud-init-dev@lists.launchpad.net Unsubscribe : https://launchpad.net/~cloud-init-dev More help : https://help.launchpad.net/ListHelp