Generally good, and I'd like to have this datasource enabled by default also. Its great to see cloud's putting instance-id in dmi data.
please see the comments inline. Diff comments: > === modified file 'cloudinit/sources/DataSourceDigitalOcean.py' > --- cloudinit/sources/DataSourceDigitalOcean.py 2016-05-12 17:56:26 > +0000 > +++ cloudinit/sources/DataSourceDigitalOcean.py 2016-07-26 14:34:57 > +0000 > @@ -14,22 +15,27 @@ > # You should have received a copy of the GNU General Public License > # along with this program. If not, see <http://www.gnu.org/licenses/>. > > -from cloudinit import ec2_utils > +# DigitalOcean Droplet API: > +# https://developers.digitalocean.com/documentation/metadata/ > + > +import json > +import os > + > from cloudinit import log as logging > from cloudinit import sources > +from cloudinit import url_helper > from cloudinit import util > > -import functools > - > - > LOG = logging.getLogger(__name__) > > BUILTIN_DS_CONFIG = { > - 'metadata_url': 'http://169.254.169.254/metadata/v1/', > - 'mirrors_url': 'http://mirrors.digitalocean.com/' > + 'metadata_url': 'http://169.254.169.254/metadata/v1.json', > } > -MD_RETRIES = 0 > -MD_TIMEOUT = 1 > + > +# Wait for a up to a minute, retrying the meta-data server > +# every 2 seconds. > +MD_RETRIES = 30 > +MD_TIMEOUT = 2 there is 'timeout' (how long to allow the url request to take before giving up) and there is sec_between. you've used MD_TIMEOUT for both. might as well separate these to 2 variables and update your call to readurl. > > > class DataSourceDigitalOcean(sources.DataSource): > @@ -40,43 +46,67 @@ > util.get_cfg_by_path(sys_cfg, ["datasource", "DigitalOcean"], > {}), > BUILTIN_DS_CONFIG]) > self.metadata_address = self.ds_cfg['metadata_url'] > - > - if self.ds_cfg.get('retries'): > - self.retries = self.ds_cfg['retries'] > - else: > - self.retries = MD_RETRIES > - > - if self.ds_cfg.get('timeout'): > - self.timeout = self.ds_cfg['timeout'] > - else: > - self.timeout = MD_TIMEOUT > - > - def get_data(self): > - caller = functools.partial(util.read_file_or_url, > - timeout=self.timeout, > retries=self.retries) > - > - def mcaller(url): > - return caller(url).contents > - > - md = ec2_utils.MetadataMaterializer(mcaller(self.metadata_address), > - base_url=self.metadata_address, > - caller=mcaller) > - > - self.metadata = md.materialize() > - > - if self.metadata.get('id'): > - return True > - else: > + self.retries = self.ds_cfg.get('retries', MD_RETRIES) > + self.timeout = self.ds_cfg.get('timeout', MD_TIMEOUT) > + > + def _get_sysinfo(self): > + # DigitalOcean embeds vendor ID and instance/droplet_id in the > + # SMBIOS information > + > + # DigitalOcean only supports i386/x86_64 hardware > + uname_arch = os.uname()[4] > + if uname_arch not in ('i386', 'i686', 'x86_64'): > + return (None, None) > + LOG.debug("checking if instance is a DigitalOcean droplet") I'd prefer to just accept anything that says they're DigitalOcean (per the bios), independent of arch. https://bugs.launchpad.net/qemu/+bug/1243287 is a pain, which is probably better fixed in generic util.read_dmi_data (avoiding running on arm*) than it is right now. so either a.) keep the check for arch b.) fix util.read_dmi-data to not try running dmidecode on arm (see the comment in cloudinit/sources/DataSourceSmartOS.py for that bug and just put a similar work around in util). > + > + is_do = False > + droplet_id = None > + > + # Detect if we are on DigitalOcean and return the Droplet's ID > + vendor_name = util.read_dmi_data("system-manufacturer") > + if vendor_name == "DigitalOcean": i'm generally a fan of just getting out of a function early. then you can indent less. if vendor_name != DigitalOcean: return (Nonw, Nonw) > + is_do = True > + LOG.info("running on DigitalOcean") > + > + droplet_id = util.read_dmi_data("system-serial-number") > + > + if droplet_id: > + LOG.debug(("system identified via SMBIOS as DigitalOcean " > + "Droplet {}").format(droplet_id)) > + else: > + LOG.critical(("system identified via SMBIOS as a > DigitalOcean " > + "Droplet, but not provide an ID. " > + "Please file a support ticket at: " > + > "https://cloud.digitalocean.com/support/tickets" > + "/new")) > + > + return (is_do, droplet_id) > + > + def get_data(self, apply_filter=False): > + (is_do, droplet_id) = self._get_sysinfo() > + > + # only proceed if we know we are on DigitalOcean > + if not is_do: > return False > > - def get_userdata_raw(self): > - return "\n".join(self.metadata['user-data']) > - > - def get_vendordata_raw(self): > - return "\n".join(self.metadata['vendor-data']) > + LOG.debug("reading metadata from {}".format(self.metadata_address)) > + response = url_helper.readurl(self.metadata_address, > + timeout=self.timeout, > + sec_between=self.timeout, > + retries=self.retries) > + > + contents = util.decode_binary(response.contents) > + decoded = json.loads(contents) > + > + self.metadata = decoded > + self.metadata['instance-id'] = decoded.get('droplet_id', droplet_id) > + self.metadata['local-hostname'] = decoded.get('hostname', droplet_id) > + self.vendordata_raw = decoded.get("vendor_data", None) > + self.userdata_raw = decoded.get("user_data", None) > + return True > > def get_public_ssh_keys(self): > - public_keys = self.metadata['public-keys'] > + public_keys = self.metadata.get('public_keys', []) > if isinstance(public_keys, list): > return public_keys > else: > @@ -84,21 +114,17 @@ > > @property > def availability_zone(self): > - return self.metadata['region'] > - > - def get_instance_id(self): > - return self.metadata['id'] > - > - def get_hostname(self, fqdn=False, resolve_ip=False): > - return self.metadata['hostname'] > - > - def get_package_mirror_info(self): > - return self.ds_cfg['mirrors_url'] > + return self.metadata.get('region', 'default') > > @property > def launch_index(self): > return None > > + def check_instance_id(self, sys_cfg): this is supposed to check if the instance_id is still valid and return true or false. it will be called on each boot after the first, and if it returns False, a slower path is used, and metadata will end up being re-checked. So, look at the azure source which does this: return sources.instance_id_matches_system_uuid(self.get_instance_id()) I think you might want this: return sources.instance_id_matches_system_uuid( self.get_instance_id(), 'system-serial-number') > + (_, droplet_id) = self._get_sysinfo() > + return droplet_id > + > + > # Used to match classes to dependencies > datasources = [ > (DataSourceDigitalOcean, (sources.DEP_FILESYSTEM, sources.DEP_NETWORK)), -- https://code.launchpad.net/~utlemming/cloud-init/digitalocean/+merge/301123 Your team cloud init development team is requested to review the proposed merge of lp:~utlemming/cloud-init/digitalocean into lp:cloud-init. _______________________________________________ Mailing list: https://launchpad.net/~cloud-init-dev Post to : cloud-init-dev@lists.launchpad.net Unsubscribe : https://launchpad.net/~cloud-init-dev More help : https://help.launchpad.net/ListHelp