CLIMATE-769 Create data source input for NASA JPL PO.DAAC
Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/a847e1b6
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/a847e1b6
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/a847e1b6

Branch: refs/heads/master
Commit: a847e1b645a0474f5fd01124fac55a5d8be2fec1
Parents: 116d16f
Author: Omkar20895 <omkarreddy2...@gmail.com>
Authored: Thu Aug 11 17:45:58 2016 +0530
Committer: Omkar20895 <omkarreddy2...@gmail.com>
Committed: Thu Aug 11 17:45:58 2016 +0530

----------------------------------------------------------------------
 ocw/data_source/podaac.py | 71 ++++++++++++++++++++-------------------
 ocw/tests/test_podaac.py  | 76 ++++++++++++++++++++++--------------------
 2 files changed, 77 insertions(+), 70 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/climate/blob/a847e1b6/ocw/data_source/podaac.py
----------------------------------------------------------------------
diff --git a/ocw/data_source/podaac.py b/ocw/data_source/podaac.py
index 1eb0195..47a5409 100644
--- a/ocw/data_source/podaac.py
+++ b/ocw/data_source/podaac.py
@@ -15,14 +15,13 @@
 # specific language governing permissions and limitations
 # under the License.
-import sys -sys.path.append('/Users/omkar/Documents/Git/podaacpy/podaac') -from podaac_data_source import Podaac +from podaac_data_source import Podaac import numpy as np from ocw.dataset import Dataset from netCDF4 import Dataset as netcdf_dataset from netcdftime import utime -import os, urllib +import os +import urllib import xml.etree.ElementTree as ET @@ -45,14 +44,13 @@ def _convert_times_to_datetime(time): return [parsed_time.num2date(x) for x in time[:]] - -def load_dataset(variable ,datasetId='', datasetShortName='', name=''): +def load_dataset(variable, datasetId='', datasetShortName='', name=''): '''Loads a Dataset from PODAAC :param variable: The name of the variable to read from the dataset. :type variable: :mod:`string` - :param datasetId: dataset persistent ID. datasetId or \ + :param datasetId: dataset persistent ID. datasetId or \ shortName is required for a granule search. Example: \ PODAAC-ASOP2-25X01 :type datasetId: :mod:`string` @@ -70,41 +68,46 @@ def load_dataset(variable ,datasetId='', datasetShortName='', name=''): :raises: ServerError ''' - #Downloading the dataset using podaac toolkit - podaac = Podaac() - path = os.path.dirname(os.path.abspath(__file__)) - granuleName = podaac.extract_l4_granule(datasetId=datasetId, shortName=datasetShortName, path=path) - path = path+'/'+granuleName - d = netcdf_dataset(path, mode='r') - dataset = d.variables[variable] + # Downloading the dataset using podaac toolkit + podaac = Podaac() + path = os.path.dirname(os.path.abspath(__file__)) + granuleName = podaac.extract_l4_granule( + datasetId=datasetId, shortName=datasetShortName, path=path) + path = path + '/' + granuleName + d = netcdf_dataset(path, mode='r') + dataset = d.variables[variable] # By convention, but not by standard, if the dimensions exist, they will be in the order: # time (t), altitude (z), latitude (y), longitude (x) # but conventions aren't always followed and all dimensions aren't always present so # see if we can make some educated 
deductions before defaulting to just pulling the first three # columns. - temp_dimensions = map(lambda x:x.lower(),dataset.dimensions) - dataset_dimensions = dataset.dimensions - time = dataset_dimensions[temp_dimensions.index('time') if 'time' in temp_dimensions else 0] - lat = dataset_dimensions[temp_dimensions.index('lat') if 'lat' in temp_dimensions else 1] - lon = dataset_dimensions[temp_dimensions.index('lon') if 'lon' in temp_dimensions else 2] + temp_dimensions = map(lambda x: x.lower(), dataset.dimensions) + dataset_dimensions = dataset.dimensions + time = dataset_dimensions[temp_dimensions.index( + 'time') if 'time' in temp_dimensions else 0] + lat = dataset_dimensions[temp_dimensions.index( + 'lat') if 'lat' in temp_dimensions else 1] + lon = dataset_dimensions[temp_dimensions.index( + 'lon') if 'lon' in temp_dimensions else 2] # Time is given to us in some units since an epoch. We need to convert # these values to datetime objects. Note that we use the main object's # time object and not the dataset specific reference to it. We need to # grab the 'units' from it and it fails on the dataset specific object. - times = np.array(_convert_times_to_datetime(d[time])) - lats = np.array(d.variables[lat][:]) - lons = np.array(d.variables[lon][:]) - values = np.array(dataset[:]) - origin = { - 'source' : 'PO.DAAC', - 'url' : 'podaac.jpl.nasa.gov/ws' - } - - # Removing the downloaded temporary granule before creating the OCW dataset. - d.close() - path = os.path.join(os.path.dirname(__file__), granuleName) - os.remove(path) - - return Dataset(lats, lons, times, values, variable, name=name, origin=origin) + times = np.array(_convert_times_to_datetime(d[time])) + lats = np.array(d.variables[lat][:]) + lons = np.array(d.variables[lon][:]) + values = np.array(dataset[:]) + origin = { + 'source': 'PO.DAAC', + 'url': 'podaac.jpl.nasa.gov/ws' + } + + # Removing the downloaded temporary granule before creating the OCW + # dataset. 
+ d.close() + path = os.path.join(os.path.dirname(__file__), granuleName) + os.remove(path) + + return Dataset(lats, lons, times, values, variable, name=name, origin=origin) http://git-wip-us.apache.org/repos/asf/climate/blob/a847e1b6/ocw/tests/test_podaac.py ---------------------------------------------------------------------- diff --git a/ocw/tests/test_podaac.py b/ocw/tests/test_podaac.py index d0c2808..f696d02 100644 --- a/ocw/tests/test_podaac.py +++ b/ocw/tests/test_podaac.py @@ -17,45 +17,49 @@ import ocw.data_source.podaac as podaac -import unittest, os +import unittest +import os from ocw.dataset import Dataset + class TestPodaacDataSource(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.datasetId = 'PODAAC-GHCMC-4FM02' - cls.datasetShortName = 'CMC0.2deg-CMC-L4-GLOB-v2.0' - cls.variable = 'sea_ice_fraction' - cls.name = 'PO.DAAC_test_dataset' - cls.file_path = os.path.dirname(os.path.abspath(__file__)) - cls.format = '.nc' - cls.dataset = podaac.load_dataset(cls.variable, cls.datasetId, cls.datasetShortName, cls.name) - - def test_is_dataset(self): - self.assertTrue(isinstance(self.dataset, Dataset)) - - def test_dataset_lats(self): - self.assertEquals(len(self.dataset.lats), 901) - - def test_dataset_lons(self): - self.assertEquals(len(self.dataset.lons), 1800) - - def test_dataset_times(self): - self.assertEquals(len(self.dataset.times), 1) - - def test_dataset_values(self): - self.assertEquals(len(self.dataset.values),1) - - def test_valid_date_conversion(self): - start = dt.datetime(2006, 6, 7, 12) - self.assertTrue(start == self.dataset.times[0]) - - def test_dataset_origin(self): - self.assertEquals(self.dataset.origin['source'], 'PO.DAAC') - self.assertEquals(self.dataset.origin['url'], 'podaac.jpl.nasa.gov/ws') - - def test_custom_name(self): - self.assertEquals(self.dataset.name, self.name) + + @classmethod + def setUpClass(cls): + cls.datasetId = 'PODAAC-GHCMC-4FM02' + cls.datasetShortName = 'CMC0.2deg-CMC-L4-GLOB-v2.0' + 
cls.variable = 'sea_ice_fraction' + cls.name = 'PO.DAAC_test_dataset' + cls.file_path = os.path.dirname(os.path.abspath(__file__)) + cls.format = '.nc' + cls.dataset = podaac.load_dataset( + cls.variable, cls.datasetId, cls.datasetShortName, cls.name) + + def test_is_dataset(self): + self.assertTrue(isinstance(self.dataset, Dataset)) + + def test_dataset_lats(self): + self.assertEquals(len(self.dataset.lats), 901) + + def test_dataset_lons(self): + self.assertEquals(len(self.dataset.lons), 1800) + + def test_dataset_times(self): + self.assertEquals(len(self.dataset.times), 1) + + def test_dataset_values(self): + self.assertEquals(len(self.dataset.values), 1) + + def test_valid_date_conversion(self): + start = dt.datetime(2006, 6, 7, 12) + self.assertTrue(start == self.dataset.times[0]) + + def test_dataset_origin(self): + self.assertEquals(self.dataset.origin['source'], 'PO.DAAC') + self.assertEquals(self.dataset.origin['url'], 'podaac.jpl.nasa.gov/ws') + + def test_custom_name(self): + self.assertEquals(self.dataset.name, self.name) if __name__ == '__main__':