[ https://issues.apache.org/jira/browse/CLIMATE-838?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15413869#comment-15413869 ]
ASF GitHub Bot commented on CLIMATE-838:
----------------------------------------
Github user huikyole commented on a diff in the pull request:
https://github.com/apache/climate/pull/384#discussion_r74102524
--- Diff: RCMES/run_RCMES.py ---
@@ -51,123 +65,110 @@
time_info = config['time']
temporal_resolution = time_info['temporal_resolution']
+# Read time info
start_time = datetime.strptime(time_info['start_time'].strftime('%Y%m%d'),'%Y%m%d')
end_time = datetime.strptime(time_info['end_time'].strftime('%Y%m%d'),'%Y%m%d')
+# Read space info
space_info = config['space']
min_lat = space_info['min_lat']
max_lat = space_info['max_lat']
min_lon = space_info['min_lon']
max_lon = space_info['max_lon']
+kwargs = {'min_lat': min_lat, 'max_lat': max_lat, 'min_lon': min_lon,
+          'max_lon': max_lon, 'start_time': start_time, 'end_time': end_time}
-ref_data_info = config['datasets']['reference']
+# Get the dataset loader options
+obs_data_info = config['datasets']['reference']
model_data_info = config['datasets']['targets']
-if ref_data_info['data_source'] == 'ESGF' or model_data_info['data_source'] == 'ESGF':
-    username=raw_input('Enter your ESGF OpenID:\n')
-    password=getpass(prompt='Enter your ESGF password:\n')
-
-""" Step 1: Load the reference data """
-ref_lat_name = None
-ref_lon_name = None
-if 'latitude_name' in ref_data_info.keys():
-    ref_lat_name = ref_data_info['latitude_name']
-if 'longitude_name' in ref_data_info.keys():
-    ref_lon_name = ref_data_info['longitude_name']
-print 'Loading observation dataset:\n',ref_data_info
-ref_name = ref_data_info['data_name']
-if ref_data_info['data_source'] == 'local':
-    ref_dataset = local.load_file(ref_data_info['path'],
-                                  ref_data_info['variable'], name=ref_name,
-                                  lat_name=ref_lat_name, lon_name=ref_lon_name)
-elif ref_data_info['data_source'] == 'rcmed':
-    ref_dataset = rcmed.parameter_dataset(ref_data_info['dataset_id'],
-                                          ref_data_info['parameter_id'],
-                                          min_lat, max_lat, min_lon, max_lon,
-                                          start_time, end_time)
-elif ref_data_info['data_source'] == 'ESGF':
-    ds = esgf.load_dataset(dataset_id = ref_data_info['dataset_id'],
-                           variable = ref_data_info['variable'],
-                           esgf_username=username,
-                           esgf_password=password)
-    ref_dataset = ds[0]
-else:
-    print ' '
-if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
-    ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, temporal_resolution)
-if 'multiplying_factor' in ref_data_info.keys():
-    ref_dataset.values = ref_dataset.values*ref_data_info['multiplying_factor']
+
+# Extract info we don't want to put into the loader config
+# Multiplying Factor to scale obs by
+multiplying_factor = np.ones(len(obs_data_info))
+for i, info in enumerate(obs_data_info):
+    if 'multiplying_factor' in info:
+        multiplying_factor[i] = info.pop('multiplying_factor')
+
+# If models are GCMs we can skip boundary check. Probably need to find a more
+# elegant way to express this in the config file API.
+boundary_check = True
+for i, info in enumerate(model_data_info):
+    if 'boundary_check' in info:
+        boundary_check = info.pop('boundary_check')
+
+""" Step 1: Load the observation data """
+print 'Loading observation datasets:\n',obs_data_info
+obs_datasets = load_datasets_from_config(*obs_data_info, **kwargs)
+obs_names = [dataset.name for dataset in obs_datasets]
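+# Normalize the timestamps and apply the multiplying factors extracted above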
+for i, dataset in enumerate(obs_datasets):
+    if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
+        obs_datasets[i] = dsp.normalize_dataset_datetimes(dataset,
+                                                          temporal_resolution)
+
+    if multiplying_factor[i] != 1:
+        obs_datasets[i].values *= multiplying_factor[i]
""" Step 2: Load model NetCDF Files into OCW Dataset Objects """
-model_lat_name = None
-model_lon_name = None
-if 'latitude_name' in model_data_info.keys():
-    model_lat_name = model_data_info['latitude_name']
-if 'longitude_name' in model_data_info.keys():
-    model_lon_name = model_data_info['longitude_name']
-boundary_check_model = True
-if 'GCM_data' in model_data_info.keys():
-    if model_data_info['GCM_data']:
-        boundary_check_model = False
-print 'Loading model datasets:\n',model_data_info
-if model_data_info['data_source'] == 'local':
-    model_datasets = local.load_multiple_files(file_path=model_data_info['path'],
-                                               variable_name=model_data_info['variable'],
-                                               lat_name=model_lat_name,
-                                               lon_name=model_lon_name)
-    model_names = [dataset.name for dataset in model_datasets]
-elif model_data_info['data_source'] == 'ESGF':
-    md = esgf.load_dataset(dataset_id=model_data_info['dataset_id'],
-                           variable=model_data_info['variable'],
-                           esgf_username=username,
-                           esgf_password=password)
-    model_datasets = []
-    model_names = []
-    model_datasets.append(md[0])
-    model_names.append(model_data_info['data_name'])
-else:
-    print ' '
-    # TO DO: support RCMED
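+# The unified loader dispatches on each entry's 'data_source' internally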
+model_datasets = load_datasets_from_config(*model_data_info, **kwargs)
+model_names = [dataset.name for dataset in model_datasets]
if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
-    for idata,dataset in enumerate(model_datasets):
-        model_datasets[idata] = dsp.normalize_dataset_datetimes(dataset, temporal_resolution)
+    for i, dataset in enumerate(model_datasets):
+        model_datasets[i] = dsp.normalize_dataset_datetimes(dataset,
+                                                            temporal_resolution)
""" Step 3: Subset the data for temporal and spatial domain """
# Create a Bounds object to use for subsetting
if time_info['maximum_overlap_period']:
-    start_time, end_time = utils.get_temporal_overlap([ref_dataset]+model_datasets)
+    start_time, end_time = utils.get_temporal_overlap(obs_datasets+model_datasets)
    print 'Maximum overlap period'
    print 'start_time:', start_time
    print 'end_time:', end_time
if temporal_resolution == 'monthly' and end_time.day !=1:
-    end_time = end_time.replace(day=1)
-if ref_data_info['data_source'] == 'rcmed':
-    min_lat = np.max([min_lat, ref_dataset.lats.min()])
-    max_lat = np.min([max_lat, ref_dataset.lats.max()])
-    min_lon = np.max([min_lon, ref_dataset.lons.min()])
-    max_lon = np.min([max_lon, ref_dataset.lons.max()])
-bounds = Bounds(lat_min=min_lat, lat_max=max_lat, lon_min=min_lon,
-                lon_max=max_lon, start=start_time, end=end_time)
-
-ref_dataset = dsp.subset(ref_dataset, bounds)
-if ref_dataset.temporal_resolution() != temporal_resolution:
-    ref_dataset = dsp.temporal_rebin(ref_dataset, temporal_resolution)
-for idata,dataset in enumerate(model_datasets):
-    model_datasets[idata] = dsp.subset(dataset, bounds)
+    end_time = end_time.replace(day=1)
+
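+# Clamp the subset domain to the intersection of the user-specified region
+# and the spatial extent of every observation dataset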
+for i, dataset in enumerate(obs_datasets):
+    min_lat = np.max([min_lat, dataset.lats.min()])
+    max_lat = np.min([max_lat, dataset.lats.max()])
+    min_lon = np.max([min_lon, dataset.lons.min()])
+    max_lon = np.min([max_lon, dataset.lons.max()])
+
+bounds = Bounds(lat_min=min_lat,
+                lat_max=max_lat,
+                lon_min=min_lon,
+                lon_max=max_lon,
+                start=start_time,
+                end=end_time)
+
+for i, dataset in enumerate(obs_datasets):
+    obs_datasets[i] = dsp.subset(dataset, bounds)
+    if dataset.temporal_resolution() != temporal_resolution:
+        obs_datasets[i] = dsp.temporal_rebin(obs_datasets[i],
+                                             temporal_resolution)
+
+for i, dataset in enumerate(model_datasets):
+    model_datasets[i] = dsp.subset(dataset, bounds)
    if dataset.temporal_resolution() != temporal_resolution:
-        model_datasets[idata] = dsp.temporal_rebin(dataset, temporal_resolution)
+        model_datasets[i] = dsp.temporal_rebin(model_datasets[i],
+                                               temporal_resolution)
-# Temporaly subset both observation and model datasets for the user specified season
+# Temporally subset both observation and model datasets
+# for the user specified season
month_start = time_info['month_start']
month_end = time_info['month_end']
average_each_year = time_info['average_each_year']
-ref_dataset = dsp.temporal_subset(ref_dataset,month_start, month_end,average_each_year)
-for idata,dataset in enumerate(model_datasets):
-    model_datasets[idata] = dsp.temporal_subset(dataset,month_start, month_end,average_each_year)
+# TODO: Fully support multiple observation / reference datasets.
--- End diff ---
We need to open another JIRA issue for this.
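For whoever picks that issue up, one possible shape for the follow-up is to loop the evaluation over every loaded reference. This is only a sketch, assuming the standard OCW Evaluation/metrics API and datasets already regridded to a common grid; the per-observation loop is an illustration, not something this PR implements.

    import ocw.metrics as metrics
    from ocw.evaluation import Evaluation

    # Hypothetical: run one evaluation per reference dataset so every
    # observation in obs_datasets is compared against all model datasets.
    bias = metrics.Bias()
    for obs_dataset, obs_name in zip(obs_datasets, obs_names):
        evaluation = Evaluation(obs_dataset, model_datasets, [bias])
        evaluation.run()
        # evaluation.results holds one entry per (target, metric) pair
        print 'Finished evaluation against', obs_name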
> Update configuration files and example scripts to use new DatasetLoader
> -----------------------------------------------------------------------
>
> Key: CLIMATE-838
> URL: https://issues.apache.org/jira/browse/CLIMATE-838
> Project: Apache Open Climate Workbench
> Issue Type: Improvement
> Components: config, data sources
> Affects Versions: 1.1.0
> Reporter: Alex Goodman
> Assignee: Huikyo Lee
> Fix For: 1.2.0
>
>
> The new {{DatasetLoader}} class found in {{dataset_loader.py}} was designed
> to do the following:
> 1) Unify the loader functions for each data source in one module.
> 2) Greatly simplify the process of parsing input from configuration files
> into the loaders.
> 3) Allow users to easily load datasets from many different data sources in a
> single evaluation run.
> The unit tests I have written can test some of this functionality, but it is
> now time to put it into practice. This means updating the configuration files
> and the associated evaluation scripts (e.g. run_RCMES.py).
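To make point 2 above concrete, this is the kind of structure the loader consumes, shown as the parsed Python dict rather than raw YAML. The key names follow run_RCMES.py in the diff above; every value is made up for illustration.

    # Hypothetical parsed config: multiple references from different data
    # sources plus local model files, all loadable in a single run.
    config = {
        'datasets': {
            'reference': [
                {'data_source': 'local',
                 'data_name': 'TRMM',             # made-up dataset name
                 'path': './data/trmm_pcp.nc',    # made-up path
                 'variable': 'pcp',
                 'multiplying_factor': 24.0},     # e.g. mm/hr -> mm/day
                {'data_source': 'rcmed',
                 'data_name': 'CRU',
                 'dataset_id': 10,                # made-up RCMED ids
                 'parameter_id': 34},
            ],
            'targets': [
                {'data_source': 'local',
                 'path': './data/wrf_prec.nc',    # made-up path
                 'variable': 'PREC',
                 'boundary_check': False},        # GCM-style data: skip check
            ],
        },
    }

    obs_data_info = config['datasets']['reference']
    obs_datasets = load_datasets_from_config(*obs_data_info, **kwargs)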
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)