Repository: climate
Updated Branches:
  refs/heads/master 3539aa2be -> cb9428413
CLIMATE-928 - temporal_subset should trim edges of dataset times to ensure months divide evenly into years

Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/cb942841
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/cb942841
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/cb942841

Branch: refs/heads/master
Commit: cb9428413baccf8128df7855467838ce1600049c
Parents: 3539aa2
Author: Alex Goodman <ago...@users.noreply.github.com>
Authored: Wed Sep 27 18:35:18 2017 -0700
Committer: Alex Goodman <ago...@users.noreply.github.com>
Committed: Wed Sep 27 18:35:18 2017 -0700

----------------------------------------------------------------------
 ocw/dataset_processor.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/climate/blob/cb942841/ocw/dataset_processor.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py
index 2097cc4..2227892 100755
--- a/ocw/dataset_processor.py
+++ b/ocw/dataset_processor.py
@@ -75,13 +75,18 @@ def temporal_subset(target_dataset, month_start, month_end,
                              name=target_dataset.name)
 
     if average_each_year:
+        new_times = new_dataset.times
         nmonth = len(month_index)
-        ntime = new_dataset.times.size
+        ntime = new_times.size
         nyear = ntime // nmonth
         if ntime % nmonth != 0:
-            raise ValueError("Number of times in dataset ({}) does not "
-                             "divide evenly into {} year(s)."
-                             .format(ntime, nyear))
+            logger.warning("Number of times in dataset ({}) does not "
+                           "divide evenly into {} year(s). Trimming data..."
+                           .format(ntime, nyear))
+            s_mon = new_times[0].month
+            e_mon = new_times[-1].month
+            new_times = new_times[13-s_mon:-e_mon]
+            nyear = new_times.size // nmonth
 
         averaged_time = []
         ny, nx = target_dataset.values.shape[1:]
@@ -92,7 +97,7 @@ def temporal_subset(target_dataset, month_start, month_end,
             center_index = int(nmonth / 2 + iyear * nmonth)
             if nmonth == 1:
                 center_index = iyear
-            averaged_time.append(new_dataset.times[center_index])
+            averaged_time.append(new_times[center_index])
             averaged_values[iyear, :] = ma.average(new_dataset.values[
                 nmonth * iyear: nmonth * iyear + nmonth, :], axis=0)
         new_dataset = ds.Dataset(target_dataset.lats,
@@ -253,7 +258,7 @@ def spatial_regrid(target_dataset, new_latitudes, new_longitudes,
             if path.contains_point([new_lons[iy, ix],
                                     new_lats[iy, ix]]) or not boundary_check:
                 new_xy_mask[iy, ix] = 0.
-            
+
     new_index = np.where(new_xy_mask == 0.)
     # Regrid the data on each time slice
     for i in range(len(target_dataset.times)):
@@ -286,7 +291,7 @@ def spatial_regrid(target_dataset, new_latitudes, new_longitudes,
             values_false_indices = np.where(values_original.mask == False)
             qmdi[values_true_indices] = 1.
             qmdi[values_false_indices] = 0.
-            qmdi_r = griddata((lons.flatten(), lats.flatten()), qmdi.flatten(), 
+            qmdi_r = griddata((lons.flatten(), lats.flatten()), qmdi.flatten(),
                               (new_lons[new_index], new_lats[new_index]),
                               method='nearest')
 
@@ -1441,7 +1446,7 @@ def _are_bounds_contained_by_dataset(dataset, bounds):
     '''
     lat_min, lat_max, lon_min, lon_max = dataset.spatial_boundaries()
     start, end = dataset.temporal_boundaries()
-    
+
     errors = []
 
     # TODO: THIS IS TERRIBLY inefficent and we need to use a geometry