Repository: incubator-vxquery
Updated Branches:
  refs/heads/prestonc/benchmarks_staging 1e7880caf -> eaed030b6


Updated the generated station xml to mirror web service.


Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/9456d4ad
Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/9456d4ad
Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/9456d4ad

Branch: refs/heads/prestonc/benchmarks_staging
Commit: 9456d4ad6074b0c1b60a2105395a84a12885fd80
Parents: 1e7880c
Author: Preston Carman <[email protected]>
Authored: Wed Mar 5 21:56:13 2014 -0800
Committer: Preston Carman <[email protected]>
Committed: Wed Mar 5 21:56:13 2014 -0800

----------------------------------------------------------------------
 .../noaa-ghcn-daily/scripts/weather_cli.py      |   6 +-
 .../scripts/weather_config_ghcnd.py             |  96 ++++++++++
 .../scripts/weather_config_mshr.py              |  78 ++++++++
 .../scripts/weather_convert_to_xml.py           | 183 +++++++++++++++++--
 .../scripts/weather_data_files.py               |   2 +-
 .../scripts/weather_dly_config.py               |  96 ----------
 .../scripts/weather_download_files.py           |  33 +++-
 7 files changed, 374 insertions(+), 120 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/9456d4ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
index 103c0d1..8d18607 100644
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
@@ -127,10 +127,12 @@ def main(argv):
     if section in ("all", "download"):
         print 'Processing the download section.'
         download = WeatherDownloadFiles(download_path)
-        download.download_all_files(reset)
+        download.download_ghcnd_files(reset)
+        download.download_mshr_files(reset)
 
         # Unzip the required file.
-        download.unzip_package(config.get_package(), reset)
+        download.unzip_ghcnd_package(config.get_package(), reset)
+        download.unzip_mshr_files(reset)
 
 
     # Create some basic paths for save files and references.

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/9456d4ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
new file mode 100644
index 0000000..801e748
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Base URL used to get all the required files.
+BASE_DOWNLOAD_URL = 'http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/'
+
+# List of required files for a build.
+FILE_NAMES = []
+FILE_NAMES.append('ghcnd-countries.txt')
+FILE_NAMES.append('ghcnd-inventory.txt')
+FILE_NAMES.append('ghcnd-states.txt')
+FILE_NAMES.append('ghcnd-stations.txt')
+FILE_NAMES.append('ghcnd-version.txt')
+FILE_NAMES.append('ghcnd_all.tar.gz')
+FILE_NAMES.append('ghcnd_gsn.tar.gz')
+FILE_NAMES.append('ghcnd_hcn.tar.gz')
+FILE_NAMES.append('readme.txt')
+FILE_NAMES.append('status.txt')
+
+# Store the row details here.
+
+# Index values of each field details.
+FIELD_INDEX_NAME = 0
+FIELD_INDEX_START = 1
+FIELD_INDEX_END = 2
+FIELD_INDEX_TYPE = 3
+
+DLY_FIELD_ID = 0
+DLY_FIELD_YEAR = 1
+DLY_FIELD_MONTH = 2
+DLY_FIELD_ELEMENT = 3
+
+DLY_FIELD_DAY_OFFSET = 4
+DLY_FIELD_DAY_FIELDS = 4
+
+DLY_FIELDS = []
+
+# Details about the row.
+DLY_FIELDS.append(['ID', 1, 11, 'Character'])
+DLY_FIELDS.append(['YEAR', 12, 15, 'Integer'])
+DLY_FIELDS.append(['MONTH', 16, 17, 'Integer'])
+DLY_FIELDS.append(['ELEMENT', 18, 21, 'Character'])
+
+# Days in each row.
+for i in range(1, 32):
+    start = 22 + ((i - 1) * 8)
+    DLY_FIELDS.append(['VALUE' + str(i), (start + 0), (start + 4), 'Integer'])
+    DLY_FIELDS.append(['MFLAG' + str(i), (start + 5), (start + 5), 
'Character'])
+    DLY_FIELDS.append(['QFLAG' + str(i), (start + 6), (start + 6), 
'Character'])
+    DLY_FIELDS.append(['SFLAG' + str(i), (start + 7), (start + 7), 
'Character'])
+
+# Details about the row.
+STATIONS_FIELDS = {}
+STATIONS_FIELDS['ID'] = ['ID', 1, 11, 'Character']
+STATIONS_FIELDS['LATITUDE'] = ['LATITUDE', 13, 20, 'Real']
+STATIONS_FIELDS['LONGITUDE'] = ['LONGITUDE', 22, 30, 'Real']
+STATIONS_FIELDS['ELEVATION'] = ['ELEVATION', 32, 37, 'Real']
+STATIONS_FIELDS['STATE'] = ['STATE', 39, 40, 'Character']
+STATIONS_FIELDS['NAME'] = ['NAME', 42, 71, 'Character']
+STATIONS_FIELDS['GSNFLAG'] = ['GSNFLAG', 73, 75, 'Character']
+STATIONS_FIELDS['HCNFLAG'] = ['HCNFLAG', 77, 79, 'Character']
+STATIONS_FIELDS['WMOID'] = ['WMOID', 81, 85, 'Character']
+
+# Details about the row.
+COUNTRIES_FIELDS = {}
+COUNTRIES_FIELDS['CODE'] = ['CODE', 1, 2, 'Character']
+COUNTRIES_FIELDS['NAME'] = ['NAME', 4, 50, 'Character']
+
+# Details about the row.
+STATES_FIELDS = {}
+STATES_FIELDS['CODE'] = ['CODE', 1, 2, 'Character']
+STATES_FIELDS['NAME'] = ['NAME', 4, 50, 'Character']
+
+# Details about the row.
+INVENTORY_FIELDS = []
+INVENTORY_FIELDS.append(['ID', 1, 11, 'Character'])
+INVENTORY_FIELDS.append(['LATITUDE', 13, 20, 'Real'])
+INVENTORY_FIELDS.append(['LONGITUDE', 22, 30, 'Real'])
+INVENTORY_FIELDS.append(['ELEMENT', 32, 35, 'Character'])
+INVENTORY_FIELDS.append(['FIRSTYEAR', 37, 40, 'Integer'])
+INVENTORY_FIELDS.append(['LASTYEAR', 42, 45, 'Integer'])
+

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/9456d4ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
new file mode 100644
index 0000000..7b1434f
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# List of required files for a build.
+MSHR_URLS = []
+MSHR_URLS.append('ftp://ftp.ncdc.noaa.gov/pub/data/homr/docs/MSHR_Enhanced_Table.txt')
+MSHR_URLS.append('http://www.ncdc.noaa.gov/homr/file/mshr_enhanced.txt.zip')
+
+# Index values of each field details.
+MSHR_FIELD_INDEX_NAME = 0
+MSHR_FIELD_INDEX_START = 1
+MSHR_FIELD_INDEX_END = 2
+MSHR_FIELD_INDEX_TYPE = 3
+
+# Store the row details here.
+MSHR_FIELDS = {}
+
+# Details about the row.
+MSHR_FIELDS['SOURCE_ID'] = ['SOURCE_ID', 1, 20, 'X(20)']
+MSHR_FIELDS['SOURCE'] = ['SOURCE', 22, 31, 'X(10)']
+MSHR_FIELDS['BEGIN_DATE'] = ['BEGIN_DATE', 33, 40, 'YYYYMMDD']
+MSHR_FIELDS['END_DATE'] = ['END_DATE', 42, 49, 'YYYYMMDD']
+MSHR_FIELDS['STATION_STATUS'] = ['STATION_STATUS', 51, 70, 'X(20)']
+MSHR_FIELDS['NCDCSTN_ID'] = ['NCDCSTN_ID', 72, 91, 'X(20)']
+MSHR_FIELDS['ICAO_ID'] = ['ICAO_ID', 93, 112, 'X(20)']
+MSHR_FIELDS['WBAN_ID'] = ['WBAN_ID', 114, 133, 'X(20)']
+MSHR_FIELDS['FAA_ID'] = ['FAA_ID', 135, 154, 'X(20)']
+MSHR_FIELDS['NWSLI_ID'] = ['NWSLI_ID', 156, 175, 'X(20)']
+MSHR_FIELDS['WMO_ID'] = ['WMO_ID', 177, 196, 'X(20)']
+MSHR_FIELDS['COOP_ID'] = ['COOP_ID', 198, 217, 'X(20)']
+MSHR_FIELDS['TRANSMITTAL_ID'] = ['TRANSMITTAL_ID', 219, 238, 'X(20)']
+MSHR_FIELDS['GHCND_ID'] = ['GHCND_ID', 240, 259, 'X(20)']
+MSHR_FIELDS['NAME_PRINCIPAL'] = ['NAME_PRINCIPAL', 261, 360, 'X(100)']
+MSHR_FIELDS['NAME_PRINCIPAL_SHORT'] = ['NAME_PRINCIPAL_SHORT', 362, 391, 
'X(30)']
+MSHR_FIELDS['NAME_COOP'] = ['NAME_COOP', 393, 492, 'X(100)']
+MSHR_FIELDS['NAME_COOP_SHORT'] = ['NAME_COOP_SHORT', 494, 523, 'X(30)']
+MSHR_FIELDS['NAME_PUBLICATION'] = ['NAME_PUBLICATION', 525, 624, 'X(100)']
+MSHR_FIELDS['NAME_ALIAS'] = ['NAME_ALIAS', 626, 725, 'X(100)']
+MSHR_FIELDS['NWS_CLIM_DIV'] = ['NWS_CLIM_DIV', 727, 736, 'X(10)']
+MSHR_FIELDS['NWS_CLIM_DIV_NAME'] = ['NWS_CLIM_DIV_NAME', 738, 777, 'X(40)']
+MSHR_FIELDS['STATE_PROV'] = ['STATE_PROV', 779, 788, 'X(10)']
+MSHR_FIELDS['COUNTY'] = ['COUNTY', 790, 839, 'X(50)']
+MSHR_FIELDS['NWS_ST_CODE'] = ['NWS_ST_CODE', 841, 842, 'X(2)']
+MSHR_FIELDS['FIPS_COUNTRY_CODE'] = ['FIPS_COUNTRY_CODE', 844, 845, 'X(2)']
+MSHR_FIELDS['FIPS_COUNTRY_NAME'] = ['FIPS_COUNTRY_NAME', 847, 946, 'X(100)']
+MSHR_FIELDS['NWS_REGION'] = ['NWS_REGION', 948, 977, 'X(30)']
+MSHR_FIELDS['NWS_WFO'] = ['NWS_WFO', 979, 988, 'X(10)']
+MSHR_FIELDS['ELEV_GROUND'] = ['ELEV_GROUND', 990, 1029, 'X(40)']
+MSHR_FIELDS['ELEV_GROUND_UNIT'] = ['ELEV_GROUND_UNIT', 1031, 1050, 'X(20)']
+MSHR_FIELDS['ELEV_BAROM'] = ['ELEV_BAROM', 1052, 1091, 'X(40)']
+MSHR_FIELDS['ELEV_BAROM_UNIT'] = ['ELEV_BAROM_UNIT', 1093, 1112, 'X(20)']
+MSHR_FIELDS['ELEV_AIR'] = ['ELEV_AIR', 1114, 1153, 'X(40)']
+MSHR_FIELDS['ELEV_AIR_UNIT'] = ['ELEV_AIR_UNIT', 1155, 1174, 'X(20)']
+MSHR_FIELDS['ELEV_ZERODAT'] = ['ELEV_ZERODAT', 1176, 1215, 'X(40)']
+MSHR_FIELDS['ELEV_ZERODAT_UNIT'] = ['ELEV_ZERODAT_UNIT', 1217, 1236, 'X(20)']
+MSHR_FIELDS['ELEV_UNK'] = ['ELEV_UNK', 1238, 1277, 'X(40)']
+MSHR_FIELDS['ELEV_UNK_UNIT'] = ['ELEV_UNK_UNIT', 1279, 1298, 'X(20)']
+MSHR_FIELDS['LAT_DEC'] = ['LAT_DEC', 1300, 1319, 'X(20)']
+MSHR_FIELDS['LON_DEC'] = ['LON_DEC', 1321, 1340, 'X(20)']
+MSHR_FIELDS['LAT_LON_PRECISION'] = ['LAT_LON_PRECISION', 1342, 1351, 'X(10)']
+MSHR_FIELDS['RELOCATION'] = ['RELOCATION', 1353, 1414, 'X(62)']
+MSHR_FIELDS['UTC_OFFSET'] = ['UTC_OFFSET', 1416, 1431, '9(16)']
+MSHR_FIELDS['OBS_ENV'] = ['OBS_ENV', 1433, 1472, 'X(40) ']
+MSHR_FIELDS['PLATFORM'] = ['PLATFORM', 1474, 1573, 'X(100)']

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/9456d4ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
index 36aff16..1aee4a7 100644
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
@@ -18,13 +18,75 @@ import textwrap
 from datetime import date
 import os
 import gzip
+from collections import OrderedDict
 
 # Custom modules.
-from weather_dly_config import *
+from weather_config_ghcnd import *
+from weather_config_mshr import *
 from weather_download_files import *
 
 class WeatherConvertToXML:
     
+    STATES = OrderedDict({
+        'AK': 'Alaska',
+        'AL': 'Alabama',
+        'AR': 'Arkansas',
+        'AS': 'American Samoa',
+        'AZ': 'Arizona',
+        'CA': 'California',
+        'CO': 'Colorado',
+        'CT': 'Connecticut',
+        'DC': 'District of Columbia',
+        'DE': 'Delaware',
+        'FL': 'Florida',
+        'GA': 'Georgia',
+        'GU': 'Guam',
+        'HI': 'Hawaii',
+        'IA': 'Iowa',
+        'ID': 'Idaho',
+        'IL': 'Illinois',
+        'IN': 'Indiana',
+        'KS': 'Kansas',
+        'KY': 'Kentucky',
+        'LA': 'Louisiana',
+        'MA': 'Massachusetts',
+        'MD': 'Maryland',
+        'ME': 'Maine',
+        'MI': 'Michigan',
+        'MN': 'Minnesota',
+        'MO': 'Missouri',
+        'MP': 'Northern Mariana Islands',
+        'MS': 'Mississippi',
+        'MT': 'Montana',
+        'NA': 'National',
+        'NC': 'North Carolina',
+        'ND': 'North Dakota',
+        'NE': 'Nebraska',
+        'NH': 'New Hampshire',
+        'NJ': 'New Jersey',
+        'NM': 'New Mexico',
+        'NV': 'Nevada',
+        'NY': 'New York',
+        'OH': 'Ohio',
+        'OK': 'Oklahoma',
+        'OR': 'Oregon',
+        'PA': 'Pennsylvania',
+        'PR': 'Puerto Rico',
+        'RI': 'Rhode Island',
+        'SC': 'South Carolina',
+        'SD': 'South Dakota',
+        'TN': 'Tennessee',
+        'TX': 'Texas',
+        'UT': 'Utah',
+        'VA': 'Virginia',
+        'VI': 'Virgin Islands',
+        'VT': 'Vermont',
+        'WA': 'Washington',
+        'WI': 'Wisconsin',
+        'WV': 'West Virginia',
+        'WY': 'Wyoming'
+    })
+    
     MONTHS = [
         "January",
         "February",
@@ -51,6 +113,9 @@ class WeatherConvertToXML:
         self.ghcnd_countries = base_path + '/ghcnd-countries.txt'
         self.ghcnd_states = base_path + '/ghcnd-states.txt'
         self.ghcnd_stations = base_path + '/ghcnd-stations.txt'
+
+        # MSHR support files.
+        self.mshr_stations = base_path + '/mshr_enhanced_201402.txt'
         
     def set_token(self, token):
         self.token = token
@@ -109,7 +174,7 @@ class WeatherConvertToXML:
         row = file_stream.readline()
         return self.process_station_data(row)
 
-    def process_sensor_file(self, file_name, max_files, sensor_max = 99):
+    def process_sensor_file(self, file_name, max_files, sensor_max=99):
         print "Processing sensor file: " + file_name
         file_stream = open(file_name, 'r')
     
@@ -164,17 +229,30 @@ class WeatherConvertToXML:
                 <credit_URL>http://www.ncdc.noaa.gov/</credit_URL>
             """)
     
-    def default_xml_web_service_start(self, total_records):
+    def default_xml_web_service_start(self):
         field_xml = ""
         field_xml += "<?xml version=\"1.0\" encoding=\"UTF-8\" 
standalone=\"yes\"?>\n"
+        return field_xml
+    
+    def default_xml_data_start(self, total_records):
+        field_xml = ""
         field_xml += "<dataCollection pageCount=\"1\" totalCount=\"" + 
str(total_records) + "\">\n"
         return field_xml
     
+    def default_xml_station_start(self):
+        field_xml = ""
+        field_xml = "<stationCollection pageSize=\"100\" pageCount=\"1\" 
totalCount=\"1\">\n"
+        return field_xml
+    
     def default_xml_field_date(self, report_date, indent=2):
         field_xml = ""
         field_xml += self.get_indent_space(indent) + "<date>" + 
str(report_date.year) + "-" + str(report_date.month).zfill(2) + "-" + 
str(report_date.day).zfill(2) + "T00:00:00.000</date>\n"
         return field_xml
     
+    def get_date_from_field(self, row, field):
+        report_date = self.get_field_from_definition(row, field)
+        return str(report_date.year) + "-" + str(report_date.month).zfill(2) + 
"-" + str(report_date.day).zfill(2)
+    
     def default_xml_field_date_iso8601(self, report_date):
         field_xml = ""
         field_xml += "    <observation_date>" + self.MONTHS[report_date.month 
- 1] + " " + str(report_date.day) + ", " + str(report_date.year) + 
"</observation_date>\n"
@@ -241,6 +319,72 @@ class WeatherConvertToXML:
     
         return field_xml
     
+    def default_xml_mshr_station_additional(self, station_id):
+        """The web service station data is generated from the MSHR data 
supplemented with GHCN-Daily."""
+        station_mshr_row = ""
+        stations_mshr_file = open(self.mshr_stations, 'r')
+        for line in stations_mshr_file:
+            if station_id == self.get_field_from_definition(line, 
MSHR_FIELDS['GHCND_ID']).strip():
+                station_mshr_row = line
+                break
+        
+        if station_mshr_row == "":
+            return ""
+
+        additional_xml = ""
+
+        county = self.get_field_from_definition(station_mshr_row, 
MSHR_FIELDS['COUNTY']).strip()
+        if county != "":
+            additional_xml += self.default_xml_location_labels("CNTY", 
"FIPS:-9999", county)
+            
+        country_code = self.get_field_from_definition(station_mshr_row, 
MSHR_FIELDS['FIPS_COUNTRY_CODE']).strip()
+        country_name = self.get_field_from_definition(station_mshr_row, 
MSHR_FIELDS['FIPS_COUNTRY_NAME']).strip()
+        if country_code != "" and country_name != "":
+            additional_xml += self.default_xml_location_labels("CNTRY", 
"FIPS:"+country_code, country_name)
+        
+        return additional_xml
+
+    def default_xml_location_labels(self, type, id, display_name):
+        label_xml = ""
+        label_xml += self.default_xml_start_tag("locationLabels", 2)
+        label_xml += self.default_xml_element("type", type, 3)
+        label_xml += self.default_xml_element("id", id, 3)
+        label_xml += self.default_xml_element("displayName", display_name, 3)
+        label_xml += self.default_xml_end_tag("locationLabels", 2)
+        return label_xml
+        
+
+    def default_xml_web_service_station(self, station_id):
+        """The web service station data is generated from available historical 
sources."""
+        station_ghcnd_row = ""
+        stations_ghcnd_file = open(self.ghcnd_stations, 'r')
+        for line in stations_ghcnd_file:
+            if station_id == self.get_field_from_definition(line, 
STATIONS_FIELDS['ID']):
+                station_ghcnd_row = line
+                break
+    
+        xml_station = ""
+        xml_station += self.default_xml_start_tag("station", 1)
+        
+        xml_station += self.default_xml_element("id", "GHCND:" + station_id, 2)
+        xml_station += self.default_xml_element("displayName", 
self.get_field_from_definition(station_ghcnd_row, 
STATIONS_FIELDS['NAME']).strip(), 2)
+        xml_station += self.default_xml_element("latitude", 
self.get_field_from_definition(station_ghcnd_row, 
STATIONS_FIELDS['LATITUDE']).strip(), 2)
+        xml_station += self.default_xml_element("longitude", 
self.get_field_from_definition(station_ghcnd_row, 
STATIONS_FIELDS['LONGITUDE']).strip(), 2)
+        
+        elevation = self.get_field_from_definition(station_ghcnd_row, 
STATIONS_FIELDS['ELEVATION']).strip()
+        if elevation != "-999.9":
+            xml_station += self.default_xml_element("elevation", elevation, 2)
+        
+        state_code = self.get_field_from_definition(station_ghcnd_row, 
STATIONS_FIELDS['STATE']).strip()
+        if state_code != "":
+            xml_station += self.default_xml_location_labels("ST", "FIPS:" + 
str(self.STATES.keys().index(state_code)), self.STATES[state_code])
+        
+        # Add the MSHR data to the station generated information.
+        xml_station += self.default_xml_mshr_station_additional(station_id)
+            
+        xml_station += self.default_xml_end_tag("station", 1)
+        return xml_station
+        
     def default_xml_day_reading_as_field(self, row, day):
         day_index = DLY_FIELD_DAY_OFFSET + ((day - 1) * DLY_FIELD_DAY_FIELDS)
         value = self.get_dly_field(row, day_index);
@@ -306,8 +450,14 @@ class WeatherConvertToXML:
         return textwrap.dedent("""\
             </ghcnd_observation>""")
 
-    def default_xml_web_service_end(self):
-        return "</dataCollection>"
+    def default_xml_data_end(self):
+        return self.default_xml_end_tag("dataCollection", 0)
+
+    def default_xml_station_end(self):
+        return self.default_xml_end_tag("stationCollection", 0)
+
+    def default_xml_element(self, tag, data, indent=1):
+        return self.get_indent_space(indent) + "<" + tag + ">" + data + "</" + 
tag + ">\n"
 
     def default_xml_start_tag(self, tag, indent=1):
         return self.get_indent_space(indent) + "<" + tag + ">\n"
@@ -434,9 +584,11 @@ class WeatherMonthlyXMLFile(WeatherConvertToXML):
             return 0
 
 class WeatherWebServiceMonthlyXMLFile(WeatherConvertToXML):
+    """The web service class details how to create files similar to the NOAA 
web service."""
     skip_downloading = False
     # Station data
     def process_station_data(self, row):
+        """Adds a single station record file, either by downloading the data 
or by generating a similar record."""
         station_id = self.get_dly_field(row, DLY_FIELD_ID)
         download = 0
         if self.token is not "" and not self.skip_downloading:
@@ -444,15 +596,20 @@ class 
WeatherWebServiceMonthlyXMLFile(WeatherConvertToXML):
             if download == 0:
                 self.skip_downloading = True
         
-        # If not downloaded generate.
+        # If not downloaded, generate.
         if download != 0:
             return download
         else:
             # Information for each daily file.
-            station_xml_file = self.default_xml_start()
-            station_xml_file += self.default_xml_field_station(station_id)
-            station_xml_file += self.default_xml_end()
+            station_xml_file = self.default_xml_web_service_start()
+            station_xml_file += self.default_xml_station_start()
+            station_xml_file += 
self.default_xml_web_service_station(station_id)
+            station_xml_file += self.default_xml_station_end()
             
+            # Remove white space.
+            station_xml_file = station_xml_file.replace("\n", "");
+            station_xml_file = 
station_xml_file.replace(self.get_indent_space(1), "");
+
             # Make sure the station folder is available.
             ghcnd_xml_station_path = self.get_base_folder(station_id, 
"stations")
             if not os.path.isdir(ghcnd_xml_station_path):
@@ -470,9 +627,10 @@ class WeatherWebServiceMonthlyXMLFile(WeatherConvertToXML):
                 return 0
 
     # Station data
-    def download_station_data(self, station_id, token, reset = False):
+    def download_station_data(self, station_id, token, reset=False):
+        """Downloads the station data from the web service."""
         import time
-        time.sleep(10)
+        time.sleep(2)
         # Make sure the station folder is available.
         ghcnd_xml_station_path = self.get_base_folder(station_id, "stations")
         if not os.path.isdir(ghcnd_xml_station_path):
@@ -506,6 +664,7 @@ class WeatherWebServiceMonthlyXMLFile(WeatherConvertToXML):
 
     # Sensor data
     def process_one_month_sensor_set(self, records, page):
+        """Generates records for a station using the web service xml layout."""
         found_data = False        
         year = int(self.get_dly_field(records[0], DLY_FIELD_YEAR))
         month = int(self.get_dly_field(records[0], DLY_FIELD_MONTH))
@@ -535,7 +694,7 @@ class WeatherWebServiceMonthlyXMLFile(WeatherConvertToXML):
             except ValueError:
                 pass
 
-        daily_xml_file = self.default_xml_web_service_start(count) + 
daily_xml_file + self.default_xml_web_service_end()
+        daily_xml_file = self.default_xml_web_service_start() + 
self.default_xml_data_start(count) + daily_xml_file + 
self.default_xml_data_end()
         daily_xml_file = daily_xml_file.replace("\n", "");
         daily_xml_file = daily_xml_file.replace(self.get_indent_space(1), "");
 

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/9456d4ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
index 8e26e99..da2afcc 100644
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
@@ -92,8 +92,8 @@ class WeatherDataFiles:
             self.close_progress_data(True)
         self.reset()
         
-    # Once the initial data has been generated, the data can be copied into a 
set number of partitions. 
     def copy_to_n_partitions(self, save_path, partitions, base_paths=[]):
+        """Once the initial data has been generated, the data can be copied 
into a set number of partitions. """
         if (len(base_paths) == 0):
             return
         

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/9456d4ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_dly_config.py
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_dly_config.py
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_dly_config.py
deleted file mode 100644
index 801e748..0000000
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_dly_config.py
+++ /dev/null
@@ -1,96 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Base URL used to get all the required files.
-BASE_DOWNLOAD_URL = 'http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/'
-
-# List of required files for a build.
-FILE_NAMES = []
-FILE_NAMES.append('ghcnd-countries.txt')
-FILE_NAMES.append('ghcnd-inventory.txt')
-FILE_NAMES.append('ghcnd-states.txt')
-FILE_NAMES.append('ghcnd-stations.txt')
-FILE_NAMES.append('ghcnd-version.txt')
-FILE_NAMES.append('ghcnd_all.tar.gz')
-FILE_NAMES.append('ghcnd_gsn.tar.gz')
-FILE_NAMES.append('ghcnd_hcn.tar.gz')
-FILE_NAMES.append('readme.txt')
-FILE_NAMES.append('status.txt')
-
-# Store the row details here.
-
-# Index values of each field details.
-FIELD_INDEX_NAME = 0
-FIELD_INDEX_START = 1
-FIELD_INDEX_END = 2
-FIELD_INDEX_TYPE = 3
-
-DLY_FIELD_ID = 0
-DLY_FIELD_YEAR = 1
-DLY_FIELD_MONTH = 2
-DLY_FIELD_ELEMENT = 3
-
-DLY_FIELD_DAY_OFFSET = 4
-DLY_FIELD_DAY_FIELDS = 4
-
-DLY_FIELDS = []
-
-# Details about the row.
-DLY_FIELDS.append(['ID', 1, 11, 'Character'])
-DLY_FIELDS.append(['YEAR', 12, 15, 'Integer'])
-DLY_FIELDS.append(['MONTH', 16, 17, 'Integer'])
-DLY_FIELDS.append(['ELEMENT', 18, 21, 'Character'])
-
-# Days in each row.
-for i in range(1, 32):
-    start = 22 + ((i - 1) * 8)
-    DLY_FIELDS.append(['VALUE' + str(i), (start + 0), (start + 4), 'Integer'])
-    DLY_FIELDS.append(['MFLAG' + str(i), (start + 5), (start + 5), 
'Character'])
-    DLY_FIELDS.append(['QFLAG' + str(i), (start + 6), (start + 6), 
'Character'])
-    DLY_FIELDS.append(['SFLAG' + str(i), (start + 7), (start + 7), 
'Character'])
-
-# Details about the row.
-STATIONS_FIELDS = {}
-STATIONS_FIELDS['ID'] = ['ID', 1, 11, 'Character']
-STATIONS_FIELDS['LATITUDE'] = ['LATITUDE', 13, 20, 'Real']
-STATIONS_FIELDS['LONGITUDE'] = ['LONGITUDE', 22, 30, 'Real']
-STATIONS_FIELDS['ELEVATION'] = ['ELEVATION', 32, 37, 'Real']
-STATIONS_FIELDS['STATE'] = ['STATE', 39, 40, 'Character']
-STATIONS_FIELDS['NAME'] = ['NAME', 42, 71, 'Character']
-STATIONS_FIELDS['GSNFLAG'] = ['GSNFLAG', 73, 75, 'Character']
-STATIONS_FIELDS['HCNFLAG'] = ['HCNFLAG', 77, 79, 'Character']
-STATIONS_FIELDS['WMOID'] = ['WMOID', 81, 85, 'Character']
-
-# Details about the row.
-COUNTRIES_FIELDS = {}
-COUNTRIES_FIELDS['CODE'] = ['CODE', 1, 2, 'Character']
-COUNTRIES_FIELDS['NAME'] = ['NAME', 4, 50, 'Character']
-
-# Details about the row.
-STATES_FIELDS = {}
-STATES_FIELDS['CODE'] = ['CODE', 1, 2, 'Character']
-STATES_FIELDS['NAME'] = ['NAME', 4, 50, 'Character']
-
-# Details about the row.
-INVENTORY_FIELDS = []
-INVENTORY_FIELDS.append(['ID', 1, 11, 'Character'])
-INVENTORY_FIELDS.append(['LATITUDE', 13, 20, 'Real'])
-INVENTORY_FIELDS.append(['LONGITUDE', 22, 30, 'Real'])
-INVENTORY_FIELDS.append(['ELEMENT', 32, 35, 'Character'])
-INVENTORY_FIELDS.append(['FIRSTYEAR', 37, 40, 'Integer'])
-INVENTORY_FIELDS.append(['LASTYEAR', 42, 45, 'Integer'])
-

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/9456d4ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
index 87adb11..fb59b50 100644
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
@@ -19,9 +19,11 @@ import os.path
 import shutil
 import tarfile
 import urllib
+import zipfile
 
 # Custom modules.
-from weather_dly_config import *
+from weather_config_ghcnd import *
+from weather_config_mshr import *
 
 class WeatherDownloadFiles:
 
@@ -32,14 +34,18 @@ class WeatherDownloadFiles:
             os.makedirs(save_path)
 
 
-    # Download the complete list
-    def download_all_files(self, reset=False):
+    def download_ghcnd_files(self, reset=False):
+        """Download the complete list."""
         for file_name in FILE_NAMES:
             url = BASE_DOWNLOAD_URL + file_name
             self.download_file(url, reset)
 
-    # Download the file, unless it exists.
+    def download_mshr_files(self, reset=False):
+        for url in MSHR_URLS:
+            self.download_file(url, reset)
+
     def download_file(self, url, reset=False):
+        """Download the file, unless it exists."""
         file_name = self.save_path + "/" + url.split('/')[-1]
 
         if not os.path.isfile(file_name) or reset:
@@ -47,8 +53,8 @@ class WeatherDownloadFiles:
             urllib.urlretrieve(url, file_name, report_download_status)
             print
 
-    # Unzip the package file, unless it exists.
-    def unzip_package(self, package, reset=False):
+    def unzip_ghcnd_package(self, package, reset=False):
+        """Unzip the package file, unless it exists."""
         file_name = self.save_path + "/" + package + ".tar.gz"
         unzipped_path = self.save_path + "/" + package
         
@@ -60,16 +66,25 @@ class WeatherDownloadFiles:
             tar_file = tarfile.open(file_name, 'r:gz')
             tar_file.extractall(unzipped_path)
  
-# Report download status.
+    def unzip_mshr_files(self, reset=False):
+        """Unzip each downloaded MSHR zip file into the save path."""
+        for url in MSHR_URLS:
+            if url.endswith('.zip'):
+                file_name = self.save_path + "/" + url.split('/')[-1]
+                print "Unzipping: " + file_name
+                with zipfile.ZipFile(file_name, 'r') as myzip:
+                    myzip.extractall(self.save_path)
+ 
 def report_download_status(count, block, size):
+    """Report download status."""
     line_size = 50
     erase = "\b" * line_size
     sys.stdout.write(erase)
     report = get_report_line((float(count) * block / size), line_size)
     sys.stdout.write(report)
 
-# Creates a string to be used in reporting the percentage done.
 def get_report_line(percentage, line_size):
+    """Creates a string to be used in reporting the percentage done."""
     report = ""
     for i in range(0, line_size):
         if (float(i) / line_size < percentage):
@@ -78,8 +93,8 @@ def get_report_line(percentage, line_size):
             report += "-"
     return report
             
-# Download the file, unless it exists.
 def download_file_save_as(url, new_file_name, reset=False):
+    """Download the file, unless it exists."""
     if not os.path.isfile(new_file_name) or reset:
         print "Downloading: " + url
         urllib.urlretrieve(url, new_file_name, report_download_status)

Reply via email to