Changeset: 2b93ec7dc119 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2b93ec7dc119 Modified Files: clients/iotclient/documentation/index.rst clients/iotclient/documentation/streams_data_types.rst clients/iotclient/src/Streams/datatypes.py clients/iotclient/src/Streams/jsonschemas.py clients/iotclient/src/Streams/streamscreator.py Branch: iot Log Message:
Added more data types for validation and cleaned code diffs (truncated from 803 to 300 lines): diff --git a/clients/iotclient/documentation/index.rst b/clients/iotclient/documentation/index.rst --- a/clients/iotclient/documentation/index.rst +++ b/clients/iotclient/documentation/index.rst @@ -17,4 +17,5 @@ Contents: iot_server_arguments.rst restful_resources.rst streams_data_types.rst - conclusion.rst + conclusion.rst + diff --git a/clients/iotclient/documentation/streams_data_types.rst b/clients/iotclient/documentation/streams_data_types.rst --- a/clients/iotclient/documentation/streams_data_types.rst +++ b/clients/iotclient/documentation/streams_data_types.rst @@ -15,8 +15,8 @@ By default a column is not nullable, but Text Types ========== -Text, String, Character Large Object ------------------------------------- +Text, String, CLOB, Character Large Object +------------------------------------------ MonetDB's string types with unbounded length. The insertion must be provided as a JSON string. @@ -30,15 +30,37 @@ UUID An *Universally Unique Identifier* according to `RFC 4122 <https://www.ietf.org/rfc/rfc4122.txt>`_. The insertion as a JSON string is validated against the regular expression :code:`^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$`. +MAC +--- + +A *Media Access Control Address* identifier. The insertion as a JSON string is validated against the regular expression :code:`^([0-9A-Fa-f]{2}[:-]){5}([0-9A-Fa-f]{2})$`. As MonetDB doesn't have a MAC equivalent data type yet, the data is stored as :code:`char(17)`. + +URL +--- + +An *Uniform Resource Locator* as a specific type of an URI is validated according to `RFC 3987 <https://www.ietf.org/rfc/rfc3987.txt>`_. The insertion must also be a JSON String. + Inet ---- An *IPv4* address. The insertion as a JSON string is validated against the regular expression :code:`^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$` with further semantic validation. 
-URL ---- +InetSix +------- -An *Uniform Resource Locator* as a specific type of an URI is validated according to `RFC 3987 <https://www.ietf.org/rfc/rfc3987.txt>`_. The insertion must also be a JSON String. +An *IPv6* address. The value must be a JSON String. As MonetDB doesn't have an IPv6 equivalent data type yet, the data is stored as :code:`char(45)`. + +.. warning:: This validation currently relies on Python's *socket.inet_pton()* function, which is available only on some UNIX systems. `Check the documentation for further details <https://docs.python.org/2.7/library/socket.html#socket.inet_pton>`_. + +Regex +----- + +A string always validated against a provided regular expression. The JSON must contain a :code:`regex` key with the regular expression. As MonetDB doesn't have a Regex equivalent data type yet, the data is stored as :code:`string`. + +Enum +---- + +A SQL :code:`CHAR` type validated against a pre-defined array. During creation, the :code:`values` key must be present with a JSON array of Strings containing the values of the enum (ex: :code:`"values": ["red", "blue", "green"]`). The default value if provided must be one of the values. Number Types ============ @@ -90,22 +112,14 @@ As in numbers, a minimum and maximum val Date ---- -A regular date in the Gregorian Calendar, expressed in format "YYYY-MM-DD". +A regular date in the Gregorian Calendar, expressed in format :code:`YYYY-MM-DD`. Time ---- -The time of day expressed in format "HH:MM:SS.sss". +The time of day expressed in format :code:`HH:MM:SS.sss`. If the pair :code:`"timezone": false` is provided, the timezone will be truncated. Timestamp --------- -A timestamp according to `RFC 3339 <https://www.ietf.org/rfc/rfc3339.txt>`_ with timezone. The regular expression is the standard `ISO 8601 with timezone format <https://en.wikipedia.org/wiki/ISO_8601>`_. - -Enum Type -========= - -Enum ----- - -A SQL :code:`CHAR` type validated against a pre-defined array. 
During creation, the :code:`values` key must be present with a JSON array of Strings containing the values of the enum (ex: :code:`"values": ["red", "blue", "green"]`). The default value if provided must be one of the values. +A timestamp according to `RFC 3339 <https://www.ietf.org/rfc/rfc3339.txt>`_ with timezone. The regular expression is the standard `ISO 8601 with timezone format <https://en.wikipedia.org/wiki/ISO_8601>`_. If the pair :code:`"timezone": false` is provided, the timezone will be truncated. diff --git a/clients/iotclient/src/Streams/datatypes.py b/clients/iotclient/src/Streams/datatypes.py --- a/clients/iotclient/src/Streams/datatypes.py +++ b/clients/iotclient/src/Streams/datatypes.py @@ -1,13 +1,15 @@ +import copy import datetime import dateutil import itertools +import math +import re import struct -import copy -import math + from abc import ABCMeta, abstractmethod from dateutil import parser -from jsonschemas import UUID_REGEX +from jsonschemas import UUID_REGEX, MAC_ADDRESS_REGEX, TIME_REGEX # Later check the byte order https://docs.python.org/2/library/struct.html#byte-order-size-and-alignment # Also check the consequences of aligment on packing HUGEINTs! @@ -59,9 +61,11 @@ class StreamDataType(object): def get_default_value(self): # get the default value representation in the data type return self._default_value - @abstractmethod def add_json_schema_entry(self, schema): # add the entry for the stream's corresponding json schema - pass # must be done after setting the default value!!! + dic = {} # must be done after setting the default value!!! 
+ if hasattr(self, '_default_value'): + dic['default'] = self._default_value + schema[self._column_name] = dic def prepare_parameters(self): # prepare arguments for the binary conversion return {} @@ -86,7 +90,6 @@ class StreamDataType(object): if errors: raise DataValidationException(errors=errors) - return self.pack_parsed_values(extracted_values, counter, parameters) def to_json_representation(self): # get a json representation of the data type while checking the stream's info @@ -95,30 +98,35 @@ class StreamDataType(object): json_data['default'] = self._default_value return json_data - @abstractmethod - def get_sql_params(self): # get other possible parameters such as if nullable, default value, maximum and minimum - return [] + def process_sql_parameters(self, array): # get other possible parameters such as a limit, minimum and maximum + pass def create_stream_sql(self): # get column creation statement on SQL - array = [self._column_name, " "] - array.extend(self.get_sql_params()) + array = [self._column_name, " ", self._data_type] + self.process_sql_parameters(array) # add extra parameters to the SQL statement + + if self._default_value is not None: + array.extend([" DEFAULT '", str(self._default_value), "'"]) + if not self._is_nullable: + array.append(" NOT NULL") return ''.join(array) -class BaseTextType(StreamDataType): - __metaclass__ = ABCMeta +class TextType(StreamDataType): + """Covers: TEXT, STRING, CLOB and CHARACTER LARGE OBJECT""" def __init__(self, **kwargs): - super(BaseTextType, self).__init__(**kwargs) + super(TextType, self).__init__(**kwargs) def get_nullable_constant(self): return NIL_STRING def set_default_value(self, default_value): - self._default_value = None + self._default_value = default_value def add_json_schema_entry(self, schema): - pass + super(TextType, self).add_json_schema_entry(schema) + schema[self._column_name]['type'] = 'string' def prepare_parameters(self): return {'lengths_sum': 0} @@ -132,78 +140,156 @@ class 
BaseTextType(StreamDataType): string_pack = "".join(extracted_values) return struct.pack(ALIGNMENT + str(parameters['lengths_sum']) + 's', string_pack) - def get_sql_params(self): - return [] +class UUIDType(TextType): + """Covers: UUID""" -class TextType(BaseTextType): - """Covers: CHAR, VARCHAR, CHARACTER VARYING, TEXT, STRING, CLOB and CHARACTER LARGE OBJECT - Also Inet, URL and UUID""" + def __init__(self, **kwargs): + super(UUIDType, self).__init__(**kwargs) + + def add_json_schema_entry(self, schema): + super(UUIDType, self).add_json_schema_entry(schema) + schema[self._column_name]['pattern'] = UUID_REGEX + + def prepare_parameters(self): + return {} + + def process_next_value(self, entry, counter, parameters, errors): + return str(entry) + '\n' + + def pack_parsed_values(self, extracted_values, counter, parameters): + string_pack = "".join(extracted_values) + return struct.pack(ALIGNMENT + str(37 * counter) + 's', string_pack) + + +class MACType(TextType): + """Covers: MAC addresses""" + + def __init__(self, **kwargs): + super(MACType, self).__init__(**kwargs) + + def add_json_schema_entry(self, schema): + super(MACType, self).add_json_schema_entry(schema) + schema[self._column_name]['pattern'] = MAC_ADDRESS_REGEX + + def prepare_parameters(self): + return {} + + def process_next_value(self, entry, counter, parameters, errors): + return str(entry) + '\n' + + def pack_parsed_values(self, extracted_values, counter, parameters): + string_pack = "".join(extracted_values) + return struct.pack(ALIGNMENT + str(18 * counter) + 's', string_pack) + + def process_sql_parameters(self, array): + array[2] = 'char(17)' # A MAC Address has 17 characters + + +class URLType(TextType): + """Covers: URL""" + + def __init__(self, **kwargs): + super(URLType, self).__init__(**kwargs) + + def add_json_schema_entry(self, schema): + super(URLType, self).add_json_schema_entry(schema) + schema[self._column_name]['format'] = 'uri' + + +class INet(TextType): + """Covers: Inet""" + + def 
__init__(self, **kwargs): + super(INet, self).__init__(**kwargs) + + def add_json_schema_entry(self, schema): + super(INet, self).add_json_schema_entry(schema) + schema[self._column_name]['format'] = 'ipv4' + + +class INetSix(TextType): + """Covers: Inet6""" + + def __init__(self, **kwargs): + super(INetSix, self).__init__(**kwargs) + + def add_json_schema_entry(self, schema): + super(INetSix, self).add_json_schema_entry(schema) + schema[self._column_name]['format'] = 'ipv6' + + # http://stackoverflow.com/questions/166132/maximum-length-of-the-textual-representation-of-an-ipv6-address + def process_sql_parameters(self, array): + array[2] = 'char(45)' + + +class RegexType(TextType): + """Covers: Regex""" def __init__(self, **kwargs): super(TextType, self).__init__(**kwargs) - if 'limit' in kwargs: - self._limit = int(kwargs['limit']) + self._regex = re.compile(kwargs['regex']) + + def set_default_value(self, default_value): + if self._regex.match(default_value) is None: + raise Exception('The default value does not match with the regular expression!') + self._default_value = default_value def add_json_schema_entry(self, schema): - dic = {"type": "string"} + super(RegexType, self).add_json_schema_entry(schema) + schema[self._column_name]['pattern'] = self._regex - if hasattr(self, '_limit'): # limit is not used in uri, inet or uuid - dic['maxLength'] = self._limit - elif self._data_type == 'url': - dic['format'] = 'uri' - elif self._data_type == 'inet': - dic['format'] = 'ipv4' - elif self._data_type == 'uuid': - dic['pattern'] = UUID_REGEX _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list