On 1/3/23 22:57, aapost wrote:
I am trying to wrap my head around how one goes about working with and editing xml elements ... Back to contemplating and tinkering..

For anyone in a similar situation, xmlschema is actually quite nice.

It didn't have the features I was looking for out of the box, but it does have a to_objects function, and I learned quite a bit while picking it apart. I was able to patch it to be good enough for my requirements.

Below is the patch for anyone interested:

#
# Contribution for the xmlschema & elementpath python modules which are
# Copyright (c), 2016-2020, SISSA (International School for Advanced Studies).
# All rights reserved.
#
# This file is distributed under the terms of the MIT License.
# See the file 'LICENSE' in the root directory of the present
# distribution, or http://opensource.org/licenses/MIT.
#

# Patching and expansion of the xmlschema.dataobjects.DataElement object features.
# To get the best demonstration, change the schema variable to your .xsd and xmlobj to your .xml file,
# then run this as $ python -i filename.py

from typing import Any, Optional, Union, Tuple
#from types import MethodType

class ValueLockedError(Exception):
  """
  Raised when a change is attempted on an object whose ``_locked`` flag is set.

  The formatted text is kept on ``.message`` and is also handed to
  :class:`Exception` as its only argument.
  """

  def __init__(self, obj, variable_name):
    # str.join only accepts strings, so a non-string variable_name is still
    # rejected with a TypeError, as with plain concatenation.
    pieces = (
      "Can't set .",
      variable_name,
      "\nThe object:\n",
      str(obj),
      "\nis Locked (._locked is set to True)",
    )
    self.message = "".join(pieces)
    super().__init__(self.message)

# importing in order necessary for intended monkey patch
import elementpath.etree as ep_etree

# Monkey patching additional static functions to the import of elementpath.etree

# for namespace management of xml.etree.ElementTree code paths (which use
# the global variable register_namespace._namespace_map for namespace registering)
def etree_remove_registered_namespace(elem: ep_etree.ElementProtocol,
                                      uri: str = '') -> None:
  """
  Remove the registration of the namespace *uri* for the etree flavour of *elem*.

  For elements without an ``nsmap`` attribute the URI is dropped from the
  ``register_namespace._namespace_map`` dictionary (uri->prefix) of the
  selected ElementTree module. For any other element every ``elem.nsmap``
  entry whose value equals *uri* is removed.

  :param elem: the element used to select the etree implementation.
  :param uri: the namespace URI to remove; nothing happens if it is not found.
  :raises: :exc:`TypeError` if *elem* is not an etree element.
  """
  etree_module: Any
  if not ep_etree.is_etree_element(elem):
    raise TypeError(f"{elem!r} is not an Element")
  elif isinstance(elem, ep_etree.PyElementTree.Element):
    etree_module = ep_etree.PyElementTree
  elif not hasattr(elem, 'nsmap'):
    etree_module = ep_etree.ElementTree
  else:
    import lxml.etree as etree_module  # type: ignore[no-redef]

  if not hasattr(elem, 'nsmap'):
    etree_module.register_namespace._namespace_map.pop(uri, None)
  else:
    # TODO research this for better understanding
    # _namespace_map is uri->prefix
    # DataElement.nsmap prefix->uri
    # lxml etree .nsmap ?->?
    # not using lxml anyway so not really an issue as
    # this condition shouldn't be met
    # NOTE(review): presumably lxml hands out a copy of its nsmap, in which
    # case the deletions below do not reach the element - confirm.
    nsmap = elem.nsmap
    # research - can there be multiple instances of uri to prefix?..
    # or are they intended to be 1:1?..
    # Collect the matching prefixes first: deleting from a dict while
    # iterating over it raises RuntimeError.
    matching_prefixes = [key for key, value in nsmap.items() if value == uri]
    for key in matching_prefixes:
      del nsmap[key]

#patching
# The plain function is stored: module attributes are not bound through the
# descriptor protocol, and staticmethod objects are only callable as regular
# functions from Python 3.10 on.
setattr(ep_etree, "etree_remove_registered_namespace",
        etree_remove_registered_namespace)

# for namespace management of xml.etree.ElementTree code paths (which use
# the global variable register_namespace._namespace_map for namespace registering)
def etree_get_registered_namespaces(elem: ep_etree.ElementProtocol) -> dict:
  """
  Return the namespace registry that applies to the etree flavour of *elem*.

  For elements without an ``nsmap`` attribute this is the
  ``register_namespace._namespace_map`` dictionary (uri->prefix) of the
  selected ElementTree module itself, not a copy. Otherwise ``elem.nsmap``
  is returned.

  :param elem: the element used to select the etree implementation.
  :raises: :exc:`TypeError` if *elem* is not an etree element.
  """
  etree_module: Any
  if not ep_etree.is_etree_element(elem):
    raise TypeError(f"{elem!r} is not an Element")
  elif isinstance(elem, ep_etree.PyElementTree.Element):
    etree_module = ep_etree.PyElementTree
  elif not hasattr(elem, 'nsmap'):
    etree_module = ep_etree.ElementTree
  else:
    import lxml.etree as etree_module  # type: ignore[no-redef]

  if not hasattr(elem, 'nsmap'):
    return etree_module.register_namespace._namespace_map
  return elem.nsmap # shouldn't be met

#patching
# The plain function is stored: module attributes are not bound through the
# descriptor protocol, and staticmethod objects are only callable as regular
# functions from Python 3.10 on.
setattr(ep_etree, "etree_get_registered_namespaces",
        etree_get_registered_namespaces)

# for namespace management of xml.etree.ElementTree code paths (which use
# the global variable register_namespace._namespace_map for namespace registering)
def etree_register_namespace(elem: ep_etree.ElementProtocol,
                             prefix: Optional[str] = None,
                             uri: Optional[str] = None) -> None:
  """
  Register *prefix* for the namespace *uri* with the etree flavour of *elem*.

  For elements without an ``nsmap`` attribute the ``register_namespace``
  function of the selected ElementTree module is called. Otherwise the
  mapping is written to ``elem.nsmap``. Nothing is registered unless both
  *prefix* and *uri* are given.

  :param elem: the element used to select the etree implementation.
  :param prefix: the namespace prefix to register.
  :param uri: the namespace URI to register.
  :raises: :exc:`TypeError` if *elem* is not an etree element.
  """
  etree_module: Any
  if not ep_etree.is_etree_element(elem):
    raise TypeError(f"{elem!r} is not an Element")
  elif isinstance(elem, ep_etree.PyElementTree.Element):
    etree_module = ep_etree.PyElementTree
  elif not hasattr(elem, 'nsmap'):
    etree_module = ep_etree.ElementTree
  else:
    import lxml.etree as etree_module  # type: ignore[no-redef]

  # The element check above still runs first, so an invalid elem is reported
  # even when there is nothing to register.
  if prefix is None or uri is None:
    return

  if not hasattr(elem, 'nsmap'):
    etree_module.register_namespace(prefix, uri)
  else:
    # TODO research this for better understanding
    # _namespace_map is uri->prefix
    # DataElement.nsmap prefix->uri
    # lxml etree .nsmap ?->?
    # not using lxml anyway so not really an issue as
    # this condition shouldn't be met
    elem.nsmap[prefix] = uri

#patching
# The plain function is stored: module attributes are not bound through the
# descriptor protocol, and staticmethod objects are only callable as regular
# functions from Python 3.10 on.
setattr(ep_etree, "etree_register_namespace",
        etree_register_namespace)


# importing in order necessary for intended monkey patch
import xmlschema

# Monkey patching additional instance functions to the import of xmlschema
# specifically xmlschema.dataobjects.DataElement

# Instance functions so DataElement object can use above elementpath.etree namespace functions
def register_namespace(self, prefix: Optional[str] = None, uri: Optional[str] = None) -> None:
  """
  Register *prefix* for the namespace *uri* via the encoded form of this data element.

  The data element is encoded with ``validation='lax'`` and the resulting
  root element is handed to ``ep_etree.etree_register_namespace``. When
  either argument is ``None`` the call is a no-op and the data element is
  not encoded at all.

  :param prefix: the namespace prefix to register.
  :param uri: the namespace URI to register.
  """
  if prefix is None or uri is None:
    return
  # Lax encoding returns an (element, errors) pair; the errors are not used here.
  root, _ = self.encode(validation='lax')
  ep_etree.etree_register_namespace(root, prefix, uri)

#patching
setattr(xmlschema.dataobjects.DataElement, "register_namespace", register_namespace)

def remove_registered_namespace(self, uri: str = '') -> None:
  """
  Unregister the namespace *uri* via the lax-encoded form of this data element.

  :param uri: the namespace URI passed on to
    ``ep_etree.etree_remove_registered_namespace``.
  """
  # Lax encoding returns an (element, errors) pair; only the element is needed.
  encoded_root, _unused_errors = self.encode(validation='lax')
  ep_etree.etree_remove_registered_namespace(encoded_root, uri)

#patching
xmlschema.dataobjects.DataElement.remove_registered_namespace = remove_registered_namespace

def get_registered_namespaces(self) -> dict:
  """
  Return the namespace registry reported for the lax-encoded form of this data element.

  The result is whatever ``ep_etree.etree_get_registered_namespaces`` returns
  for the encoded root element.
  """
  # Lax encoding returns an (element, errors) pair; only the element is needed.
  encoded_root, _unused_errors = self.encode(validation='lax')
  registry = ep_etree.etree_get_registered_namespaces(encoded_root)
  return registry

#patching
xmlschema.dataobjects.DataElement.get_registered_namespaces = get_registered_namespaces


# replacing .validate() & .is_valid() on DataElement so that namespaces from the DataElement
# get set to the xml.etree.ElementTree register_namespace._namespace_map global when used
def validate(self, use_defaults: bool = True,
             namespaces: Optional[xmlschema.aliases.NamespacesType] = None,
             max_depth: Optional[int] = None) -> None:
  """
  Validates the XML data object.

  When *namespaces* is not given and the data element has a non-empty
  ``nsmap``, that map is used for the validation.

  :param use_defaults: passed through to :meth:`iter_errors`.
  :param namespaces: optional namespace map; defaults to ``self.nsmap`` when \
  that is non-empty.
  :param max_depth: passed through to :meth:`iter_errors`.
  :raises: :exc:`XMLSchemaValidationError` if XML data object is not valid.
  :raises: :exc:`XMLSchemaValueError` if the instance has no schema bindings.
  """
  if self.nsmap and namespaces is None: #added code
    namespaces = self.nsmap #added code
  # Only the first reported error is raised.
  for error in self.iter_errors(use_defaults, namespaces, max_depth):
    raise error

#patching
setattr(xmlschema.dataobjects.DataElement, "validate", validate)

def is_valid(self, use_defaults: bool = True,
             namespaces: Optional[xmlschema.aliases.NamespacesType] = None,
             max_depth: Optional[int] = None) -> bool:
  """
  Like :meth:`validate` except it does not raise an exception on validation
  error but returns ``True`` if the XML data object is valid, ``False`` if
  it's invalid.

  When *namespaces* is not given and the data element has a non-empty
  ``nsmap``, that map is used for the validation.

  :param use_defaults: passed through to :meth:`iter_errors`.
  :param namespaces: optional namespace map; defaults to ``self.nsmap`` when \
  that is non-empty.
  :param max_depth: passed through to :meth:`iter_errors`.
  :raises: :exc:`XMLSchemaValueError` if the instance has no schema bindings.
  """
  if self.nsmap and namespaces is None: #added code
    namespaces = self.nsmap #added code
  # Pulling a single item is enough: any reported error means "not valid".
  error = next(self.iter_errors(use_defaults, namespaces, max_depth), None)
  return error is None

#patching
setattr(xmlschema.dataobjects.DataElement, "is_valid", is_valid)


# replace .tostring() on DataElement to allow for xml_declaration/encoding support
# TODO research more, will likely customize a bit further
def tostring(self,
             namespaces: Optional[xmlschema.aliases.NamespacesType] = None,
             indent: str = '',
             max_lines: Optional[int] = None,
             spaces_for_tab: Optional[int] = None,
             xml_declaration: Optional[bool] = None,
             encoding: str = 'unicode',
             method: str = 'xml') -> Any:
  """
  Serializes the data element tree to an XML source string.

  The data element is encoded with ``validation="strict"`` and the result is
  passed, together with all the other arguments, to
  ``ep_etree.etree_tostring``. When *namespaces* is not given and the data
  element has a non-empty ``nsmap``, that map is used.

  :param namespaces: optional namespace map; defaults to ``self.nsmap`` when \
  that is non-empty.
  :param indent: forwarded to ``etree_tostring``.
  :param max_lines: forwarded to ``etree_tostring``.
  :param spaces_for_tab: forwarded to ``etree_tostring``.
  :param xml_declaration: forwarded to ``etree_tostring``.
  :param encoding: forwarded to ``etree_tostring``.
  :param method: forwarded to ``etree_tostring``.
  :return: whatever ``etree_tostring`` returns for the given encoding.
  """
  if self.nsmap and namespaces is None:
    namespaces = self.nsmap

  # root, errors = self.encode(validation='lax')
  # NOTE(review): presumably strict encoding raises on invalid data instead of
  # returning an error list - confirm against the xmlschema documentation.
  root = self.encode(validation="strict") #prefer strict on my output just in case..
  return ep_etree.etree_tostring(
    root, namespaces, indent, max_lines, spaces_for_tab,
    xml_declaration, encoding, method)

#patching
setattr(xmlschema.dataobjects.DataElement, "tostring", tostring)


# add get_value function - paired with set_value
def get_value(self) -> Any:
  """
  Return the ``value`` of this data element (counterpart of ``set_value``).

  The leftover debugging ``print(type(self))`` was removed so that reading a
  value no longer writes to stdout.
  """
  return self.value

#patching
# Plain attribute assignment on the class, equivalent to the setattr() call form.
xmlschema.dataobjects.DataElement.get_value = get_value

# add set_value function
def set_value(self,
              value: Any,
              use_defaults: bool = True,
              namespaces: Optional[xmlschema.aliases.NamespacesType] = None,
              max_depth: Optional[int] = None) -> None:
  """
  Set ``.value`` and keep the change only if the data element still validates.

  Assures the change meets the XMLSchema and reverts back on error. It
  assumes the data meets the schema to begin with; if it does not, the value
  remains unchanged in the end. The previous value is held in a local
  variable, and the optional ``_locked`` flag is released on every exit path.

  :param value: the new value.
  :param use_defaults: passed through to :meth:`iter_errors`.
  :param namespaces: optional namespace map; defaults to ``self.nsmap`` when \
  that is non-empty.
  :param max_depth: passed through to :meth:`iter_errors`.
  :raises: :exc:`XMLSchemaValidationError` if XML data object is not valid \
  after attempted change.
  :raises: :exc:`XMLSchemaValueError` if the instance has no schema bindings.
  :raises: :exc:`ValueLockedError` if using ._locked and set to True.
  """
  uses_lock = hasattr(self, "_locked")
  if uses_lock and self._locked:
    raise ValueLockedError(self, variable_name='value')
  if uses_lock:
    self._locked = True

  original_value = self.value
  try:
    self.value = value
    if self.nsmap and namespaces is None:
      namespaces = self.nsmap
    # Only the first reported error is raised.
    for error in self.iter_errors(use_defaults, namespaces, max_depth):
      raise error
  except Exception:
    # revert value back to original, whatever went wrong during validation
    self.value = original_value
    raise
  finally:
    if uses_lock:
      self._locked = False # unlock before leaving if using/exists

#patching
setattr(xmlschema.dataobjects.DataElement, "set_value", set_value)


# add get_attrib function - paired with set_attrib
# remove added logic from .get(), requiring explicit matches only
def get_attrib(self, key: str) -> Any:
  """
  Return the XML attribute stored under exactly *key* (counterpart of ``set_attrib``).

  :param key: the attribute name, looked up as-is in ``self.attrib``.
  :raises: :exc:`KeyError` if ``self.attrib`` has no entry for *key*.
  """
  attributes = self.attrib
  return attributes[key]

#patching
# Plain attribute assignment on the class, equivalent to the setattr() call form.
xmlschema.dataobjects.DataElement.get_attrib = get_attrib

# add set_attrib function
def set_attrib(self,
               key: str,
               value: Any,
               use_defaults: bool = True,
               namespaces: Optional[xmlschema.aliases.NamespacesType] = None,
               max_depth: Optional[int] = None) -> None:
  """
  Set the XML attribute *key* and keep the change only if the element still validates.

  Assures the change meets the XMLSchema and reverts back on error: the
  previous value is restored, or the attribute is removed again if it did
  not exist before. It assumes the data meets the schema to begin with; if
  it does not, the attribute remains unchanged in the end. The previous
  state is held in local variables, and the optional ``_locked`` flag is
  released on every exit path. Nothing is returned.

  :param key: the attribute name.
  :param value: the new attribute value.
  :param use_defaults: passed through to :meth:`iter_errors`.
  :param namespaces: optional namespace map; defaults to ``self.nsmap`` when \
  that is non-empty.
  :param max_depth: passed through to :meth:`iter_errors`.
  :raises: :exc:`XMLSchemaValidationError` if XML data object is not valid \
  after attempted change.
  :raises: :exc:`XMLSchemaValueError` if the instance has no schema bindings.
  :raises: :exc:`ValueLockedError` if using ._locked and set to True.
  """
  uses_lock = hasattr(self, "_locked")
  if uses_lock and self._locked:
    raise ValueLockedError(self, variable_name='attrib[' + key + ']')
  if uses_lock:
    self._locked = True

  # A unique sentinel tells "attribute did not exist" apart from any real value.
  missing = object()
  previous = self.attrib.get(key, missing)
  try:
    self.attrib[key] = value
    if self.nsmap and namespaces is None:
      namespaces = self.nsmap
    # Only the first reported error is raised.
    for error in self.iter_errors(use_defaults, namespaces, max_depth):
      raise error
  except Exception:
    if previous is missing:
      self.attrib.pop(key, None) # just delete if it didn't exist
    else:
      self.attrib[key] = previous # revert value back to original if existed
    raise
  else:
    # TODO research @property / or some type of better variable binding?
    # self._expand_xDE_attrib_prefix exists if expand_xmlschema_DataElement is run
    if hasattr(self, '_expand_xDE_attrib_prefix'):
      setattr(self, self._expand_xDE_attrib_prefix + key, value)
  finally:
    if uses_lock:
      self._locked = False # unlock before leaving if using/exists

#patching
setattr(xmlschema.dataobjects.DataElement, "set_attrib", set_attrib)

# add del_attrib function
def del_attrib(self,
               key: str,
               use_defaults: bool = True,
               namespaces: Optional[xmlschema.aliases.NamespacesType] = None,
               max_depth: Optional[int] = None) -> None:
  """
  Delete the XML attribute *key* and keep the change only if the element still validates.

  Assures the change meets the XMLSchema and reverts back on error by
  recreating the attribute with its original value. It assumes the data
  meets the schema to begin with; if it does not, the attribute remains
  unchanged in the end. The previous value is held in a local variable, and
  the optional ``_locked`` flag is released on every exit path. Nothing is
  returned.

  :param key: the attribute name.
  :param use_defaults: passed through to :meth:`iter_errors`.
  :param namespaces: optional namespace map; defaults to ``self.nsmap`` when \
  that is non-empty.
  :param max_depth: passed through to :meth:`iter_errors`.
  :raises: :exc:`XMLSchemaValidationError` if XML data object is not valid \
  after attempted change.
  :raises: :exc:`XMLSchemaValueError` if the instance has no schema bindings.
  :raises: :exc:`ValueLockedError` if using ._locked and set to True.
  :raises: :exc:`KeyError` if xml tag attribute (.attrib[key]) doesn't exist.
  """
  uses_lock = hasattr(self, "_locked")
  if uses_lock and self._locked:
    raise ValueLockedError(self, variable_name='attrib[' + key + ']')
  if uses_lock:
    self._locked = True

  try:
    if key not in self.attrib:
      raise KeyError("'" + key + "' Attribute does not exist, nothing to do")

    original_value = self.attrib.pop(key)
    try:
      if self.nsmap and namespaces is None:
        namespaces = self.nsmap
      # Only the first reported error is raised.
      for error in self.iter_errors(use_defaults, namespaces, max_depth):
        # append informational message to error output
        if hasattr(error, "message"):
          error.message += (
            ":\n\nThe attribute value was returned to original state due to error"
            "\n\nThis error represents the state of this element IF the attribute were removed"
          )
        raise error
    except Exception:
      # attribute required, recreate value back to original
      self.attrib[key] = original_value
      raise

    # no errors

    # TODO research @property / or some type of better variable binding?
    # self._expand_xDE_attrib_prefix exists if expand_xmlschema_DataElement is run
    if hasattr(self, '_expand_xDE_attrib_prefix'):
      mirror_name = self._expand_xDE_attrib_prefix + key
      # The mirror attribute may not exist, e.g. if the XML attribute was
      # added to .attrib directly after the expansion ran.
      if hasattr(self, mirror_name):
        delattr(self, mirror_name)
  finally:
    if uses_lock:
      self._locked = False # unlock before leaving if using/exists

#patching
setattr(xmlschema.dataobjects.DataElement, "del_attrib", del_attrib)


# Monkey patching some class methods helpful for learning / troubleshooting
def _show_me_mro(cls):
  """Return the method resolution order of *cls*, as given by ``cls.mro()``."""
  return cls.mro()

# The function is wrapped in classmethod exactly once per patched class.
# Wrapping an already decorated classmethod again relies on classmethod
# descriptor chaining, which Python only supported from 3.9 through 3.12.
setattr(xmlschema.validators.schemas.XsdValidator, "_show_me_mro", classmethod(_show_me_mro))
setattr(xmlschema.dataobjects.DataElement, "_show_me_mro", classmethod(_show_me_mro))


# Demonstration inputs: both paths are placeholders to be replaced with real files.
# The schema is built first, then the XML instance is loaded as data objects.
schema = xmlschema.XMLSchema(
  "path/to/your.xsd",
  converter=xmlschema.JsonMLConverter,
)
xmlobj = schema.to_objects("path/to/your.xml")


# creates dot notation naming for all children recursively
# c_ default prefix for child, a_ default prefix for tag attribute
# _# numbered suffix for all children starting at 0
# increases from there if more than 1 child with same name
def expand_xmlschema_DataElement(xsobj: xmlschema.dataobjects.DataElement,
                                 child_prefix: str = 'c_',
                                 attrib_prefix: str = 'a_') -> None:
  """
  Add dot-notation attributes for the XML attributes and children of *xsobj*, recursively.

  Each XML attribute ``key`` becomes ``<attrib_prefix>key`` and each child
  becomes ``<child_prefix><local_name>_<n>``, where ``n`` is the first
  number from 0 not yet taken on *xsobj*. The chosen prefixes are stored on
  every expanded element as ``_expand_xDE_child_prefix`` and
  ``_expand_xDE_attrib_prefix``, and ``_locked`` is set to ``False``. The
  same prefixes are passed down to the children, so a non-default prefix
  applies to the whole tree.

  :param xsobj: the data element to expand in place.
  :param child_prefix: name prefix for the child attributes.
  :param attrib_prefix: name prefix for the XML attribute copies.
  """
  xsobj._expand_xDE_child_prefix = child_prefix
  xsobj._expand_xDE_attrib_prefix = attrib_prefix

  # _locked just an idea at the moment, may or may not use this in the end
  xsobj._locked = False

  # set a class attribute for each xml tag attribute
  # DO NOT change these directly, use set_attrib on the parent class which changes .attrib first
  # These are currently just a copy of what is in the .attrib dict
  # Validation has no knowledge of their existence if they are changed outside of design
  # TODO research @property / or some type of better variable binding?
  if xsobj.attrib:
    for key, attrib_value in xsobj.attrib.items():
      setattr(xsobj, attrib_prefix + key, attrib_value)

  # set a class attribute for each child
  for each in xsobj.iterchildren():
    # Pass the prefixes down, otherwise the children fall back to the defaults.
    expand_xmlschema_DataElement(each, child_prefix, attrib_prefix)
    base_name = child_prefix + each.local_name + "_"
    count = 0
    # Find the first free numbered suffix for this child name.
    while hasattr(xsobj, base_name + str(count)):
      count += 1
    setattr(xsobj, base_name + str(count), each)

expand_xmlschema_DataElement(xmlobj)
--
https://mail.python.org/mailman/listinfo/python-list

Reply via email to