Source code for rads.xml.lxml

"""XML tools using the lxml_ library.

.. _lxml: https://lxml.de/
"""

from typing import (
    IO,
    TYPE_CHECKING,
    Any,
    Iterator,
    Mapping,
    Optional,
    Sequence,
    Text,
    Union,
    cast,
)

from lxml import etree  # type: ignore
from lxml.etree import ETCompatXMLParser, ParseError, XMLParser  # type: ignore

from ..xml import base

# TODO: Change to functools.cached_property when dropping support for
#       Python 3.7
if TYPE_CHECKING:
    # property behaves properly with Mypy but cached_property does not, even
    # with the same type stub.
    cached_property = property
else:
    from cached_property import cached_property

__all__ = [
    "ParseError",
    "Element",
    "XMLParser",
    "parse",
    "fromstring",
    "fromstringlist",
    "error_with_file",
]


[docs]class Element(base.Element): """XML element that encapsulates an element from lxml_. Supports line number examination. .. _lxml: https://lxml.de/ """ def __init__(self, element: etree._Element, *, file: Optional[str] = None): """ :param: XML element from the lxml_ library. :param file: Optional filename/path the element is from. """ self._element = element self._file = file def __len__(self) -> int: return len(self._element) def __iter__(self) -> Iterator["Element"]: for e in self._element: yield Element(e, file=self._file)
[docs] def next(self) -> "Element": # noqa: D102 element = self._element.getnext() if element is None: raise StopIteration() return Element(element, file=self._file)
[docs] def prev(self) -> "Element": # noqa: D102 element = self._element.getprevious() if element is None: raise StopIteration() return Element(element, file=self._file)
[docs] def up(self) -> "Element": # noqa: D102 element = self._element.getparent() if element is None: raise StopIteration() return Element(element, file=self._file)
[docs] def down(self) -> "Element": # noqa: D102 # throws StopIteration if there are no children return Element(next(self._element.iterchildren()), file=self._file)
@property def file(self) -> str: if self._file: return self._file return cast(str, self._element.base) @property def opening_line(self) -> int: return cast(int, self._element.sourceline)
[docs] @cached_property def num_lines(self) -> int: return len(etree.tostring(self._element).strip().split(b"\n"))
[docs] @cached_property def closing_line(self) -> int: return self.opening_line + self.num_lines - 1
@property def tag(self) -> str: return cast(str, self._element.tag) @property def text(self) -> Optional[str]: return cast(str, self._element.text) @property def attributes(self) -> Mapping[str, str]: return cast(Mapping[str, str], self._element.attrib)
_ParserInputType = Union[bytes, Text] _FileOrFilename = Union[str, bytes, int, IO[Any]] # The following functions are here to make lxml more compatible with etree.
[docs]def parse( source: _FileOrFilename, parser: Optional[XMLParser] = None ) -> etree._ElementTree: """Parse XML document into element tree. This is wrapper around :func:`lxml.etree.parse` to make it behave like :func:`xml.etree.ElementTree.parse`. :param source: Filename or file object containing XML data. :param parser: Optional parser instance, defaulting to :class:`lxml.etree.ETCompatXMLParser`. :return: An ElementTree instance. """ if parser is None: parser = ETCompatXMLParser() return etree.parse(source, parser)
[docs]def fromstring( text: _ParserInputType, parser: Optional[XMLParser] = None ) -> etree._Element: """Parse XML document from string constant. This function can be used to embed 'XML Literals' in Python code. This is wrapper around :func:`lxml.etree.fromstring` to make it behave like :func:`xml.etree.ElementTree.fromtstring`. :param text: A string containing XML data. :param parser: Optional parser instance, defaulting to :class:`lxml.etree.ETCompatXMLParser`. :return: An Element instance. """ if parser is None: parser = ETCompatXMLParser() return etree.fromstring(text, parser)
[docs]def fromstringlist( sequence: Sequence[_ParserInputType], parser: Optional[XMLParser] = None ) -> etree._Element: """Parse XML document from sequence of string fragments. :param sequence: A list or other sequence of strings containing XML data. :param parser: Optional parser instance, defaulting to :class:`lxml.etree.ETCompatXMLParser`. :return: An Element instance. """ if parser is None: parser = ETCompatXMLParser() return etree.fromstringlist(sequence, parser)
[docs]def error_with_file(error: ParseError, file: str) -> ParseError: """Add filename to an XML parse error. :param error: Original XML parse error. :param file: Filename to add. :return: A new parse error (of the same type as `error`) with the `filename` added. """ error.filename = file return type(error)( error.msg, error.code, error.position[0], error.position[1], file )