"""The ``lxml.isoschematron`` package implements ISO Schematron support on top
of the pure-xslt 'skeleton' implementation.
"""
import sys
import os.path
from lxml import etree as _etree # due to validator __init__ signature
# some compat stuff, borrowed from lxml.html
# Each ``try`` merely references the Python 2 builtin name; when the lookup
# raises NameError the name is bound to ``str`` so the rest of the module can
# use ``unicode`` and ``basestring`` unconditionally.
try:
    unicode
except NameError:
    # Python 3
    unicode = str
try:
    basestring
except NameError:
    # Python 3
    basestring = str
# Names exported by ``from lxml.isoschematron import *``.
__all__ = ['extract_xsd', 'extract_rng', 'iso_dsdl_include',
           'iso_abstract_expand', 'iso_svrl_for_xslt1',
           'svrl_validation_errors', 'schematron_schema_valid',
           'stylesheet_params', 'Schematron']
# Namespace URIs used throughout this module.
#FIXME: Maybe lxml should provide a dedicated place for common namespace
#FIXME: definitions?
XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema"
RELAXNG_NS = "http://relaxng.org/ns/structure/1.0"
SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron"
SVRL_NS = "http://purl.oclc.org/dsdl/svrl"

# Fully qualified ``{namespace}schema`` tags of the Schematron and the
# XML Schema root elements.
_schematron_root = '{' + SCHEMATRON_NS + '}schema'
_xml_schema_root = '{' + XML_SCHEMA_NS + '}schema'

# The ``resources`` directory located next to this module file.
_resources_dir = os.path.join(os.path.dirname(__file__), 'resources')
# the iso-schematron skeleton implementation steps aka xsl transformations
# Every stylesheet below is parsed from the ``resources/xsl`` directory and
# wrapped in an ``XSLT`` object when this module is imported.

# XSD2Schtrn.xsl -- presumably extracts Schematron rules embedded in an
# XML Schema document (inferred from the file name; confirm in the stylesheet)
extract_xsd = _etree.XSLT(_etree.parse(
    os.path.join(_resources_dir, 'xsl', 'XSD2Schtrn.xsl')))
# RNG2Schtrn.xsl -- presumably the RelaxNG counterpart of the above
extract_rng = _etree.XSLT(_etree.parse(
    os.path.join(_resources_dir, 'xsl', 'RNG2Schtrn.xsl')))
# iso_dsdl_include.xsl -- presumably the "process inclusions" step
iso_dsdl_include = _etree.XSLT(_etree.parse(
    os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1',
                 'iso_dsdl_include.xsl')))
# iso_abstract_expand.xsl -- presumably the "process abstract patterns" step
iso_abstract_expand = _etree.XSLT(_etree.parse(
    os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1',
                 'iso_abstract_expand.xsl')))
# iso_svrl_for_xslt1.xsl -- presumably the "compile schematron to XSLT" step
iso_svrl_for_xslt1 = _etree.XSLT(_etree.parse(
    os.path.join(_resources_dir,
                 'xsl', 'iso-schematron-xslt1', 'iso_svrl_for_xslt1.xsl')))
# svrl result accessors
# Compiled XPath selecting every ``failed-assert`` element in the SVRL
# namespace, anywhere in the document it is evaluated against.
svrl_validation_errors = _etree.XPath(
    '//svrl:failed-assert', namespaces={'svrl': SVRL_NS})
# RelaxNG validator for schematron schemas
# Loaded from ``resources/rng/iso-schematron.rng`` at import time.
schematron_schema_valid = _etree.RelaxNG(
    file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'))
def stylesheet_params(**kwargs):
    """Build a dictionary of XSLT stylesheet parameters from keyword args.

    XSL stylesheet parameters must be XPath expressions, i.e.:

    * string expressions, like "'5'"
    * simple (number) expressions, like "5"
    * valid XPath expressions, like "/a/b/text()"

    Each keyword argument is translated according to its Python type:

    * a string is wrapped with ``XSLT.strparam()``
    * an ``XPath`` object is stored unchanged
    * ``None`` is rejected with a ``TypeError``
    * anything else is converted to its text representation

    NOTE(review): the last rule means a boolean is passed on as the text
    ``True``/``False``, which is not an XPath boolean literal -- confirm
    whether callers should pass ``etree.XPath('true()')`` instead.
    """
    params = {}
    for name, value in kwargs.items():
        # Guard first: None can never be expressed as an XPath expression.
        if value is None:
            raise TypeError('None not allowed as a stylesheet parameter')
        if isinstance(value, basestring):
            # Plain strings have to be quoted as XPath string literals.
            params[name] = _etree.XSLT.strparam(value)
        elif isinstance(value, _etree.XPath):
            params[name] = value
        else:
            params[name] = unicode(value)
    return params
# helper function for use in Schematron __init__
def _stylesheet_param_dict(paramsDict, kwargsDict):
    """Merge two parameter mappings and wrap them as stylesheet arguments.

    A fresh dictionary is built from ``paramsDict``, so the mapping handed in
    by the caller (possibly a shared default argument) is never modified.
    Entries of ``kwargsDict`` override entries with the same key, except that
    entries whose value is ``None`` are skipped.  The merged mapping is then
    converted with ``stylesheet_params()`` and returned.
    """
    merged = dict(paramsDict)
    merged.update(
        (name, value)
        for name, value in kwargsDict.items()
        if value is not None)  # None means "not given": keep the base value
    return stylesheet_params(**merged)
class Schematron(_etree._Validator):
"""An ISO Schematron validator.
Pass a root Element or an ElementTree to turn it into a validator.
Alternatively, pass a filename as keyword argument 'file' to parse from
the file system.
Schematron is a less well known, but very powerful schema language.
The main idea is to use the capabilities of XPath to put restrictions on
the structure and the content of XML documents.
The standard behaviour is to fail on ``failed-assert`` findings only
(``ASSERTS_ONLY``). To change this, you can either pass a report filter
function to the ``error_finder`` parameter (e.g. ``ASSERTS_AND_REPORTS``
or a custom ``XPath`` object), or subclass isoschematron.Schematron for
complete control of the validation process.
Built on the Schematron language 'reference' skeleton pure-xslt
implementation, the validator is created as an XSLT 1.0 stylesheet using
these steps:
0) (Extract from XML Schema or RelaxNG schema)
1) Process inclusions
2) Process abstract patterns
3) Compile the schematron schema to XSLT
The ``include`` and ``expand`` keyword arguments can be used to switch off
steps 1) and 2).
To set parameters for steps 1), 2) and 3) hand parameter dictionaries to the
keyword arguments ``include_params``, ``expand_params`` or
``compile_params``.
For convenience, the compile-step parameter ``phase`` is also exposed as a
keyword argument ``phase``. This takes precedence if the parameter is also
given in the parameter dictionary.
If ``store_schematron`` is set to True, the (included-and-expanded)
schematron document tree is stored and available through the ``schematron``
property.
If ``store_xslt`` is set to True, the validation XSLT document tree will be
stored and can be retrieved through the ``validator_xslt`` property.
With ``store_report`` set to True (default: False), the resulting validation
report document gets stored and can be accessed as the ``validation_report``
property.
Here is a usage example::
>>> from lxml import etree
>>> from lxml.isoschematron import Schematron
>>> schematron = Schematron(etree.XML('''
...