maplib

  1# r'''
  2# # Overview
  3#
  4# '''
  5
  6import os
  7
  8# use default memory pool to prevent segmentation fault
  9os.environ['ARROW_DEFAULT_MEMORY_POOL'] = 'system'
 10import pyarrow as pa
 11import logging
 12
 13logger = logging.getLogger(__name__)
 14
 15__all__ = [
 16    "Model",
 17    "a",
 18    "Triple",
 19    "SolutionMappings",
 20    "IndexingOptions",
 21    "ValidationReport",
 22    "Instance",
 23    "Template",
 24    "Argument",
 25    "Parameter",
 26    "Variable",
 27    "RDFType",
 28    "xsd",
 29    "rdf",
 30    "rdfs",
 31    "owl",
 32    "IRI",
 33    "Literal",
 34    "Prefix",
 35    "BlankNode",
 36    "explore",
 37    "add_triples",
 38    "generate_templates",
 39    "MaplibException",
 40]
 41
 42import pathlib
 43from importlib.metadata import version
 44from .maplib import *
 45from .adding_triples import add_triples
 46from .template_generator import generate_templates
 47
 48"""
 49http://www.w3.org/1999/02/22-rdf-syntax-ns#type
 50"""
 51a = rdf.type
 52
 53if (pathlib.Path(__file__).parent.resolve() / "graph_explorer").exists():
 54    from .graph_explorer import explore as _explore
 55else:
 56
 57    def _explore(
 58            m: "Model",
 59            host: str = "localhost",
 60            port: int = 8000,
 61            bind: str = "localhost",
 62            popup=True,
 63            fts=True,
 64            fts_path: str = "fts",
 65    ):
 66        print("Contact Data Treehouse to try!")
 67
 68if (pathlib.Path(__file__).parent.resolve() / "opc_ua").exists():
 69    from .opc_ua import map_opc_ua as _map_opc_ua
 70else:
 71    from typing import Union
 72    import pathlib as Path
 73    def _map_opc_ua(
 74            m:"Model",
 75            folder:Union[Path, str],
 76            graph:str
 77    ):
 78        print("Contact Data Treehouse to try!")
 79
 80
 81def explore(*args, **kwargs):
 82    """Deprecated way to start an explore session.
 83Use the explore method on a Model object instead
 84"""
 85    logger.warn("Calling `maplib.explore` is deprecated, use `m.explore()` on a `Model` object instead")
 86    if kwargs.get("popup") == None or kwargs.get("popup") == True:
 87        logger.warn("""Calling explore without a popup argument defaults to it being on.
 88The popup argument is deprecated, so if you are relying on explore() opening a browser window
 89please change this to something like
 90
 91```
 92import webbrowser
 93from maplib import Model
 94
 95m = Model()
 96...
 97s = m.explore()
 98webbrowser.open(s.url, new=2)
 99```
100""")
101        kwargs["popup"] = True
102    elif kwargs.get("popup") == False:
103        logger.warn("The new explore function on a Model, no longer defaults to popping up the browser ")
104
105    return _explore(*args, **kwargs)
106
107
108__version__ = version("maplib")
class Model:

A model session allowing:

  • Iterative mapping using OTTR templates
  • Interactive SPARQL querying and enrichment
  • SHACL validation

Usage:

>>> from maplib import Model
... doc = '''
... @prefix ex:<http://example.net/ns#>.
... ex:ExampleTemplate [?MyValue] :: {
...    ottr:Triple(ex:myObject, ex:hasValue, ?MyValue)
... } .'''
... m = Model()
... m.add_template(doc)
Parameters
  • documents: a stOTTR document or a list of these
  • indexing_options: options for indexing
Model(indexing_options: IndexingOptions = None)
def add_template(self, template: Union[Template, str]):

Add a template to the model. Overwrites any existing template with the same IRI.

Parameters
  • template: The template to add, as a stOTTR string or as a programmatically constructed Template.
Returns
def add_prefixes(self, prefixes: Dict[str, str]):

Add prefixes that will be used in parsing of SPARQL, Datalog and OTTR.

Usage:

>>> m.add_prefixes({"ex" : "http://example.net/"})
Parameters
  • prefixes: Known prefixes
Returns
def truncate_graph(self, graph: str = None) -> None:

Removes all triples associated with the given graph from the triplestore, includes transient triples and full-text search entries.

Parameters
  • graph: The IRI of the graph to truncate.
def detach_graph(self, graph: str = None, preserve_name: bool = False) -> Model:

Detaches a named graph and returns it as its own Model object. The named graph is removed from the original Model.

Parameters
  • graph: The name of the graph to detach. Defaults to the default graph.
  • preserve_name: Preserve the name of the graph in the new Model, defaults to False.
Returns

A model.

def map( self, template: Union[str, Template, IRI], data: Union[polars.dataframe.frame.DataFrame, SolutionMappings] = None, graph: str = None, validate_iris: bool = True) -> None:

Map a template using a DataFrame. Usage:

>>> m.map("ex:ExampleTemplate", df)

If the template has no arguments, the data argument is not necessary.

Parameters
  • template: Template, IRI, IRI string or prefixed template name.
  • data: DataFrame where the columns have the same names as the template arguments (when piping the output of queries back in, use SolutionMappings)
  • graph: The IRI of the graph to add triples to.
  • validate_iris: Validate any IRI-columns.
def size(self, graph: str = None) -> int:

Get the number of triples in a graph.

Parameters
  • graph: The named graph we are returning the size for
Returns

The number of triples in the graph.

def map_json( self, path_or_string: pathlib.Path | str, graph: str = None, transient: bool = True) -> None:

Map a JSON file or string to triples. Usage:

>>> m.map_json("my_doc.json")

or:

>>> m.map_json('{"my_key":[true, "abc"]}')
Parameters
  • path_or_string: Path to a JSON document or a JSON string.
  • graph: The IRI of the graph to add triples to. None is the default graph.
  • transient: If True, the triples are transient: they are available for querying and validation, but are not included when serializing the graph.
def map_xml( self, path_or_string: pathlib.Path | str, graph: str = None, transient: bool = True) -> None:

Map an XML file or string to triples. Usage:

>>> m.map_xml("my_doc.xml")

or:

>>> m.map_xml('<root><child>value</child></root>')
Parameters
  • path_or_string: Path to an XML document or an XML string.
  • graph: The IRI of the graph to add triples to. None is the default graph.
  • transient: If True, the triples are transient: they are available for querying and validation, but are not included when serializing the graph.
def map_triples( self, data: Union[polars.dataframe.frame.DataFrame, SolutionMappings] = None, predicate: str = None, graph: str = None, validate_iris: bool = True) -> None:

Map triples from a DataFrame with columns subject, object and predicate. The predicate column can also be supplied as a string if it is the same for all rows. Usage:

>>> m.map_triples(df)

If the template has no arguments, the df argument is not necessary.

Parameters
  • data: DataFrame where the columns are named subject and object. May also contain a predicate-column. When piping the output of queries back in, use SolutionMappings.
  • predicate: The IRI of the predicate, when it is the same for all rows.
  • graph: The IRI of the graph to add triples to.
  • validate_iris: Validate any IRI-columns.
def map_default( self, data: Union[polars.dataframe.frame.DataFrame, SolutionMappings], primary_key_column: str, dry_run: bool = False, graph: str = None, validate_iris: bool = True) -> str:

Create a default template and map it based on a dataframe. Usage:

>>> template_string = m.map_default(df, "myKeyCol")
... print(template_string)
Parameters
  • data: DataFrame where the columns have the same names as the template arguments (when piping the output of queries back in, use SolutionMappings)
  • primary_key_column: This column will be the subject of all triples in the generated template.
  • dry_run: Do not map the template, only return the string.
  • graph: The IRI of the graph to add triples to.
  • validate_iris: Validate any IRI-columns.
Returns

The generated template

def map_df(self, df: polars.dataframe.frame.DataFrame, graph: str = None):

Map a DataFrame to triples using Facade-X (approximately the CSV mapping). Usage:

>>> df = pl.read_csv("my_csv.csv")
>>> m.map_df(df)
Parameters
  • df: DataFrame to map using Facade-X (using approximately the CSV-mapping)
  • graph: The IRI of the graph to add triples to.
Returns

None

def explore( self, host: str = 'localhost', port: int = 8000, bind: str = 'localhost', popup=False, fts=True, fts_path: str = 'fts', graph: str = None, page: str = None) -> 'ExploreServer':

Starts a graph explorer session. To run from Jupyter Notebook use:

>>> server = m.explore()
You can later stop the server with
>>> server.stop()
Parameters
  • host: The hostname that we will point the browser to.
  • port: The port where the graph explorer webserver listens on.
  • bind: Bind to the following host / ip.
  • fts: Enable full text search indexing
  • fts_path: Path to the fts index
  • graph: The named graph to explore, defaults to the default graph
  • page: We use this feature flag to test new frontends (try "new" or "yasgui")
def map_opc_ua(self, folder: Union[str, pathlib.Path], graph: str = None):

Map a folder of OPC UA NodeSet2XMLs to RDF. This folder MUST contain the Namespace 0 XML to work properly.

Usage:

>>> m.map_opc_ua("my_opc_ua_nodeset2_xmls", graph="urn:maplib:uagraphs")
Parameters
  • folder: OPC UA NodeSet2 XMLs are found in this folder
  • graph: The IRI of the graph to add triples to.
Returns

None

def add_virtualization( self, virtualized_database: maplib.VirtualizedDatabase, resources: Dict[str, Template]):
Parameters
  • virtualized_database: We call the query-function of this object.
  • resources: The templates associated with each resource
def query( self, query: str, parameters: Dict[str, SolutionMappings] = None, solution_mappings: bool = False, graph: str = None, streaming: bool = False, return_json: bool = False, include_transient: bool = True, max_rows: int = None, debug: bool = False) -> Union[polars.dataframe.frame.DataFrame, SolutionMappings, List[Union[polars.dataframe.frame.DataFrame, SolutionMappings, str]], NoneType]:

Query the contained knowledge graph using SPARQL. Currently, SELECT, CONSTRUCT and INSERT are supported. Usage:

>>> df = model.query('''
... PREFIX ex:<http://example.net/ns#>
... SELECT ?obj1 ?obj2 WHERE {
...    ?obj1 ex:hasObj ?obj2
... }''')
... print(df)
Parameters
  • query: The SPARQL query string
  • parameters: PVALUES Parameters, for each parameter, the SolutionMappings containing corresponding mappings and types.
  • solution_mappings: Returns SolutionMappings with maplib-native formatting and with RDF typing. Useful for round-trips.
  • graph: The IRI of the graph to query.
  • streaming: Use Polars streaming
  • return_json: Return JSON string.
  • include_transient: Include transient triples when querying.
  • max_rows: Maximum estimated rows in result, helps avoid out-of-memory errors.
  • debug: Why does my query have no results?
Returns

DataFrame (Select), list of DataFrames (Construct) containing results, None for Insert-queries, or SolutionMappings when solution_mappings is set.

def update( self, update: str, parameters: Dict[str, SolutionMappings] = None, graph: str = None, streaming: bool = False, include_transient: bool = True, max_rows: int = None, debug: bool = False):

Execute a SPARQL Update on the graph. Usage:

>>> m = Model(doc)
... # Omitted
... update_pizzas = '''
... ...'''
... m.update(update_pizzas)
Parameters
  • update: The SPARQL Update string
  • parameters: PVALUES Parameters, for each parameter, the SolutionMappings containing corresponding mappings and types.
  • streaming: Use Polars streaming
  • include_transient: Include transient triples when querying.
  • max_rows: Maximum estimated rows in result, helps avoid out-of-memory errors.
  • debug: Why does my query have no results?
Returns

None

def create_index( self, options: IndexingOptions = None, all: bool = True, graph: str = None):
Parameters
  • options: Indexing options
  • all: Apply to all existing and new graphs
  • graph: The graph where indexes should be added
Returns
def validate( self, shape_graph: str = None, data_graph: str = None, report_graph: str = None, inferences_graph: str = None, include_details: bool = False, include_conforms: bool = False, include_shape_graph: bool = True, streaming: bool = False, max_shape_constraint_results: int = None, only_shapes: List[str] = None, deactivate_shapes: List[str] = None, dry_run: bool = False, max_rows: int = None, serial: bool = False, max_iterations: Optional[int] = 100000, debug_rules: bool = False) -> ValidationReport:

Validate the contained knowledge graph using SHACL. Assumes that the contained knowledge graph also contains SHACL Shapes.

Parameters
  • shape_graph: The IRI of the Shape Graph (defaults to the default graph).
  • data_graph: The IRI of the Data Graph (defaults to the default graph).
  • report_graph: If this IRI is supplied, the validation report (if any) is found in this named graph.
  • inferences_graph: If this IRI is supplied, any inference results from sh:rule can be found in this named graph.
  • include_details: Include details of SHACL evaluation alongside the report. Currently uses a lot of memory.
  • include_conforms: Include those results that conformed. Also applies to details.
  • solution_mappings: Returns SolutionMappings instead of DataFrame (includes types for columns).
  • streaming: Use Polars streaming
  • max_shape_constraint_results: Maximum number of results per shape and constraint. Reduces the size of the result set.
  • only_shapes: Validate only these shapes, None means all shapes are validated (must be IRI, cannot be used with deactivate_shapes).
  • deactivate_shapes: Disable validation of these shapes (must be IRI, cannot be used with only_shapes).
  • dry_run: Only find targets of shapes, but do not validate them.
  • max_rows: Maximum estimated rows in underlying SPARQL results, helps avoid out-of-memory errors.
  • serial: Turns off most parallel validation of shapes.
  • max_iterations: Maximum number of iterations for SHACL rules.
  • debug_rules: Debug why rules returning no results do so. Included in rule log.
Returns

Validation report containing shape performance details and target counts and whether the graph conforms (report.conforms)

def shacl_report(self, /, graph=None, streaming=None):
def insert( self, query: str, parameters: Dict[str, SolutionMappings] = None, solution_mappings: bool = False, transient: bool = False, streaming: bool = False, source_graph: str = None, target_graph: str = None, include_transient: bool = True, max_rows: int = None, debug: bool = False):

Insert the results of a Construct query in the graph. Useful for being able to use the same query for inspecting what will be inserted and actually inserting. Usage:

>>> m = Model(doc)
... # Omitted
... hpizzas = '''
... PREFIX pizza:<https://github.com/magbak/maplib/pizza#>
... PREFIX ing:<https://github.com/magbak/maplib/pizza/ingredients#>
... CONSTRUCT { ?p a pizza:HeterodoxPizza }
... WHERE {
... ?p a pizza:Pizza .
... ?p pizza:hasIngredient ing:Pineapple .
... }'''
... m.insert(hpizzas)
Parameters
  • query: The SPARQL Construct query string
  • parameters: PVALUES Parameters, for each parameter, the SolutionMappings containing corresponding mappings and types.
  • solution_mappings: Returns SolutionMappings with maplib-native formatting and with RDF typing. Useful for round-trips.
  • transient: Should the inserted triples be transient?
  • source_graph: The IRI of the source graph to execute the construct query.
  • target_graph: The IRI of the target graph to insert into.
  • streaming: Use Polars streaming
  • include_transient: Include transient triples when querying (but see "transient" above).
  • max_rows: Maximum estimated rows in result, helps avoid out-of-memory errors.
  • debug: Why does my query have no results?
Returns

None

def read( self, file_path: Union[str, pathlib.Path], format: Literal['ntriples', 'turtle', 'rdf/xml', 'cim/xml', 'json-ld', 'hdt'] = None, base_iri: str = None, transient: bool = False, parallel: bool = None, checked: bool = True, graph: str = None, replace_graph: bool = False, triples_batch_size: int = 10000000, known_contexts: Dict[str, str] = None) -> None:

Reads triples from a file path. You can specify the format, or it will be derived using file extension, e.g. filename.ttl or filename.nt. Specify transient if you only want the triples to be available for further querying and validation, but not persisted using write-methods.

Usage:

>>> m.read("my_triples.ttl")
Parameters
  • file_path: The path of the file containing triples
  • format: One of "ntriples", "turtle", "rdf/xml", "json-ld", "cim/xml" or "hdt", otherwise it is inferred from the file extension.
  • base_iri: Base iri
  • transient: If True, the triples are transient: they are available for querying and validation, but are not included when writing the graph to the file system.
  • parallel: Parse triples in parallel, currently only NTriples and Turtle. Assumes all prefixes are in the beginning of the document. Defaults to true only for NTriples.
  • checked: Check IRIs etc.
  • graph: The IRI of the graph to read the triples into, if None, it will be the default graph.
  • replace_graph: Replace the graph with these triples? Will replace the default graph if no graph is specified.
  • triples_batch_size: Read this many triples in each batch.
  • known_contexts: Contexts in JSON-LD documents are resolved towards this dict.
def read_template(self, file_path: Union[str, pathlib.Path]) -> None:

Reads template(s) from a file path.

Usage:

>>> m.read_template("templates.ttl")
Parameters
  • file_path: The path of the file containing templates in stOTTR format
def reads( self, s: str, format: Literal['ntriples', 'turtle', 'rdf/xml', 'cim/xml', 'json-ld'], base_iri: str = None, transient: bool = False, parallel: bool = None, checked: bool = True, graph: str = None, replace_graph: bool = False, triples_batch_size: int = 10000000, known_contexts: Dict[str, str] = None) -> None:

Reads triples from a string. Specify transient if you only want the triples to be available for further querying and validation, but not persisted using write-methods.

Usage:

>>> m.reads(my_ntriples_string, format="ntriples")
Parameters
  • s: String containing serialized triples.
  • format: One of "ntriples", "turtle", "rdf/xml", "json-ld" or "cim/xml".
  • base_iri: Base iri
  • transient: If True, the triples are transient: they are available for querying and validation, but are not included when writing the graph to the file system.
  • parallel: Parse triples in parallel, currently only NTriples and Turtle. Assumes all prefixes are in the beginning of the document. Defaults to true for NTriples.
  • checked: Check IRIs etc.
  • graph: The IRI of the graph to read the triples into.
  • replace_graph: Replace the graph with these triples? Will replace the default graph if no graph is specified.
  • triples_batch_size: Number of triples to read in each batch.
  • known_contexts: Contexts in JSON-LD documents are resolved towards this dict.
def get_templates(self) -> List[Template]:

Return the OTTR templates currently held by the model (whether added as stOTTR or programmatically). The built-in ottr:Triple primitive is not included.

Usage:

>>> for t in m.get_templates():
...     print(t)
Returns

A list of Template objects.

def templates_to_graph(self, graph: str = None) -> None:

Materialize the model's OTTR templates into a named graph as RDF, using the flattened maplib template vocabulary (prefix maplib, base https://datatreehouse.github.io/maplib/vocab#). This lets template structure and the interconnectedness of IRIs across templates be inspected with ordinary SPARQL, and used to derive SHACL shapes. The triples are added alongside any existing content of the target graph (the graph is not replaced).

Usage:

>>> m.templates_to_graph("https://example.org/templates")
>>> m.query('''
... PREFIX maplib: <https://datatreehouse.github.io/maplib/vocab#>
... SELECT ?template ?iri WHERE {
...     GRAPH <https://example.org/templates> { ?template maplib:referencesIri ?iri }
... }''')
Parameters
  • graph: The IRI of the graph to add the template triples to. Defaults to the default graph.
def write( self, file_path: Union[str, pathlib.Path], format=typing.Literal['ntriples', 'turtle', 'rdf/xml', 'hdt'], graph: str = None, prefixes: Dict[str, str] = None) -> None:

Write the non-transient triples to the file path specified, in the given format.

Usage:

>>> m.write("my_triples.nt", format="ntriples")
Parameters
  • file_path: The path of the file to write the triples to
  • format: One of "ntriples", "turtle", "rdf/xml", "hdt". HDT is built in memory; literals with special characters are stored N-Triples-escaped, following the Rust hdt crate.
  • graph: The IRI of the graph to write.
  • prefixes: The prefixes that will be used in turtle serialization.
def write_cim_xml( self, file_path: Union[str, pathlib.Path], profile_graph: str, model_iri: str = None, version: str = None, description: str = None, created: str = None, scenario_time: str = None, modeling_authority_set: str = None, prefixes: Dict[str, str] = None, graph: str = None) -> None:

Write the legacy CIM XML format.

>>> PROFILE_GRAPH = "urn:graph:profiles"
>>> m = Model()
>>> m.read(model_path, base_iri=publicID, format="rdf/xml")
>>> m.read("61970-600-2_Equipment-AP-Voc-RDFS2020_v3-0-0.rdf", graph=PROFILE_GRAPH, format="rdf/xml")
>>> m.read("61970-600-2_Operation-AP-Voc-RDFS2020_v3-0-0.rdf", graph=PROFILE_GRAPH, format="rdf/xml")
>>> m.write_cim_xml(
>>>     "model.xml",
>>>     profile_graph=PROFILE_GRAPH,
>>>     description = "MyModel",
>>>     created = "2023-09-14T20:27:41",
>>>     scenario_time = "2023-09-14T02:44:43",
>>>     modeling_authority_set="www.westernpower.co.uk",
>>>     version="22",
>>> )
Parameters
  • file_path: The path of the file to write the CIM XML to
  • profile_graph: The IRI of the graph containing the ontology of the CIM profile to write.
  • model_iri: model_iri a md:FullModel. Is generated if not provided.
  • version: model_iri md:Model.version version .
  • description: model_iri md:Model.description description .
  • created: model_iri md:Model.created created .
  • scenario_time: model_iri md:Model.scenarioTime scenario_time .
  • modeling_authority_set: model_iri md:Model.modelingAuthoritySet modeling_authority_set .
  • prefixes: Prefixes to be used in XML export.
  • graph: The graph to write, defaults to the default graph.
def writes( self, format=typing.Literal['ntriples', 'turtle', 'rdf/xml'], graph: str = None, prefixes: Dict[str, str] = None) -> str:

Write the non-transient triples to a string in memory.

Usage:

>>> s = m.writes(format="turtle")
Parameters
  • format: One of "ntriples", "turtle", "rdf/xml".
  • graph: The IRI of the graph to write.
  • prefixes: The prefixes used for turtle serialization.
Returns

The triples in the model, serialized as a string in the given format (potentially a large string)

def write_native_parquet(self, folder_path: Union[str, pathlib.Path], graph: str = None) -> None:

Write non-transient triples using the internal native Parquet format.

Usage:

>>> m.write_native_parquet("output_folder")
Parameters
  • folder_path: The path of the folder to write triples in the native format.
  • graph: The IRI of the graph to write.
def get_predicate_iris( self, graph: str = None, include_transient: bool = False) -> List[IRI]:
Parameters
  • graph: The graph to get the predicate iris from.
  • include_transient: Should we include predicates only between transient triples?
Returns

The IRIs of the predicates currently in the given graph.

def get_predicate( self, iri: IRI, graph: str = None, include_transient: bool = False) -> List[SolutionMappings]:
Parameters
  • iri: The predicate IRI
  • graph: The graph to get the predicate from.
  • include_transient: Should we include transient triples?
Returns

A list of the underlying tables that store a given predicate.

def infer( self, ruleset: Union[str, List[str]], graph: str = None, max_iterations: Optional[int] = 100000, max_results: Optional[int] = 10000000, include_transient: bool = True, max_rows: Optional[int] = 100000000, debug: bool = False) -> Optional[Dict[str, polars.dataframe.frame.DataFrame]]:

Run the inference rules that are provided

Parameters
  • ruleset: The Datalog ruleset (a string, or a list of strings).
  • graph: Apply the ruleset to this graph, defaults to the default graph, or the graph specified in the rules.
  • max_iterations: Maximum number of iterations.
  • max_results: Maximum number of results.
  • include_transient: Include transient triples when reasoning.
  • max_rows: Maximum estimated rows in result, helps avoid out-of-memory errors.
  • debug: Debugs rule bodies for executions that give no triples.
Returns

The inferred N-Tuples.

a = IRI(http://www.w3.org/1999/02/22-rdf-syntax-ns#type)
def Triple( subject: Union[Argument, IRI, Variable, BlankNode], predicate: Union[Argument, IRI, Variable, BlankNode], object: Union[Argument, IRI, Variable, Literal, BlankNode], list_expander: Optional[Literal['cross', 'zipMin', 'zipMax']] = None):

An OTTR Triple Pattern used for creating templates. This is the basis pattern which all template instances are rewritten into. Equivalent to:

>>> ottr = Prefix("http://ns.ottr.xyz/0.4/")
... Instance(ottr.suf("Triple"), subject, predicate, object, list_expander)
Parameters
  • subject:
  • predicate:
  • object:
  • list_expander:
Returns
class SolutionMappings:

Detailed information about the solution mappings, the types of the variables and debugging for queries.

SolutionMappings( mappings: polars.dataframe.frame.DataFrame, rdf_types: Dict[str, RDFType])

Create new SolutionMappings object corresponding to solution mappings for variables in e.g. a query.

Parameters
  • mappings: A DataFrame
  • rdf_types: For each column (variable), the RDFType of the variable.
mappings: polars.dataframe.frame.DataFrame
pushdown_paths
rdf_types: Dict[str, RDFType]
debug: Optional[str]
class IndexingOptions:

Options for indexing

IndexingOptions( object_sort_all: bool = None, object_sort_some: List[IRI] = None, fts: str = None, fts_path: str = None, subject_object_index: bool = None)

Defaults to indexing on subjects and objects for select types (e.g. rdf:type and rdfs:label)

Parameters
  • object_sort_all: Enable object-indexing for all suitable predicates (doubles memory requirement).
  • object_sort_some: Enable object-indexing for a selected list of predicates.
  • fts: Enable full text search, in memory if a path is not given.
  • fts_path: Enable full text search, stored at the path
  • subject_object_index: An index used to deduplicate before insertion, speeds up mapping at a moderate memory cost. On by default.
class ValidationReport:

SHACL Validation report. Only constructed by maplib.

def results( self, streaming: bool = False) -> Union[polars.dataframe.frame.DataFrame, SolutionMappings, NoneType]:

Return the results of the validation report, if they exist.

Parameters
  • streaming: Use the Polars streaming functionality.
Returns

The SHACL validation report, as a DataFrame

def details( self, streaming: bool = False) -> Optional[polars.dataframe.frame.DataFrame]:

Returns the details of the validation report. Only available if validation was called with include_details=True.

Parameters
  • streaming: Use the Polars streaming functionality.
Returns

Details of the SHACL validation report, as a DataFrame

conforms: bool

Whether or not the validation report conforms to the shapes

rule_log: str

The log of SHACL rules execution

shape_targets: polars.dataframe.frame.DataFrame

A DataFrame containing the counts of the targets of each shape and constraint

performance: polars.dataframe.frame.DataFrame

Performance statistics for the validation process

report_graph: str

The named graph where the validation report is stored

class Instance:
Instance( iri: IRI, arguments: List[Union[Argument, Variable, IRI, Literal, BlankNode, NoneType]], list_expander: Optional[Literal['cross', 'zipMin', 'zipMax']] = None)

A template instance.

Parameters
  • iri: The IRI of the template to be instantiated.
  • arguments: The arguments for template instantiation.
  • list_expander: (How) should we do list expansion?
iri
class Template:
Template( iri: IRI, parameters: List[Union[Parameter, Variable]], instances: List[Instance])

Create a new OTTR Template

Parameters
  • iri: The IRI of the template
  • parameters:
  • instances:
def instance( self, arguments: List[Union[Argument, Variable, IRI, Literal, NoneType]], list_expander: Literal['cross', 'zipMin', 'zipMax'] = None) -> Instance:
Parameters
  • arguments: The arguments to the template.
  • list_expander: (How) should we list-expand?
Returns
iri: str
instances: List[Instance]

An OTTR Template. Note that accessing parameters- or instances-fields returns copies. To change these fields, you must assign new lists of parameters or instances.

parameters: List[Parameter]
class Argument:
Argument( term: Union[Variable, IRI, Literal], list_expand: Optional[bool] = False)

An argument for a template instance.

Parameters
  • term: The term.
  • list_expand: Should the argument be expanded? Used with the list_expander argument of instance.
variable
class Parameter:
Parameter( variable: Variable, optional: Optional[bool] = False, allow_blank: Optional[bool] = True, rdf_type: Optional[RDFType] = None, default_value: Union[Literal, IRI, BlankNode, NoneType] = None)

Create a new parameter for a Template.

Parameters
  • variable: The variable.
  • optional: Can the variable be unbound?
  • allow_blank: Can the variable be bound to a blank node?
  • rdf_type: The type of the variable. Can be nested.
  • default_value: Default value when no value provided.
allow_blank: bool
variable: Variable
default_value: Union[Literal, IRI, BlankNode, NoneType]

Parameters for template signatures.

optional: bool
rdf_type: Optional[RDFType]
class Variable:

A variable in a template.

Variable(name: str)

Create a new variable.

Parameters
  • name: The name of the variable.
name: str
class RDFType:

The type of a column containing a RDF variable. For instance, IRIs are RDFType.IRI and a string literal is RDFType.Literal("http://www.w3.org/2001/XMLSchema#string")

def Literal(iri):
def Nested(rdf_type):
def Multi(rdf_types):
Unknown: RDFType = RDFType.None()
class xsd:
boolean: IRI = IRI(http://www.w3.org/2001/XMLSchema#boolean)
byte: IRI = IRI(http://www.w3.org/2001/XMLSchema#byte)
date: IRI = IRI(http://www.w3.org/2001/XMLSchema#date)
dateTime: IRI = IRI(http://www.w3.org/2001/XMLSchema#dateTime)
dateTimeStamp: IRI = IRI(http://www.w3.org/2001/XMLSchema#dateTimeStamp)
decimal: IRI = IRI(http://www.w3.org/2001/XMLSchema#decimal)
double: IRI = IRI(http://www.w3.org/2001/XMLSchema#double)
duration: IRI = IRI(http://www.w3.org/2001/XMLSchema#duration)
float: IRI = IRI(http://www.w3.org/2001/XMLSchema#float)
int_: IRI = IRI(http://www.w3.org/2001/XMLSchema#int)
integer: IRI = IRI(http://www.w3.org/2001/XMLSchema#integer)
language: IRI = IRI(http://www.w3.org/2001/XMLSchema#language)
long: IRI = IRI(http://www.w3.org/2001/XMLSchema#long)
short: IRI = IRI(http://www.w3.org/2001/XMLSchema#short)
string: IRI = IRI(http://www.w3.org/2001/XMLSchema#string)
anyURI: IRI = IRI(http://www.w3.org/2001/XMLSchema#anyURI)
dayTimeDuration: IRI = IRI(http://www.w3.org/2001/XMLSchema#dayTimeDuration)
base64Binary: IRI = IRI(http://www.w3.org/2001/XMLSchema#base64Binary)
gDay: IRI = IRI(http://www.w3.org/2001/XMLSchema#gDay)
gMonthDay: IRI = IRI(http://www.w3.org/2001/XMLSchema#gMonthDay)
gMonth: IRI = IRI(http://www.w3.org/2001/XMLSchema#gMonth)
gYear: IRI = IRI(http://www.w3.org/2001/XMLSchema#gYear)
gYearMonth: IRI = IRI(http://www.w3.org/2001/XMLSchema#gYearMonth)
hexBinary: IRI = IRI(http://www.w3.org/2001/XMLSchema#hexBinary)
Name = IRI(http://www.w3.org/2001/XMLSchema#Name)
NCName = IRI(http://www.w3.org/2001/XMLSchema#NCName)
NMTOKEN = IRI(http://www.w3.org/2001/XMLSchema#NMTOKEN)
negativeInteger: IRI = IRI(http://www.w3.org/2001/XMLSchema#negativeInteger)
nonNegativeInteger: IRI = IRI(http://www.w3.org/2001/XMLSchema#nonNegativeInteger)
nonPositiveInteger: IRI = IRI(http://www.w3.org/2001/XMLSchema#nonPositiveInteger)
normalizedString: IRI = IRI(http://www.w3.org/2001/XMLSchema#normalizedString)
token: IRI = IRI(http://www.w3.org/2001/XMLSchema#token)
unsignedByte: IRI = IRI(http://www.w3.org/2001/XMLSchema#unsignedByte)
unsignedInt: IRI = IRI(http://www.w3.org/2001/XMLSchema#unsignedInt)
unsignedLong: IRI = IRI(http://www.w3.org/2001/XMLSchema#unsignedLong)
unsignedShort: IRI = IRI(http://www.w3.org/2001/XMLSchema#unsignedShort)
yearMonthDuration: IRI = IRI(http://www.w3.org/2001/XMLSchema#yearMonthDuration)
class rdf:
type: IRI = IRI(http://www.w3.org/1999/02/22-rdf-syntax-ns#type)
Alt: IRI = IRI(http://www.w3.org/1999/02/22-rdf-syntax-ns#Alt)
Bag: IRI = IRI(http://www.w3.org/1999/02/22-rdf-syntax-ns#Bag)
first: IRI = IRI(http://www.w3.org/1999/02/22-rdf-syntax-ns#first)
HTML: IRI = IRI(http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML)
langString: IRI = IRI(http://www.w3.org/1999/02/22-rdf-syntax-ns#langString)
List: IRI = IRI(http://www.w3.org/1999/02/22-rdf-syntax-ns#List)
nil: IRI = IRI(http://www.w3.org/1999/02/22-rdf-syntax-ns#nil)
object: IRI = IRI(http://www.w3.org/1999/02/22-rdf-syntax-ns#object)
predicate: IRI = IRI(http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate)
Property: IRI = IRI(http://www.w3.org/1999/02/22-rdf-syntax-ns#Property)
rest: IRI = IRI(http://www.w3.org/1999/02/22-rdf-syntax-ns#rest)
Seq: IRI = IRI(http://www.w3.org/1999/02/22-rdf-syntax-ns#Seq)
Statement: IRI = IRI(http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement)
subject: IRI = IRI(http://www.w3.org/1999/02/22-rdf-syntax-ns#subject)
value: IRI = IRI(http://www.w3.org/1999/02/22-rdf-syntax-ns#value)
XMLLiteral: IRI = IRI(http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral)
class rdfs:
Class: IRI = IRI(http://www.w3.org/2000/01/rdf-schema#Class)
comment: IRI = IRI(http://www.w3.org/2000/01/rdf-schema#comment)
Container: IRI = IRI(http://www.w3.org/2000/01/rdf-schema#Container)
Datatype: IRI = IRI(http://www.w3.org/2000/01/rdf-schema#Datatype)
domain: IRI = IRI(http://www.w3.org/2000/01/rdf-schema#domain)
ContainerMembershipProperty: IRI = IRI(http://www.w3.org/2000/01/rdf-schema#ContainerMembershipProperty)
isDefinedBy: IRI = IRI(http://www.w3.org/2000/01/rdf-schema#isDefinedBy)
label: IRI = IRI(http://www.w3.org/2000/01/rdf-schema#label)
Literal: IRI = IRI(http://www.w3.org/2000/01/rdf-schema#Literal)
member: IRI = IRI(http://www.w3.org/2000/01/rdf-schema#member)
range: IRI = IRI(http://www.w3.org/2000/01/rdf-schema#range)
seeAlso: IRI = IRI(http://www.w3.org/2000/01/rdf-schema#seeAlso)
subClassOf: IRI = IRI(http://www.w3.org/2000/01/rdf-schema#subClassOf)
subPropertyOf: IRI = IRI(http://www.w3.org/2000/01/rdf-schema#subPropertyOf)
Resource: IRI = IRI(http://www.w3.org/2000/01/rdf-schema#Resource)
class owl:

The owl namespace: http://www.w3.org/2002/07/owl#

allValuesFrom: IRI = IRI(http://www.w3.org/2002/07/owl#allValuesFrom)
annotatedProperty: IRI = IRI(http://www.w3.org/2002/07/owl#annotatedProperty)
annotatedSource: IRI = IRI(http://www.w3.org/2002/07/owl#annotatedSource)
annotatedTarget: IRI = IRI(http://www.w3.org/2002/07/owl#annotatedTarget)
assertionProperty: IRI = IRI(http://www.w3.org/2002/07/owl#assertionProperty)
cardinality: IRI = IRI(http://www.w3.org/2002/07/owl#cardinality)
complementOf: IRI = IRI(http://www.w3.org/2002/07/owl#complementOf)
datatypeComplementOf: IRI = IRI(http://www.w3.org/2002/07/owl#datatypeComplementOf)
differentFrom: IRI = IRI(http://www.w3.org/2002/07/owl#differentFrom)
disjointUnionOf: IRI = IRI(http://www.w3.org/2002/07/owl#disjointUnionOf)
disjointWith: IRI = IRI(http://www.w3.org/2002/07/owl#disjointWith)
distinctMembers: IRI = IRI(http://www.w3.org/2002/07/owl#distinctMembers)
equivalentClass: IRI = IRI(http://www.w3.org/2002/07/owl#equivalentClass)
equivalentProperty: IRI = IRI(http://www.w3.org/2002/07/owl#equivalentProperty)
hasKey: IRI = IRI(http://www.w3.org/2002/07/owl#hasKey)
hasSelf: IRI = IRI(http://www.w3.org/2002/07/owl#hasSelf)
hasValue: IRI = IRI(http://www.w3.org/2002/07/owl#hasValue)
intersectionOf: IRI = IRI(http://www.w3.org/2002/07/owl#intersectionOf)
inverseOf: IRI = IRI(http://www.w3.org/2002/07/owl#inverseOf)
maxCardinality: IRI = IRI(http://www.w3.org/2002/07/owl#maxCardinality)
maxQualifiedCardinality: IRI = IRI(http://www.w3.org/2002/07/owl#maxQualifiedCardinality)
members: IRI = IRI(http://www.w3.org/2002/07/owl#members)
minCardinality: IRI = IRI(http://www.w3.org/2002/07/owl#minCardinality)
minQualifiedCardinality: IRI = IRI(http://www.w3.org/2002/07/owl#minQualifiedCardinality)
onClass: IRI = IRI(http://www.w3.org/2002/07/owl#onClass)
onDataRange: IRI = IRI(http://www.w3.org/2002/07/owl#onDataRange)
onDatatype: IRI = IRI(http://www.w3.org/2002/07/owl#onDatatype)
onProperties: IRI = IRI(http://www.w3.org/2002/07/owl#onProperties)
onProperty: IRI = IRI(http://www.w3.org/2002/07/owl#onProperty)
oneOf: IRI = IRI(http://www.w3.org/2002/07/owl#oneOf)
propertyChainAxiom: IRI = IRI(http://www.w3.org/2002/07/owl#propertyChainAxiom)
propertyDisjointWith: IRI = IRI(http://www.w3.org/2002/07/owl#propertyDisjointWith)
qualifiedCardinality: IRI = IRI(http://www.w3.org/2002/07/owl#qualifiedCardinality)
sameAs: IRI = IRI(http://www.w3.org/2002/07/owl#sameAs)
someValuesFrom: IRI = IRI(http://www.w3.org/2002/07/owl#someValuesFrom)
sourceIndividual: IRI = IRI(http://www.w3.org/2002/07/owl#sourceIndividual)
targetIndividual: IRI = IRI(http://www.w3.org/2002/07/owl#targetIndividual)
targetValue: IRI = IRI(http://www.w3.org/2002/07/owl#targetValue)
unionOf: IRI = IRI(http://www.w3.org/2002/07/owl#unionOf)
withRestrictions: IRI = IRI(http://www.w3.org/2002/07/owl#withRestrictions)
AllDifferent: IRI = IRI(http://www.w3.org/2002/07/owl#AllDifferent)
AllDisjointClasses: IRI = IRI(http://www.w3.org/2002/07/owl#AllDisjointClasses)
AllDisjointProperties: IRI = IRI(http://www.w3.org/2002/07/owl#AllDisjointProperties)
Annotation: IRI = IRI(http://www.w3.org/2002/07/owl#Annotation)
AnnotationProperty: IRI = IRI(http://www.w3.org/2002/07/owl#AnnotationProperty)
Axiom: IRI = IRI(http://www.w3.org/2002/07/owl#Axiom)
Class: IRI = IRI(http://www.w3.org/2002/07/owl#Class)
DataRange: IRI = IRI(http://www.w3.org/2002/07/owl#DataRange)
DatatypeProperty: IRI = IRI(http://www.w3.org/2002/07/owl#DatatypeProperty)
DeprecatedClass: IRI = IRI(http://www.w3.org/2002/07/owl#DeprecatedClass)
DeprecatedProperty: IRI = IRI(http://www.w3.org/2002/07/owl#DeprecatedProperty)
FunctionalProperty: IRI = IRI(http://www.w3.org/2002/07/owl#FunctionalProperty)
InverseFunctionalProperty: IRI = IRI(http://www.w3.org/2002/07/owl#InverseFunctionalProperty)
IrreflexiveProperty: IRI = IRI(http://www.w3.org/2002/07/owl#IrreflexiveProperty)
NamedIndividual: IRI = IRI(http://www.w3.org/2002/07/owl#NamedIndividual)
NegativePropertyAssertion: IRI = IRI(http://www.w3.org/2002/07/owl#NegativePropertyAssertion)
ObjectProperty: IRI = IRI(http://www.w3.org/2002/07/owl#ObjectProperty)
Ontology: IRI = IRI(http://www.w3.org/2002/07/owl#Ontology)
OntologyProperty: IRI = IRI(http://www.w3.org/2002/07/owl#OntologyProperty)
ReflexiveProperty: IRI = IRI(http://www.w3.org/2002/07/owl#ReflexiveProperty)
Restriction: IRI = IRI(http://www.w3.org/2002/07/owl#Restriction)
SymmetricProperty: IRI = IRI(http://www.w3.org/2002/07/owl#SymmetricProperty)
TransitiveProperty: IRI = IRI(http://www.w3.org/2002/07/owl#TransitiveProperty)
backwardCompatibleWith: IRI = IRI(http://www.w3.org/2002/07/owl#backwardCompatibleWith)
deprecated: IRI = IRI(http://www.w3.org/2002/07/owl#deprecated)
incompatibleWith: IRI = IRI(http://www.w3.org/2002/07/owl#incompatibleWith)
priorVersion: IRI = IRI(http://www.w3.org/2002/07/owl#priorVersion)
versionInfo: IRI = IRI(http://www.w3.org/2002/07/owl#versionInfo)
Nothing: IRI = IRI(http://www.w3.org/2002/07/owl#Nothing)
Thing: IRI = IRI(http://www.w3.org/2002/07/owl#Thing)
bottomDataProperty: IRI = IRI(http://www.w3.org/2002/07/owl#bottomDataProperty)
topDataProperty: IRI = IRI(http://www.w3.org/2002/07/owl#topDataProperty)
bottomObjectProperty: IRI = IRI(http://www.w3.org/2002/07/owl#bottomObjectProperty)
topObjectProperty: IRI = IRI(http://www.w3.org/2002/07/owl#topObjectProperty)
imports: IRI = IRI(http://www.w3.org/2002/07/owl#imports)
versionIRI: IRI = IRI(http://www.w3.org/2002/07/owl#versionIRI)
rational: IRI = IRI(http://www.w3.org/2002/07/owl#rational)
real: IRI = IRI(http://www.w3.org/2002/07/owl#real)
class IRI:
IRI(iri: str)

Create a new IRI

Parameters
  • iri: IRI (without < and >).
iri: str

An IRI.

class Literal:

An RDF literal.

Literal(value: str, data_type: IRI = None, language: str = None)

Create a new RDF Literal

Parameters
  • value: The lexical representation of the value.
  • data_type: The data type of the value (an IRI).
  • language: The language tag of the value.
def to_native(self) -> Union[int, float, bool, str, datetime.datetime, datetime.date]:
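Returns
  • The value of the literal as a native Python object (int, float, bool, str, datetime.datetime or datetime.date).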
language: Optional[str]
datatype: Optional[IRI]
value: str
class Prefix:

A prefix that can be used to ergonomically build IRIs.

Prefix(iri, prefix_name=None)

Create a new prefix.

Parameters
  • iri: The prefix IRI.
  • prefix_name: The name of the prefix
def suf(self, suffix: str) -> IRI:

Create an IRI by appending the suffix.

Parameters
  • suffix: The suffix to append.
Returns
  • The IRI created by appending the suffix.
class BlankNode:

A Blank Node.

BlankNode(name: str)

Create a new Blank Node

Parameters
  • name: Name of the blank node (without the leading _:).
name: str
def explore( m: Model, host: str = 'localhost', port: int = 8000, bind: str = 'localhost', popup=True, fts=True):
 82def explore(*args, **kwargs):
 83    """Deprecated way to start an explore session.
 84Use the explore method on a Model object instead
 85"""
 86    logger.warn("Calling `maplib.explore` is deprecated, use `m.explore()` on a `Model` object instead")
 87    if kwargs.get("popup") == None or kwargs.get("popup") == True:
 88        logger.warn("""Calling explore without a popup argument defaults to it being on.
 89The popup argument is deprecated, so if you are relying on explore() opening a browser window
 90please change this to something like
 91
 92```
 93import webbrowser
 94from maplib import Model
 95
 96m = Model()
 97...
 98s = m.explore()
 99webbrowser.open(s.url, new=2)
100```
101""")
102        kwargs["popup"] = True
103    elif kwargs.get("popup") == False:
104        logger.warn("The new explore function on a Model, no longer defaults to popping up the browser ")
105
106    return _explore(*args, **kwargs)

Starts a graph explorer session. To run from Jupyter Notebook use:

>>> from maplib import explore
>>>
>>> server = explore(m)
You can later stop the server with
>>> server.stop()
Parameters
  • m: The Model to explore
  • host: The hostname that we will point the browser to.
  • port: The port on which the graph explorer web server listens.
  • bind: Bind to the following host / ip.
  • popup: Pop up the browser window.
  • fts: Enable full text search indexing
def add_triples( source: Model, target: Model, source_graph: str = None, target_graph: str = None):
 5def add_triples(
 6    source: Model, target: Model, source_graph: str = None, target_graph: str = None
 7):
 8    """(Zero) copy the triples from one Model into another.
 9
10    :param source: The source model
11    :param target: The target model
12    :param source_graph: The named graph in the source model to copy from. None means default graph.
13    :param target_graph: The named graph in the target model to copy into. None means default graph.
14    """
15    for p in source.get_predicate_iris(source_graph):
16        subject = Variable("subject")
17        object = Variable("object")
18        template = Template(
19            iri=IRI("urn:maplib:tmp"),
20            parameters=[subject, object],
21            instances=[Triple(subject, p, object)],
22        )
23        sms = source.get_predicate(p, source_graph)
24        for sm in sms:
25            target.map(
26                template,
27                data=sm,
28                graph=target_graph,
29            )

(Zero) copy the triples from one Model into another.

Parameters
  • source: The source model
  • target: The target model
  • source_graph: The named graph in the source model to copy from. None means default graph.
  • target_graph: The named graph in the target model to copy into. None means default graph.
def generate_templates(m: Model, graph: Optional[str]) -> Dict[str, Template]:
 9def generate_templates(m: Model, graph: Optional[str]) -> Dict[str, Template]:
10    """Generate templates for instantiating the classes in an ontology
11
12    :param m: The model where the ontology is stored. We mainly rely on rdfs:subClassOf, rdfs:range and rdfs:domain.
13    :param graph: The named graph where the ontology is stored.
14
15    :return A dictionary of templates for instantiating the classes in the ontology, where the keys are the class URIs.
16
17    Usage example - note that it is important to add the templates to the Model you want to populate.
18    >>> from maplib import Model, create_templates
19    >>>
20    >>> m_ont = Model()
21    >>> m_ont.read("my_ontology.ttl")
22    >>> templates = generate_templates(m_ont)
23    >>> m = Model()
24    >>> for t in templates.values():
25    >>>     m.add_template(t)
26    >>> m.map("https://example.net/MyClass", df)
27    """
28
29    properties = get_properties(m, graph=graph)
30    properties_by_domain = {}
31    properties_by_range = {}
32    for r in properties.iter_rows(named=True):
33        dom = r["domain"]
34        if dom in properties_by_domain:
35            properties_by_domain[dom].append(r)
36        else:
37            properties_by_domain[dom] = [r]
38
39        ran = r["range"]
40        if ran in properties_by_range:
41            properties_by_range[ran].append(r)
42        else:
43            properties_by_range[ran] = [r]
44
45    subclasses = get_subclasses(m, graph=graph)
46
47    subclass_of = {}
48    for r in (
49        subclasses.group_by("child")
50        .agg(pl.col("parent").alias("parents"))
51        .iter_rows(named=True)
52    ):
53        subclass_of[r["child"]] = r["parents"]
54
55    class_ordering = topological_sort(subclasses)
56
57    templates_without_typing = generate_templates_without_typing(
58        properties_by_domain, properties_by_range, class_ordering, subclass_of
59    )
60    templates_with_typing = generate_templates_with_typing(templates_without_typing)
61    templates = {}
62    for t, template in templates_without_typing.items():
63
64        templates[t + "_notype"] = template
65    for t, template in templates_with_typing.items():
66        templates[t] = template
67
68    return templates

Generate templates for instantiating the classes in an ontology

Parameters
  • m: The model where the ontology is stored. We mainly rely on rdfs:subClassOf, rdfs:range and rdfs:domain.
  • graph: The named graph where the ontology is stored.

Returns: A dictionary of templates for instantiating the classes in the ontology, where the keys are the class URIs.

Usage example - note that it is important to add the templates to the Model you want to populate.

class MaplibException(builtins.Exception):

Common base class for all non-exit exceptions.