Skip to content

update_vocabs

Implements an entailment + validation workflow.

update_vocabs starts by loading one or more DomainConfigurations from RDF files and/or SPARQL endpoints, and a series of profile definitions (also from a list of RDF files and/or SPARQL endpoints). From there, files can be processed individually or in batches, and the results can be uploaded to a target triplestore.

This script can be used as a library, or run directly from the cli; please refer to the OGC NamingAuthority repository for usage details on the latter.

get_entailed_base_path(f, g, rootpattern=None, entailed_dir=DEFAULT_ENTAILED_DIR)

Tries to find the base output file path for an entailed version of a source Graph.

Parameters:

Name Type Description Default
f Path

the original path of the source file

required
g Graph

the Graph loaded from the source file

required
rootpattern Union[str, None]

a root pattern to filter candidate URIs

None
entailed_dir str

the name of the base entailed files directory

DEFAULT_ENTAILED_DIR
Source code in ogc/na/update_vocabs.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
def get_entailed_base_path(f: Path, g: Graph, rootpattern: Union[str, None] = None,
                           entailed_dir: str = DEFAULT_ENTAILED_DIR) -> tuple:
    """
    Tries to find the base output file path for an entailed version of a source Graph.

    :param f: the original path of the source file
    :param g: the [Graph][rdflib.Graph] loaded from the source file
    :param rootpattern: a root pattern to filter candidate URIs
    :param entailed_dir: the name of the base entailed files directory
    :return: a ``(output path, canonical filename, concept scheme URI)`` tuple
    """

    if not rootpattern:
        # No filtering pattern: keep the original file name and take the
        # first concept scheme URI found (or None if there is none).
        return (f.parent / entailed_dir / f.name,
                f.name, next(get_graph_uri_for_vocab(g), None))

    chosen_name = None
    chosen_scheme = None
    warn_pending = True
    for uri in get_graph_uri_for_vocab(g):
        if rootpattern not in uri:
            logger.info('File %s: ignoring concept scheme %s not matching domain path %s',
                        str(f), uri, rootpattern)
            continue

        # Filename candidate: everything after the root pattern, fragment stripped
        candidate = uri.rsplit(rootpattern)[1].split('#', 1)[0]
        chosen_scheme = uri

        if warn_pending and chosen_name and chosen_name != candidate:
            # Warn only once even if more than two schemes are present
            warn_pending = False
            logger.warning("File %s contains multiple concept schemes", str(f))

        chosen_name = candidate

    if not chosen_name:
        logger.warning('File %s contains no concept schemes matching domain path %s; using filename',
                       str(f), rootpattern)
        chosen_name = f.name
    elif chosen_name.startswith('/'):
        # Avoid an absolute path that would discard the parent directories
        chosen_name = chosen_name[1:]

    return (f.parent / entailed_dir / Path(chosen_name),
            chosen_name, chosen_scheme)

get_graph_uri_for_vocab(g=None)

Find a target graph URI in a vocabulary Graph.

In effect, this function merely looks for SKOS ConceptSchemes.

Parameters:

Name Type Description Default
g Graph

the Graph for which to find the target URI

None

Returns:

Type Description
Generator[str, None, None]

a Node generator

Source code in ogc/na/update_vocabs.py
107
108
109
110
111
112
113
114
115
116
117
118
def get_graph_uri_for_vocab(g: Graph = None) -> Generator[str, None, None]:
    """
    Find a target graph URI in a vocabulary [Graph][rdflib.Graph].

    In effect, this function merely looks for
    [SKOS ConceptScheme's](https://www.w3.org/TR/2008/WD-skos-reference-20080829/skos.html#ConceptScheme).

    :param g: the [Graph][rdflib.Graph] for which to find the target URI
    :return: a [Node][rdflib.term.Node] generator
    """
    # Every subject typed as skos:ConceptScheme is a candidate graph URI
    yield from (str(scheme)
                for scheme in g.subjects(predicate=RDF.type, object=SKOS.ConceptScheme))

load_vocab(vocab, graph_uri, graph_store, auth_details=None)

Loads a vocabulary onto a triplestore using the SPARQL Graph Store protocol.

Parameters:

Name Type Description Default
vocab Union[Graph, str, Path]

the file or Graph to load

required
graph_uri str

a target graph URI

required
graph_store str

the target SPARQL Graph Store protocol URL

required
auth_details tuple[str]

a (username, password) tuple for authentication

None

Returns:

Type Description
None
Source code in ogc/na/update_vocabs.py
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
def load_vocab(vocab: Union[Graph, str, Path], graph_uri: str,
               graph_store: str, auth_details: tuple[str] = None) -> None:
    """
    Loads a vocabulary onto a triplestore using the [SPARQL Graph Store
    protocol](https://www.w3.org/TR/sparql11-http-rdf-update/).

    :param vocab: the file or Graph to load
    :param graph_uri: a target graph URI
    :param graph_store: the target SPARQL Graph Store protocol URL
    :param auth_details: a `(username, password)` tuple for authentication
    :return:
    """
    # PUT replaces the target graph wholesale (DROP GRAPH + INSERT DATA);
    # the Graph Store spec guarantees the graph is created if it is missing.
    if isinstance(vocab, Graph):
        payload = vocab.serialize(format='ttl')
    else:
        # A str/Path: send the raw file bytes as-is
        with open(vocab, 'rb') as src:
            payload = src.read()

    response = requests.put(
        graph_store,
        params={'graph': graph_uri},
        auth=auth_details,
        headers={'Content-type': 'text/turtle'},
        data=payload,
    )
    logger.debug('HTTP status code: %d', response.status_code)
    # Surface HTTP-level failures to the caller
    response.raise_for_status()

make_rdf(filename, g, rootpath=None, entailment_directory=DEFAULT_ENTAILED_DIR, provenance_metadata=None)

Serializes entailed RDF graphs in several output formats for a given input graph.

Parameters:

Name Type Description Default
filename Union[str, Path]

the original source filename

required
g Graph

Graph loaded from the source file

required
rootpath Union[str, None]

a path to filter concept schemes inside the Graph and infer the main one

None
provenance_metadata ProvenanceMetadata

provenance metadata (None to ignore)

None
entailment_directory Union[str, Path]

name for the output subdirectory for entailed files

DEFAULT_ENTAILED_DIR

Returns:

Type Description
Path

the output path for the Turtle version of the entailed files

Source code in ogc/na/update_vocabs.py
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
def make_rdf(filename: Union[str, Path], g: Graph, rootpath: Union[str, None] = None,
             entailment_directory: Union[str, Path] = DEFAULT_ENTAILED_DIR,
             provenance_metadata: ProvenanceMetadata = None,) -> Path:
    """
    Serializes entailed RDF graphs in several output formats for a given input
    graph.

    :param filename: the original source filename
    :param g: [Graph][rdflib.Graph] loaded from the source file
    :param rootpath: a path to filter concept schemes inside the Graph and infer the main one
    :param provenance_metadata: provenance metadata (None to ignore)
    :param entailment_directory: name for the output subdirectory for entailed files
    :return: the output path for the Turtle version of the entailed files
    """
    # Normalize to an absolute Path so path comparisons below are reliable
    if not isinstance(filename, Path):
        filename = Path(filename)
    filename = filename.resolve()

    if isinstance(entailment_directory, Path):
        entailment_directory = entailment_directory.resolve()

    # Path of the Turtle serialization, if one is produced (the return value)
    loadable_ttl = None
    newbasepath, canonical_filename, conceptschemeuri = \
        get_entailed_base_path(filename, g, rootpath, entailment_directory)
    if newbasepath:
        newbasepath.parent.mkdir(parents=True, exist_ok=True)
    # Emit one serialization per configured entailed format
    for entailed_format in ENTAILED_FORMATS:
        if newbasepath:
            newpath = newbasepath.with_suffix('.' + entailed_format['extension'])
            if provenance_metadata:
                provenance_metadata.generated = FileProvenanceMetadata(filename=newpath,
                                                                       mime_type=entailed_format['mime'],
                                                                       use_bnode=False)
                # NOTE(review): `g` is rebound to the provenance-augmented copy on
                # every iteration, so provenance statements appear to accumulate
                # across formats — confirm this stacking is intended rather than
                # augmenting a fresh copy of the original graph each time.
                g = generate_provenance(g + Graph(), provenance_metadata, 'ogc.na.update_vocabs')
            g.serialize(destination=newpath, format=entailed_format['format'])
            if entailed_format['format'] == 'ttl':
                loadable_ttl = newpath

    if filename.stem != canonical_filename:
        # The canonical name was inferred from a concept scheme URI and differs
        # from the source file's stem; log the rename for traceability
        logger.info("New file name %s -> %s for %s",
                    filename.stem, canonical_filename, conceptschemeuri)

    return loadable_ttl

setup_logging(debug=False)

Sets up logging level and handlers (logs WARNING and ERROR to stderr).

Parameters:

Name Type Description Default
debug bool

whether to set DEBUG level

False
Source code in ogc/na/update_vocabs.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def setup_logging(debug: bool = False):
    """
    Sets up logging level and handlers (logs WARNING and ERROR
    to stderr).

    :param debug: whether to set DEBUG level
    """
    root = logging.getLogger()
    root.setLevel(logging.DEBUG if debug else logging.INFO)

    formatter = logging.Formatter(fmt='%(name)s [%(levelname)s] %(message)s')

    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(logging.DEBUG)
    stdout_handler.setFormatter(formatter)
    # Keep DEBUG/INFO on stdout only; WARNING and above go to the stderr handler
    stdout_handler.addFilter(lambda record: record.levelno <= logging.INFO)

    stderr_handler = logging.StreamHandler(sys.stderr)
    stderr_handler.setLevel(logging.WARNING)
    stderr_handler.setFormatter(formatter)

    for handler in (stdout_handler, stderr_handler):
        root.addHandler(handler)