Skip to content

Instantly share code, notes, and snippets.

@larsgw
Last active October 18, 2025 17:03
Show Gist options
  • Select an option

  • Save larsgw/c3137fe0cfb2176c0f053b081972a16b to your computer and use it in GitHub Desktop.

Select an option

Save larsgw/c3137fe0cfb2176c0f053b081972a16b to your computer and use it in GitHub Desktop.
Attempts at running ShEx validation

Validating linked data from the Library of Identification resources (https://doi.org/10.5281/zenodo.15552611).

shex.js

  • node ./tools/validate-shex.js
  • shex-validate -x docs/linked-data/shape.shex -d data.nq -n "https://purl.org/identification-resources/catalog/B1"

rudof

  • rudof shex-validate -s docs/linked-data/shape_rudof.shex -n "<https://purl.org/identification-resources/catalog/B1>" -t nquads -r json data.nq

jena-shex

  • groovy ./tools/validate-shex.groovy
# ShEx schema for the catalog's linked data.
# Namespace prefixes used by the shapes declared in this file.
PREFIX ac: <http://rs.tdwg.org/ac/terms/>
PREFIX bibo: <http://purl.org/ontology/bibo/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX dwc: <http://rs.tdwg.org/dwc/terms/>
PREFIX dwciri: <http://rs.tdwg.org/dwc/iri/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX schema: <https://schema.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
# Default start shape: validation begins at the `work` shape when no shape is given.
start=@<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#work>
# Shape `work`: a bibliographic work in the catalog.
# The focus node must be an IRI. The shape is CLOSED, so a conforming node may
# only carry the predicates listed here.
<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#work> IRI CLOSED {
# Exactly one rdf:type, taken from these four bibo classes.
a [bibo:BookSection bibo:AcademicArticle bibo:Website bibo:Book];
# At least one language-tagged title and at least one ISO 639-1 language IRI.
dcterms:title rdf:langString+;
dcterms:language [<http://id.loc.gov/vocabulary/iso639-1/~>]+;
# Relations to other catalog entries: values are only checked by IRI prefix
# (`~` is an IRI stem), not validated against a shape.
dcterms:hasPart [<https://purl.org/identification-resources/catalog/~>]*;
dcterms:isPartOf [<https://purl.org/identification-resources/catalog/~>]*;
dcterms:references [<https://purl.org/identification-resources/catalog/~>]*;
dcterms:isReferencedBy [<https://purl.org/identification-resources/catalog/~>]*;
dcterms:isVersionOf [<https://purl.org/identification-resources/catalog/~>]*;
# A work may also have parts whose IRIs are in the resource namespace.
dcterms:hasPart [<https://purl.org/identification-resources/resource/~>]*;
bibo:annotates [<https://purl.org/identification-resources/catalog/~>]*;
bibo:cites [<https://purl.org/identification-resources/catalog/~>]*;
bibo:citedBy [<https://purl.org/identification-resources/catalog/~>]*;
bibo:translationOf [<https://purl.org/identification-resources/catalog/~>]*;
dcterms:creator [<https://purl.org/identification-resources/author/~>]*;
# NOTE(review): the LITERAL alternative accepts any literal, so it also covers
# the three typed alternatives before it - confirm this looseness is intended.
dcterms:issued xsd:date OR xsd:gYearMonth OR xsd:gYear OR LITERAL?;
dcterms:publisher [<https://purl.org/identification-resources/publisher/~>]*;
# Rights are given either as literals or as SPDX license IRIs.
dcterms:rights LITERAL*;
dcterms:rights [<https://spdx.org/licenses/~>]*;
schema:url IRI?;
# Each encoding must conform to the `file` shape.
schema:encoding @<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#file>*;
schema:archivedAt IRI*;
# Optional ordered author list; its head must conform to the `authors` shape.
bibo:authorList @<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#authors>?;
# Identifiers and bibliographic details.
bibo:handle IRI?;
bibo:issn LITERAL?;
bibo:isbn LITERAL*;
bibo:isbn10 LITERAL*;
bibo:isbn13 LITERAL*;
bibo:uri [<http://www.wikidata.org/entity/~>]?;
bibo:doi LITERAL?;
bibo:volume LITERAL?;
bibo:issue LITERAL?;
bibo:pages LITERAL?;
bibo:pageStart LITERAL?;
bibo:pageEnd LITERAL?;
bibo:numPages xsd:integer?;
bibo:edition LITERAL?
}
# Shape `authors`: one cell of an RDF list of authors.
# The focus node must be a blank node with exactly rdf:first and rdf:rest.
<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#authors> BNODE CLOSED {
# The list item is an IRI in the author namespace (checked by prefix only).
rdf:first [<https://purl.org/identification-resources/author/~>];
# The tail is either rdf:nil (end of list) or another `authors` cell (recursive).
rdf:rest [rdf:nil] OR @<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#authors>
}
# Shape `author`: a person, identified by IRI, with exactly one name and an
# optional link to a Wikidata entity. CLOSED: no other predicates are allowed.
# Note that the dcterms:creator constraints in this schema use an IRI-stem value
# set rather than a reference to this shape.
<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#author> IRI CLOSED {
a [foaf:Person];
foaf:name LITERAL;
owl:sameAs [<http://www.wikidata.org/entity/~>]?
}
# Shape `publisher`: an organization, identified by IRI, with exactly one name
# and an optional link to a Wikidata entity. CLOSED: no other predicates are allowed.
# Note that the dcterms:publisher constraints in this schema use an IRI-stem value
# set rather than a reference to this shape.
<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#publisher> IRI CLOSED {
a [foaf:Organization];
foaf:name LITERAL;
owl:sameAs [<http://www.wikidata.org/entity/~>]?
}
# Shape `file`: a blank-node schema:MediaObject with exactly one content URL.
# Referenced from the schema:encoding constraints of this schema.
<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#file> BNODE CLOSED {
a [schema:MediaObject];
schema:contentUrl IRI
}
# Shape `resource`: a part of a catalog work (bibo:DocumentPart).
# The focus node must be an IRI. The shape is CLOSED, so a conforming node may
# only carry the predicates listed here.
<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#resource> IRI CLOSED {
  a [bibo:DocumentPart];
  # Exactly one parent whose IRI is in the catalog namespace.
  dcterms:isPartOf [<https://purl.org/identification-resources/catalog/~>];
  # Exactly one DCMI type out of Software, Text and Collection.
  dcterms:type [
    <http://purl.org/dc/dcmitype/Software>
    <http://purl.org/dc/dcmitype/Text>
    <http://purl.org/dc/dcmitype/Collection>
  ];
  # Places and scientific names are validated against their own shapes.
  dcterms:spatial @<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#place>*;
  dcterms:subject @<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#scientificName>*;
  ac:subtype [<http://rs.tdwg.org/acsubtype/values/IdentificationKey>]?;
  # Taxon coverage is either one node conforming to `taxon`, or an optional GBIF species IRI.
  ( ac:taxonCoverage @<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#taxon> | ac:taxonCoverage [<https://gbif.org/species/~>]? );
  ac:tag LITERAL*;
  ac:taxonCount xsd:integer?;
  ac:subjectPart [<http://rs.tdwg.org/acpart/values/~>]*;
  ac:subjectPartLiteral LITERAL*;
  dwc:taxonID @<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#taxon>*;
  dwciri:lifeStage [<http://rs.gbif.org/vocabulary/gbif/life_stage/~>]*;
  dwciri:sex [<http://rs.gbif.org/vocabulary/gbif/sex/~>]*;
  dwc:caste LITERAL*;
  # The remaining constraints mirror the `work` shape, with relations pointing at
  # the resource namespace and title/language being optional.
  dcterms:title rdf:langString*;
  dcterms:language [<http://id.loc.gov/vocabulary/iso639-1/~>]*;
  # Relations to other resources: values are only checked by IRI prefix.
  # (A second, identical dcterms:hasPart constraint used to follow dcterms:isVersionOf;
  # it was redundant and has been removed.)
  dcterms:hasPart [<https://purl.org/identification-resources/resource/~>]*;
  dcterms:isPartOf [<https://purl.org/identification-resources/resource/~>]*;
  dcterms:references [<https://purl.org/identification-resources/resource/~>]*;
  dcterms:isReferencedBy [<https://purl.org/identification-resources/resource/~>]*;
  dcterms:isVersionOf [<https://purl.org/identification-resources/resource/~>]*;
  bibo:annotates [<https://purl.org/identification-resources/resource/~>]*;
  bibo:cites [<https://purl.org/identification-resources/resource/~>]*;
  bibo:citedBy [<https://purl.org/identification-resources/resource/~>]*;
  bibo:translationOf [<https://purl.org/identification-resources/resource/~>]*;
  dcterms:creator [<https://purl.org/identification-resources/author/~>]*;
  # NOTE(review): the LITERAL alternative accepts any literal, so it also covers
  # the three typed alternatives before it - confirm this looseness is intended.
  dcterms:issued xsd:date OR xsd:gYearMonth OR xsd:gYear OR LITERAL?;
  dcterms:publisher [<https://purl.org/identification-resources/publisher/~>]*;
  # Rights are given either as literals or as SPDX license IRIs.
  dcterms:rights LITERAL*;
  dcterms:rights [<https://spdx.org/licenses/~>]*;
  schema:url IRI?;
  schema:encoding @<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#file>*;
  schema:archivedAt IRI*;
  bibo:authorList @<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#authors>?;
  # Identifiers and bibliographic details.
  bibo:handle IRI?;
  bibo:issn LITERAL?;
  bibo:isbn LITERAL*;
  bibo:isbn10 LITERAL*;
  bibo:isbn13 LITERAL*;
  bibo:uri [<http://www.wikidata.org/entity/~>]?;
  bibo:doi LITERAL?;
  bibo:volume LITERAL?;
  bibo:issue LITERAL?;
  bibo:pages LITERAL?;
  bibo:pageStart LITERAL?;
  bibo:pageEnd LITERAL?;
  bibo:numPages xsd:integer?;
  bibo:edition LITERAL?
}
# Shape `taxon`: a taxon in the catalog's own taxonomy.
# The focus node must be an IRI of the form .../taxon/T<n>, where <n> is a
# positive integer without leading zeros. CLOSED: no other predicates are allowed.
<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#taxon> IRI /^https:\/\/purl\.org\/identification-resources\/taxon\/T[1-9][0-9]*$/ CLOSED {
  a [dwc:Taxon];
  # Darwin Core spells this term `scientificName` (camelCase). IRIs are case-sensitive,
  # so the previous `dwc:scientificname` never matched the real predicate, and because
  # the shape is CLOSED it rejected every taxon that carried it.
  dwc:scientificName LITERAL?;
  dwc:taxonRank [<http://rs.gbif.org/vocabulary/gbif/rank/~>]?;
  # The parent, if present, must itself conform to this shape (recursive).
  dwc:parentNameUsageID @<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#taxon>?;
  # At most one Wikidata link and at most one GBIF link.
  owl:sameAs [<http://www.wikidata.org/entity/~>]?;
  owl:sameAs [<https://gbif.org/species/~>]?
}
# Shape `place`: a location referenced via dcterms:spatial.
# The focus node must be an IRI of the form .../place/G<n>, where <n> is a
# positive integer without leading zeros. CLOSED: no other predicates are allowed.
<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#place> IRI /^https:\/\/purl\.org\/identification-resources\/place\/G[1-9][0-9]*$/ CLOSED {
a [dcterms:Location];
# Exactly one title; any literal is accepted (no language tag required).
dcterms:title LITERAL;
owl:sameAs [<http://www.wikidata.org/entity/~>]?
}
# Shape `scientificName`: a name as it appears in a resource, referenced via dcterms:subject.
# The focus node must be an IRI of the form .../resource/B<a>:<b>#B<c>:<d>:<e>, where each
# number is a positive integer without leading zeros. The regex does not require the numbers
# in the fragment to repeat those in the path. CLOSED: no other predicates are allowed.
<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#scientificName> IRI /^https:\/\/purl\.org\/identification-resources\/resource\/B[1-9][0-9]*:[1-9][0-9]*#B[1-9][0-9]*:[1-9][0-9]*:[1-9][0-9]*$/ CLOSED {
a [dwc:Taxon];
# Required: the scientific name and the verbatim identification (further below).
dwc:scientificName LITERAL;
dwc:scientificNameAuthorship LITERAL?;
dwc:genericName LITERAL?;
dwc:infragenericEpithet LITERAL?;
dwc:specificEpithet LITERAL?;
dwc:infraspecificEpithet LITERAL?;
dwc:taxonRemarks LITERAL?;
dwc:verbatimIdentification LITERAL;
# Exactly one rank, given either as a GBIF rank vocabulary IRI or as a literal.
(
dwc:taxonRank [<http://rs.gbif.org/vocabulary/gbif/rank/~>]
|
dwc:taxonRank LITERAL
);
dwc:taxonomicStatus [<http://rs.gbif.org/vocabulary/gbif/taxonomicStatus/~>];
# Accepted name and parent, if present, must conform to this same shape (recursive).
dwc:acceptedNameUsageID @<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#scientificName>?;
dwc:parentNameUsageID @<https://github.com/identification-resources/catalog/blob/main/docs/linked-data/shape.shex#scientificName>?;
# At most one GBIF species IRI and at most one ChecklistBank (dataset 309120) taxon IRI.
dwc:taxonID [<https://gbif.org/species/~>]?;
dwc:taxonID [<https://www.checklistbank.org/dataset/309120/taxon/~>]?
}
@Grab(group='io.github.egonw.bacting', module='managers-rdf', version='0.3.2')
import org.apache.jena.shex.ShexStatus
// Load data.ttl into an in-memory store and validate every bibo:BookSection
// node against the `work` shape of docs/linked-data/shape.shex.
def rdf = new net.bioclipse.managers.RDFManager("./")
def emptyStore = rdf.createInMemoryStore(true)
def store = rdf.importFile(emptyStore, "data.ttl", "TURTLE")

def report = rdf.validateAllOfType(
    store,
    "docs/linked-data/shape.shex",
    "file:///home/loir/catalog/docs/linked-data/work",
    "http://purl.org/ontology/bibo/BookSection"
)

// Count all report entries and print each nonconformant one to stderr.
def total = 0
def fails = 0
report.forEachReport { reportEntry ->
    total++
    if (reportEntry.status == ShexStatus.nonconformant) {
        fails++
        System.err.println "failed on ${reportEntry.focus}, reason: \"${reportEntry.reason}\""
    }
}

System.err.println "success for ${total - fails}/${total} nodes"

// Exit with a non-zero status when at least one node failed.
if (fails > 0) {
    System.exit(1)
}
const { promises: fs } = require('fs')
const path = require('path')
const ShExParser = require('@shexjs/parser')
const ShExTerm = require('@shexjs/term')
const { ctor: RdfJsDb } = require('@shexjs/neighborhood-rdfjs')
const { ShExValidator } = require('@shexjs/validator')
const { Parser: N3Parser, Store: N3Store } = require('n3')
// Base IRI handed to the ShEx parser; shape labels are formed as BASE + shape name.
const BASE = 'http://raw.githubusercontent.com/identification-resources/main/docs/linked-data/'

// Maps an rdf:type IRI to the name of the shape its instances are validated against.
// All four supported bibo classes use the `work` shape.
// TODO: not mapped yet:
//   'http://xmlns.com/foaf/0.1/Person': 'author',
//   'http://xmlns.com/foaf/0.1/Organization': 'publisher'
const SHAPE_MAP = Object.fromEntries(
  ['BookSection', 'AcademicArticle', 'Website', 'Book'].map(
    (className) => [`http://purl.org/ontology/bibo/${className}`, 'work']
  )
)
/**
 * Parses an RDF document with the N3 parser and collects all quads in a store.
 *
 * @param {string} file - the document contents handed to the parser (not a path)
 * @returns {Promise<object>} resolves with the filled N3 store once the parser
 *   invokes its callback with neither an error nor a quad; rejects with the
 *   parser's error otherwise
 */
async function parseTurtle (file) {
  return new Promise((resolve, reject) => {
    const quads = new N3Store()
    const onQuad = (error, quad) => {
      if (error) {
        reject(error)
        return
      }
      if (!quad) {
        // No error and no quad: the parser has nothing more to deliver.
        resolve(quads)
        return
      }
      quads.addQuad(quad)
    }
    new N3Parser().parse(file, onQuad)
  })
}
/**
 * Tracker object that prints an indented trace to stderr.
 * `enter` logs an opening brace with the node and shape name and increases the
 * indentation; `exit` decreases it and logs the closing brace. The remaining
 * hooks are intentionally no-ops.
 * NOTE(review): presumably the hooks are invoked by the ShEx validator that
 * receives this tracker - confirm against the validator's tracker interface.
 */
class DebugTracker {
  constructor () {
    // Current nesting level; one space of indentation per level.
    this.depth = 0
  }

  // Writes the arguments to stderr, preceded by the current indentation.
  _log (...parts) {
    const indent = ' '.repeat(this.depth)
    console.error(indent, ...parts)
  }

  recurse (_rec) {}

  known (_res) {}

  shapeDecl (f, s) {
    // this._log(f.id || f.value, JSON.stringify(s))
  }

  // Logs the term (its `id`, falling back to `value`) and the shape label with
  // the first BASE.length characters cut off, then indents one level deeper.
  enter (term, shapeLabel) {
    const nodeName = term.id || term.value
    const shapeName = shapeLabel.slice(BASE.length)
    this._log('{', nodeName, shapeName)
    this.depth += 1
  }

  // Returns to the previous indentation level and logs the closing brace.
  exit (_term, _shapeLabel, _res) {
    this.depth -= 1
    this._log('}')
  }
}
/**
 * Validates the works in ../data.ttl against ../docs/linked-data/shape.shex
 * (both relative to this script's directory).
 *
 * Every subject with an rdf:type listed in SHAPE_MAP is validated against the
 * mapped shape. Progress, timings and errors go to stderr. The process exits
 * with code 0 when all nodes pass and with code 1 otherwise.
 */
async function main () {
  const schemaPath = path.join(__dirname, '../docs/linked-data/shape.shex')
  const dataPath = path.join(__dirname, '../data.ttl') // TODO
  const shexc = await fs.readFile(schemaPath, 'utf8')
  const turtle = await fs.readFile(dataPath, 'utf8')

  const schema = ShExParser.construct(BASE).parse(shexc)
  const store = await parseTurtle(turtle)
  const validator = new ShExValidator(schema, RdfJsDb(store))

  // One { node, shape } pair per rdf:type triple whose type has a mapped shape.
  const typeQuads = store.readQuads(null, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', null, null)
  const shapeMap = Array.from(typeQuads)
    .filter(({ object }) => object.id in SHAPE_MAP)
    .map(({ subject, object }) => ({
      node: subject.id,
      shape: BASE + SHAPE_MAP[object.id]
    }))

  // TODO
  // Wrap validateShapeExpr so the tracker's shapeDecl hook is called for every
  // shape expression before the original implementation runs.
  const originalValidateShapeExpr = validator.validateShapeExpr
  validator.validateShapeExpr = function (focus, shapeExpr, ctx) {
    ctx.tracker.shapeDecl(focus, shapeExpr)
    return originalValidateShapeExpr.call(this, focus, shapeExpr, ctx)
  }

  const tracker = new DebugTracker()
  let passed = 0
  for (const entry of shapeMap) {
    console.error('==', entry.node, '==') // TODO
    console.time('validation')
    const focus = ShExTerm.ld2RdfJsTerm(entry.node)
    const report = validator.validateNodeShapePair(focus, entry.shape, tracker)
    console.timeEnd('validation')

    // A report that carries an `errors` key counts as a failed node.
    if (!('errors' in report)) {
      passed++
      continue
    }
    console.error(require('util').inspect(report.errors, { depth: Infinity, colors: true }))
  }

  console.error(`${passed}/${shapeMap.length} nodes passed`)
  const allPassed = passed === shapeMap.length
  process.exit(allPassed ? 0 : 1)
}
// Run the script. An unexpected failure (unreadable file, parse error, ...) is
// logged and reported through a non-zero exit code, so a crashed run cannot be
// mistaken for a successful validation.
main().catch((error) => {
  console.error(error)
  process.exitCode = 1
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment