-
Notifications
You must be signed in to change notification settings - Fork 0
/
mk_skos.py
86 lines (64 loc) · 2.48 KB
/
mk_skos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from rdflib import Graph, Namespace, Literal, RDF, RDFS, URIRef
from rdflib.namespace import SKOS
from urllib.parse import quote # For URL-escaping
import re
# Base IRI under which every glossary concept URI is minted
base_iri = "http://www.consensys.net/ethereum-skos#"

# Graph that accumulates all triples of the vocabulary
g = Graph()

# Namespace for the glossary concepts (SKOS and RDFS are provided by rdflib)
EX = Namespace(base_iri)

# Bind prefixes so the serialised output uses skos:, rdfs: and ex:
g.bind("skos", SKOS)
g.bind("rdfs", RDFS)
g.bind("ex", EX)

# Glossary input file; each entry is expected on its own line
glossary_file = "glossary.txt"

# Maps each URL-escaped term to the concept already created for it
terms = {}

# Read the whole glossary up front. The encoding is stated explicitly so the
# decoded text does not depend on the platform's locale default.
# NOTE(review): assumes glossary.txt is UTF-8 encoded - confirm.
with open(glossary_file, 'r', encoding="utf-8") as file:
    lines = file.readlines()
# Matches a parenthesised EIP reference such as "(EIP-155)". At least one digit
# is required so that a bare "(EIP-)" can never produce a link to "eip-.md".
# Compiled once at import time rather than on every call.
_EIP_REFERENCE = re.compile(r"\(EIP-(\d+)\)")


def create_skos_concept(normterm, term_escaped, definition):
    """Add a SKOS concept for one glossary term to the module-level graph.

    The concept is typed as ``skos:Concept`` and given ``normterm`` as its
    ``skos:prefLabel`` and ``definition`` as its ``skos:definition``. For
    every "(EIP-<number>)" reference found in the definition, a
    ``skos:broader`` link to that EIP's document in the ethereum/EIPs
    GitHub repository is added as well.

    Args:
        normterm: Display form of the term, used as the preferred label.
        term_escaped: URL-escaped form of the term, appended to ``base_iri``
            to build the concept URI.
        definition: Definition text of the term.

    Returns:
        The ``URIRef`` of the concept that was added to the graph.
    """
    concept = URIRef(base_iri + term_escaped)

    # Core description of the concept: type, label and definition
    g.add((concept, RDF.type, SKOS.Concept))
    g.add((concept, SKOS.prefLabel, Literal(normterm)))
    g.add((concept, SKOS.definition, Literal(definition)))

    # Link every referenced EIP, not just the first one mentioned. The graph
    # is a set of triples, so a repeated reference adds nothing extra.
    for reference in _EIP_REFERENCE.finditer(definition):
        eip_url = (
            'https://github.com/ethereum/EIPs/blob/master/EIPS/eip-'
            + reference.group(1)
            + '.md'
        )
        g.add((concept, SKOS.broader, URIRef(eip_url)))

    return concept
# Process each line in the glossary
for line in lines:
    # Lines of five characters or fewer (newline included) are ignored;
    # presumably these are blank lines or short section headings.
    if len(line) <= 5:
        continue

    # Split on the first colon only, so a definition may itself contain colons.
    # partition() never raises, unlike unpacking the result of split().
    term, separator, definition = line.strip().partition(":")
    term = term.strip()
    if not separator or not term:
        # Not a "term: definition" line - skip it instead of crashing
        continue
    definition = definition.strip()

    # Normalise the term's case so differently-cased duplicates share a concept
    normterm = term.title()
    # URL-escape the term for safe usage in the URI
    term_escaped = quote(normterm)

    concept = terms.get(term_escaped)
    if concept is None:
        # First occurrence: create the concept and remember it
        terms[term_escaped] = create_skos_concept(
            normterm, term_escaped, definition
        )
    else:
        # Repeated term: keep the extra definition as a comment on the
        # previously created concept
        g.add((concept, RDFS.comment, Literal(definition)))

# Save the SKOS vocabulary to a file
output_file = "ethereum_skos.ttl"
g.serialize(destination=output_file, format="turtle")
print(f"SKOS vocabulary has been created and saved as '{output_file}'.")