forked from FamilySearch/GEDCOM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
hyperlink.py
115 lines (106 loc) · 3.97 KB
/
hyperlink.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import re
def get_paths():
    """Return the input and output paths given on the command line.

    The last command-line argument is taken as the output path. Every
    argument between the program name and that last argument is a candidate
    input path; candidates that are not existing files are skipped.

    Returns:
        A ``(sources, destination)`` tuple: the list of existing input
        files in command-line order, and the output path.

    Raises:
        Exception: if fewer than two arguments follow the program name, or
            if none of the candidate inputs is an existing file.
    """
    from os.path import isfile
    from sys import argv

    if len(argv) < 3:
        raise Exception("Input files followed by an output file expected")
    # Unpack a slice instead of popping, so sys.argv itself is not modified.
    *candidates, dest = argv[1:]
    spec = [path for path in candidates if isfile(path)]
    if not spec:
        raise Exception("No input file provided")
    return spec, dest
# Resolve the paths once, when the module is executed: `srcs` is the list of
# input files read by both passes below, `dst` is the file the second pass writes.
srcs, dst = get_paths()
def slugify(bit):
    """Derive an anchor slug from a heading or an inline code reference.

    Three forms are recognised, tried in this order:

    * Text containing a ``g7:`` code span: the slug is the text between the
      last ``g7:`` prefix and the next back-tick, with ``#`` replaced by
      ``-``.
    * Text containing a back-tick code span made of upper-case letters,
      digits, ``_``, ``.`` and back-ticks: the slug is that span with the
      back-ticks removed and ``.`` replaced by ``-``.
    * Anything else: the text is lower-cased and every run of characters
      other than ``-``, ``.``, ``_``, ``a-z`` and ``0-9`` becomes one ``-``.

    Text that contains a back-tick but no span of the second form (for
    example a lower-case code span, or one containing ``#``) is handled by
    the third rule instead of failing.

    Args:
        bit: the heading line or matched code span to convert.

    Returns:
        The slug as a string.
    """
    if '`g7:' in bit:
        start = bit.rfind('`g7:') + 4
        end = bit.find('`', start)
        return bit[start:end].replace('#', '-')
    if '`' in bit:
        found = re.search('`[A-Z0-9_`.]+`', bit)
        if found is not None:
            return found.group(0).replace('`', '').replace('.', '-')
        # No tag-like code span: fall through to the generic rule rather
        # than dereferencing a missing match.
    return re.sub('[^-._a-z0-9]+', '-', bit.lower())
# Step 1: find all anchors and ABNF rules
slugs = {}       # heading slug -> line number of that heading within its file
abnf_rules = {}  # ABNF rule name -> slug of the most recent heading
table_tags = {}  # first code span of a row in a "Tag" table -> slug of the most recent heading
header_row = None  # first line of the table currently being read, if any
slug = None  # slug of the most recent heading; None until one has been seen
# NOTE(review): line numbers restart at 1 for every input file, so with
# several inputs the numbers stored in `slugs` are per-file, not global.
for src in srcs:
    # Read as UTF-8 explicitly instead of relying on the platform's locale encoding.
    with open(src, encoding='utf-8') as f:
        inabnf = False
        for num, line in enumerate(f, 1):
            if line[0] == '#':
                if '`' in line and '{' not in line:
                    # Heading with a code span and no attribute block;
                    # "X's Y" is treated like "X.Y" before slugifying.
                    slug = slugify(line.replace("'s ", '.'))
                elif '{' in line and line.find('#', line.find('{')) > 0:
                    # Heading with a '#' after its '{': the slug is the
                    # text between the last '#' and the following '}'.
                    slug = line[line.rfind('#')+1:]
                    slug = slug[:slug.find('}')]
                else:
                    # Plain heading: drop anything from '{' on, then the
                    # leading '#' marks and surrounding whitespace.
                    if '{' in line:
                        line = line[:line.find('{')]
                    slug = slugify(line.strip('# \n\r'))
                if slug in slugs:
                    raise Exception('Duplicate slug '+slug)
                slugs[slug] = num
            elif '`abnf' in line:
                inabnf = True
            elif inabnf and '`' in line:
                # Any back-tick while inside an ABNF block ends the block.
                inabnf = False
            elif inabnf and line[0] != ' ' and '=' in line:
                # An unindented line containing '=' defines a rule; its
                # first token is the rule name. Skipped if no heading has
                # been seen yet, since there is nothing to link to.
                if slug is not None:
                    abnf_rules[line.split()[0]] = slug
            elif not inabnf:
                if header_row:
                    if '|' not in line:
                        # A line without '|' ends the current table.
                        header_row = None
                    elif 'Tag' in header_row and '`' in line and slug is not None:
                        table_tags[line.split('`')[1]] = slug
                elif '|' in line:
                    header_row = line
# slug -> line number at which a link to that slug was most recently emitted
last = {}


def linkable(line, num, istable=False):
    """Return `line` with recognised code spans turned into markdown links.

    `num` is the line's number; a link is given the `{.close}` attribute
    when it lies fewer than 20 lines from the slug's recorded heading line
    or from the previous link emitted for the same slug. When `istable` is
    true only `g7:` code spans are linked.
    """
    def make_link(text, target):
        near_heading = abs(slugs[target] - num) < 20
        near_previous = abs(last.get(target, -100) - num) < 20
        last[target] = num
        link = '[' + text + '](#' + target + ')'
        if near_heading or near_previous:
            link += '{.close}'
        return link

    def link_known_slug(match):
        text = match.group(0)
        target = slugify(text)
        return make_link(text, target) if target in slugs else text

    def link_rule_or_tag(match):
        name = match.group(1)
        # ABNF rule names take precedence over table tags.
        for table in (abnf_rules, table_tags):
            if name in table:
                return make_link(match.group(0), table[name])
        return match.group(0)

    # The look-behind skips spans already preceded by '[', '.' or a back-tick.
    result = re.sub(r'(?<![\[.`])`g7:[-A-Z0-9a-z`._#]+`', link_known_slug, line)
    if not istable:
        result = re.sub(r'(?<![\[.`])`[A-Z0-9`._#]+`', link_known_slug, result)
        result = re.sub(r'(?<![\[.`])`([A-Za-z0-9]+)`', link_rule_or_tag, result)
    return result
# Step 2: add {#anchors} for tags; add hyperlinks
# Every input is copied, in order, into the single output file. Both sides
# are opened as UTF-8 explicitly instead of using the platform's locale encoding.
with open(dst, 'w', encoding='utf-8') as to:
    for src in srcs:
        with open(src, encoding='utf-8') as f:
            for num, line in enumerate(f, 1):
                if line[0] == '#':
                    if '`' in line and '{' not in line:
                        # Heading with a code span and no attribute block:
                        # append an explicit anchor built from its slug.
                        slug = slugify(line.replace("'s ", '.'))
                        print(line.strip(), '{- #'+slug+'}', file=to)
                    else:
                        # Other headings are copied unchanged.
                        to.write(line)
                elif '|' in line:
                    # Lines containing '|' are linked in table mode.
                    to.write(linkable(line, num, True))
                else:
                    to.write(linkable(line, num))
# Step 3 is adding links inside gedstruct code blocks. This cannot be done in markdown, so it is handled by a separate processor for the HTML