-
Notifications
You must be signed in to change notification settings - Fork 0
/
tables.sql
913 lines (826 loc) · 25 KB
/
tables.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
-- DB-l10n. Localization of database content
-- Copyright © 2013 Basil Peace
/*
This file is part of DB-l10n.
DB-l10n is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
DB-l10n is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with DB-l10n. If not, see <http://www.gnu.org/licenses/>.
*/
/*
TODOs:
# Force character case on data insert (?)
# Check language_tags in tables
# function for check whether subtag is private use
# Canonicalization — ordering of extensions and something else ?
# http://www.iana.org/assignments/language-tags/language-tags.xhtml#language-tags-1 for tests
# Check for non-circular prefixes (and preferred-value)
# Regions - hierarchy
# No special checks for macrolanguage (scope, ...)
# Full text search support
*/
-- Note on case of characters:
-- Language tags should be treated and compared case-insensitive.
-- Canonicalization of language tags regularizes the case of the subtags. All
-- comparisons evolving non-canonical tags are made in lowercase.
-- However, tags and subtags in tables are assumed to be in the regularized
-- case. It works for data imported from IANA registry, but user-defined
-- subtags should be added carefully, in proper case, or something could be
-- broken
-- Note on descriptions:
-- According to [BCP 47]:
-- 1. A particular description can be used more than once for multiple records
-- of one type, if all of them, excepting at most one, are deprecated. If
-- there is one non-deprecated, it should be used as preferred-value for all
-- deprecated records. Check of this rule isn't implemented yet
-- 2. Uniqueness of description should be treated insensitively to formatting
-- variations. This can't be reached by DBMS features and won't be
-- implemented
-- [RFC5646] ABNF includes extlang subtag into language subtag. However, we
-- store them separately
CREATE DOMAIN language_subtag character varying(8) COLLATE "C"; -- Primary language subtag
-- See [RFC5646], sect. 2.2.2., rule 4:
-- Although ABNF allows use of three consecutive extlang subtags, subtags can't
-- be Prefixes to another extlang subtags, so only use of one extlang subtag
-- is valid
CREATE DOMAIN extlang_subtag character varying(3) COLLATE "C";
CREATE DOMAIN script_subtag character varying(4) COLLATE "C";
CREATE DOMAIN region_subtag character varying(3) COLLATE "C";
CREATE DOMAIN variant_subtag character varying(8) COLLATE "C";
CREATE DOMAIN variant_subtags character varying(8)[] COLLATE "C";
CREATE DOMAIN extension_identifier character(1) COLLATE "C"; -- <> 'x'
CREATE DOMAIN extension_subtag character varying(8) COLLATE "C";
CREATE DOMAIN extension_subtags character varying(8)[] COLLATE "C";
CREATE TYPE extension AS (
-- 1. "identifier" is NOT NULL and differs from 'x'
-- 2. "subtags" have length >= 1
-- 3. Each item in "subtags" has length 2 to 8
identifier extension_identifier, -- subtag ? name ?
subtags extension_subtags
);
-- [BCP47] isn't clear whether privateuse subtag is one group of from 1 to 8
-- alphanumerical characters or is a sequence of such groups.
-- * ABNF states that privateuse subtag is a sequence starting with 'x-'
-- * Text says that each group in this sequence is separate privateuse subtag
-- We follow the latter definition, defining type for one group
CREATE DOMAIN privateuse_subtag character varying(8) COLLATE "C";
CREATE DOMAIN privateuse_subtags character varying(8)[] COLLATE "C";
CREATE DOMAIN grandfathered_tag character varying(11) COLLATE "C";
CREATE DOMAIN grandfathered_tags character varying(11)[] COLLATE "C";
CREATE TYPE language_tag AS (
-- 1. If "grandfathered" is NOT NULL, all other fields are NULL
-- 2. If "privateuse" contains at least one element, "language" can be NULL
-- 3. If "language" is NULL, all fields, except only one from "privateuse" and
-- "grandfathered", are NULL
-- 4. If "language" is NOT NULL, non-array fields ("extlang", "script" and
-- "region") can be NULL, can't be empty strings. Array fields ("variants",
-- "extensions" and "privateuse") are NOT NULL, can be zero-length
-- 5. Items of arrays of strings ("variants" and "privateuse") are always
-- NOT NULL and have length > 0
-- 6. Rules for items of "extensions" are described above
language language_subtag,
extlang extlang_subtag,
script script_subtag,
region region_subtag,
variants variant_subtags,
extensions extension[],
privateuse privateuse_subtags,
grandfathered grandfathered_tag
);
/*
A tag is considered "valid" if it satisfies these conditions:
o The tag is well-formed.
o Either the tag is in the list of grandfathered tags or all of its
primary language, extended language, script, region, and variant
subtags appear in the IANA Language Subtag Registry as of the
particular registry date.
o There are no duplicate variant subtags.
o There are no duplicate singleton (extension) subtags.
*/
CREATE VIEW grandfathered_tags_list(tag) AS
SELECT
tag::grandfathered_tag
FROM
(VALUES
('en-GB-oed'),
('i-ami'),
('i-bnn'),
('i-default'),
('i-enochian'),
('i-hak'),
('i-klingon'),
('i-lux'),
('i-mingo'),
('i-navajo'),
('i-pwn'),
('i-tao'),
('i-tay'),
('i-tsu'),
('sgn-BE-FR'),
('sgn-BE-NL'),
('sgn-CH-DE'),
('art-lojban'),
('cel-gaulish'),
('no-bok'),
('no-nyn'),
('zh-guoyu'),
('zh-hakka'),
('zh-min'),
('zh-min-nan'),
('zh-xiang')
) AS grandfathered_tags_list(tag)
;
-- Validates language_tag
-- Returns:
-- 0 = tag is valid
-- 1 = tag is well-formed, but some non-custom subtags weren't recognized
-- 5 = format of tag is invalid
CREATE FUNCTION validate_language_tag(langtag language_tag, OUT res int)
LANGUAGE plpgsql STABLE RETURNS NULL ON NULL INPUT
AS $$
DECLARE
variant variant_subtag;
extension extension;
extension_subtag extension_subtag;
privateuse privateuse_subtag;
BEGIN
-- TODO: maybe use RE (we need check for alpha(num) characters and also check for length)
-- We can also made functions returning regular expressions for use them here and below
res := 0;
IF langtag.grandfathered IS NOT NULL THEN
IF
EXISTS(
SELECT
*
FROM
grandfathered_tags_list
WHERE
lower(tag) = lower(langtag.grandfathered)
)
AND langtag.language IS NULL
AND langtag.extlang IS NULL
AND langtag.script IS NULL
AND langtag.region IS NULL
AND langtag.variants IS NULL
AND langtag.extensions IS NULL
AND langtag.privateuse IS NULL
THEN
RETURN 0;
ELSE
RETURN 5;
END IF;
END IF;
IF langtag.privateuse IS NULL THEN
RETURN 5;
END IF;
FOREACH privateuse IN ARRAY langtag.privateuse LOOP
IF privateuse IS NULL OT char_length(private_use) = 0 THEN
RETURN 5;
END IF;
END LOOP;
IF
langtag.language IS NULL
THEN
IF
langtag.extlang IS NULL
AND langtag.script IS NULL
AND langtag.region IS NULL
AND langtag.variants IS NULL
AND langtag.extensions IS NULL
AND array_length(langtag.privateuse, 1) > 0
THEN
RETURN 0;
ELSE
RETURN 5;
END IF;
END IF;
IF char_length(langtag.language) = 0 THEN
RETURN 5;
END IF;
IF -- TODO: custom subtags
NOT EXISTS(
SELECT
*
FROM
languages
WHERE
lower(subtag) = lower(langtag.language)
)
THEN
res := 1;
END IF;
-- TODO: check length
IF langtag.extlang IS NOT NULL THEN
IF char_length(langtag.extlang) = 0 THEN
RETURN 5;
END IF;
IF -- TODO: custom subtags
NOT EXISTS(
SELECT
*
FROM
extlangs
WHERE
lower(subtag) = lower(langtag.extlang)
)
THEN
res := 1;
END IF;
END IF;
-- TODO: check length
IF langtag.script IS NOT NULL THEN
IF char_length(langtag.script) = 0 THEN
RETURN 5;
END IF;
IF -- TODO: custom subtags
NOT EXISTS(
SELECT
*
FROM
scripts
WHERE
lower(subtag) = lower(langtag.script)
)
THEN
res := 1;
END IF;
END IF;
-- TODO: check length
IF langtag.region IS NOT NULL THEN
IF char_length(langtag.region) = 0 THEN
RETURN 5;
END IF;
IF -- TODO: custom subtags
NOT EXISTS(
SELECT
*
FROM
regions
WHERE
lower(subtag) = lower(langtag.region)
)
THEN
res := 1;
END IF;
END IF;
IF langtag.variants IS NULL THEN
RETURN 5;
END IF;
FOREACH variant IN ARRAY langtag.variants LOOP
IF variant IS NULL OR NOT (char_length(variant) BETWEEN 5 AND 8) THEN
RETURN 5;
END IF;
IF -- TODO: custom subtags
NOT EXISTS(
SELECT
*
FROM
variants
WHERE
lower(subtag) = lower(variant)
)
THEN
res := 1;
END IF;
END LOOP;
-- TODO: CHECK extensions
-- FOREACH extension IN ARRAY langtag.extensions LOOP
-- IF NOT
-- IF variant IS NULL OR NOT (char_length(variant) BETWEEN 5 AND 8) THEN
-- RETURN 5;
-- END IF;
-- IF -- TODO: custom subtags
-- NOT EXISTS(
-- SELECT
-- *
-- FROM
-- variants
-- WHERE
-- lower(subtag) = lower(variant)
-- )
-- THEN
-- res := 1;
-- END IF;
-- END LOOP;
END
$$;
CREATE FUNCTION str_to_extension(str character varying, OUT res extension)
LANGUAGE plpgsql IMMUTABLE RETURNS NULL ON NULL INPUT
AS $$
DECLARE
arr character varying[] COLLATE "C";
BEGIN
arr := regexp_matches(str COLLATE "C", '^([0-9a-wy-z])((?:-[0-9A-Za-z]{2,8})+)$', 'ix');
res.identifier := arr[1];
res.subtags := string_to_array(arr[2], '-');
res.subtags := res.subtags[2:array_length(res.subtags, 1)];
END
$$;
CREATE FUNCTION str_to_extensions(str character varying, OUT res extension[])
LANGUAGE plpgsql IMMUTABLE -- RETURNS empty array on NULL input
AS $$
DECLARE
ext_str character varying COLLATE "C";
BEGIN
res := ARRAY[]::extension[];
LOOP
ext_str := (regexp_matches(str COLLATE "C", '^(-[0-9a-wy-z](?:-[0-9A-Za-z]{2,8})+)', 'ix'))[1];
EXIT WHEN ext_str IS NULL;
res := res || str_to_extension(substr(ext_str, 2)); -- Removing '-' prefix
str := substr(str, char_length(ext_str) + 1);
END LOOP;
END
$$;
CREATE FUNCTION str_to_langtag(str character varying, OUT res language_tag)
LANGUAGE plpgsql STABLE RETURNS NULL ON NULL INPUT
AS $$
DECLARE
arr character varying[] COLLATE "C";
BEGIN
-- Well-formedness doesn't depend on maybe wrong or incomplete content
-- of tables
IF EXISTS(
SELECT
*
FROM
grandfathered_tags_list
WHERE
lower(tag) = lower(str COLLATE "C")
) THEN
res.grandfathered := str;
ELSE
arr := regexp_matches(str COLLATE "C", '^
(?: # langtag
(?:
([a-z]{2,3})(?:-([a-z]{3}))? # language [-extlang]
|([a-z]{4,8})
)
(?:-([a-z]{4}))? # [-script]
(?:-([a-z]{2}|[0-9]{3}))? # [-region]
((?:-(?:[a-z]{5,8}|[0-9][0-9a-z]{3}))*)? # *(-variant)
((?:-[0-9a-wy-z](?:-[0-9a-z]{2,8})+)*)? # *(-extension)
(?:-x((?:-[0-9a-z]{1,8})+))? # *(-x-privateuse)
|
(?:x((?:-[0-9a-z]{1,8})+))? # x-privateuse
)
$', 'ix');
IF arr IS NULL THEN
RAISE EXCEPTION 'Invalid language tag: "%"', str; -- TODO
END IF;
res.language := COALESCE(arr[1], arr[3]);
res.extlang := arr[2];
res.script := arr[4];
res.region := arr[5];
res.variants := string_to_array(arr[6], '-');
IF res.variants IS NOT NULL THEN
res.variants := res.variants[2:array_length(res.variants, 1)];
END IF;
res.extensions := str_to_extensions(arr[7]);
res.privateuse := string_to_array(COALESCE(arr[8], arr[9]), '-');
IF res.privateuse IS NOT NULL THEN
res.privateuse := res.privateuse[2:array_length(res.privateuse, 1)];
END IF;
END IF;
END
$$;
CREATE FUNCTION extension_to_str(extension extension) RETURNS character varying
LANGUAGE plpgsql IMMUTABLE RETURNS NULL ON NULL INPUT
AS $$
BEGIN
RETURN (extension).identifier || '-' || array_to_string((extension).subtags, '-');
END
$$;
CREATE FUNCTION extensions_to_str(extensions extension[]) RETURNS character varying
LANGUAGE plpgsql IMMUTABLE RETURNS NULL ON NULL INPUT
AS $$
DECLARE
ext extension;
res_arr character varying[] COLLATE "C" = ARRAY[]::character varying[] COLLATE "C";
BEGIN
FOREACH ext IN ARRAY extensions LOOP
res_arr := res_arr || extension_to_str(ext);
END LOOP;
RETURN array_to_string(res_arr, '-');
END
$$;
-- Converts language_tag to string
-- TODO: Does is suppose that language_tag is valid (or maybe canonicalized)?
CREATE FUNCTION langtag_to_str(langtag language_tag) RETURNS character varying
LANGUAGE plpgsql IMMUTABLE RETURNS NULL ON NULL INPUT
AS $$
BEGIN
IF langtag.grandfathered IS NOT NULL THEN
RETURN langtag.grandfathered;
END IF;
IF langtag.language IS NULL THEN
RETURN 'x-' || array_to_string(langtag.privateuse, '-');
END IF;
RETURN
langtag.language
|| COALESCE('-' || langtag.extlang, '')
|| COALESCE('-' || langtag.script, '')
|| COALESCE('-' || langtag.region, '')
|| CASE
WHEN array_length(langtag.variants, 1) > 0
THEN '-' || array_to_string(langtag.variants, '-')
ELSE ''
END
|| CASE
WHEN array_length(langtag.extensions, 1) > 0
THEN '-' || extensions_to_str(langtag.extensions)
ELSE ''
END
|| CASE
WHEN array_length(langtag.privateuse, 1) > 0
THEN '-x-' || array_to_string(langtag.privateuse, '-')
ELSE ''
END
;
END
$$;
CREATE FUNCTION canonicalize_language_tag_to_canonical_form(langtag language_tag, OUT res language_tag)
LANGUAGE plpgsql STABLE RETURNS NULL ON NULL INPUT
AS $$
DECLARE
i int;
j int;
primary_langtag language_tag;
BEGIN
-- TODO:
-- Handle 'und', 'ZZ', etc.
res := langtag;
res.language := lower(res.language);
res.extlang := lower(res.extlang);
res.script := initcap(res.script);
res.region := upper(res.region);
FOR i IN 1 .. array_length(res.variants, 1) LOOP
res.variants[i] := lower(res.variants[i]);
END LOOP;
FOR i IN 1 .. array_length(res.extensions, 1) LOOP
res.extensions[i].identifier := lower(res.extensions[i].identifier);
FOR j IN 1 .. array_length(res.extensions[i].subtags, 1) LOOP
res.extensions[i].subtags[j] := lower(res.extensions[i].subtags[j]);
END LOOP;
END LOOP;
FOR i IN 1 .. array_length(res.privateuse, 1) LOOP
res.privateuse[i] := lower(privateuse[i]);
END LOOP;
res.grandfathered := lower(res.grandfathered);
IF res.extensions IS NOT NULL THEN
res.extensions := array_agg(SELECT * FROM unnest(res.extensions) AS t ORDER BY t.indentifier);
END IF;
res := COALESCE(SELECT preferred_value FROM redundants WHERE tag = res, res);
res := COALESCE(SELECT preferred_value FROM grandfathereds WHERE tag = res.grandfathered, res);
primary_langtag := SELECT preferred_value FROM extlangs WHERE subtag = res.extlang;
IF FOUND THEN
res.extlang := NULL;
res.language := primary_langtag.language;
END IF;
res.script := COALESCE(SELECT preferred_value FROM scripts WHERE subtag = res.script, res.script);
res.region := COALESCE(SELECT preferred_value FROM scripts WHERE subtag = res.region, res.region);
FOR i IN 1 .. array_length(res.variants, 1) LOOP
res.variants[i] := COALESCE(SELECT preferred_value FROM variants WHERE subtag = res.variants[i], res.variants[i]);
END LOOP;
-- TODO: Handle Suppress-Script ?
IF res.region = '001' THEN -- Ignore region-neutral ('World') tag
res.region := NULL;
END IF;
-- TODO: res.extlang can still be NOT NULL (see also the next function)
END
$$;
CREATE FUNCTION canonicalize_language_tag_to_extlang_form(langtag language_tag, OUT res language_tag)
LANGUAGE plpgsql STABLE RETURNS NULL ON NULL INPUT
AS $$
DECLARE
language language_subtag;
BEGIN
-- 1. Canonicalize language tag
res := canonicalize_language_tag_to_canonical_form(langtag);
-- 2. If language subtag is also extlang subtag, then
-- prepend tag with extlang prefix
-- res.language is already in lowercase
SELECT prefix FROM extlangs WHERE extlang = res.language INTO language;
IF FOUND THEN
res.extlang := res.language;
res.language := language;
END IF;
END
$$;
CREATE TYPE scope_enum AS ENUM (
-- [BCP 47] Omission of scope field (i.e. NULL) means individual language
'macrolanguage',
'collection',
'special',
'private-use'
);
CREATE TABLE scripts (
subtag script_subtag PRIMARY KEY,
added date NOT NULL,
deprecated date,
preferred_value script_subtag
REFERENCES scripts
ON UPDATE CASCADE ON DELETE RESTRICT,
CHECK (preferred_value IS NULL OR deprecated IS NOT NULL)
);
CREATE TABLE script_descriptions (
id serial PRIMARY KEY,
subtag script_subtag NOT NULL
REFERENCES scripts
ON UPDATE CASCADE ON DELETE CASCADE,
description text NOT NULL
);
CREATE TABLE script_comments (
id serial PRIMARY KEY,
subtag script_subtag NOT NULL
REFERENCES scripts
ON UPDATE CASCADE ON DELETE CASCADE,
comment text NOT NULL
);
CREATE TABLE languages (
subtag language_subtag PRIMARY KEY,
added date NOT NULL,
deprecated date,
preferred_value language_subtag
REFERENCES languages
ON UPDATE CASCADE ON DELETE RESTRICT,
CHECK (preferred_value IS NULL OR deprecated IS NOT NULL),
suppress_script script_subtag
REFERENCES scripts
ON UPDATE CASCADE ON DELETE RESTRICT,
macrolanguage language_subtag
REFERENCES languages
ON UPDATE CASCADE ON DELETE RESTRICT,
scope scope_enum
);
CREATE TABLE language_descriptions (
id serial PRIMARY KEY,
subtag language_subtag NOT NULL
REFERENCES languages
ON UPDATE CASCADE ON DELETE CASCADE,
description text NOT NULL
);
CREATE TABLE language_comments (
id serial PRIMARY KEY,
subtag language_subtag NOT NULL
REFERENCES languages
ON UPDATE CASCADE ON DELETE CASCADE,
comment text NOT NULL
);
CREATE TABLE extlangs (
subtag extlang_subtag PRIMARY KEY,
added date NOT NULL,
deprecated date,
preferred_value language_tag NOT NULL, -- TO THINK
prefix language_tag NOT NULL, -- TODO: check
suppress_script script_subtag
REFERENCES scripts
ON UPDATE CASCADE ON DELETE RESTRICT,
macrolanguage language_subtag
REFERENCES languages
ON UPDATE CASCADE ON DELETE RESTRICT,
-- Although Scope field may present in extlang, it's meaning is not entirely
-- clear (since extlangs can't be prefixes to another extlangs, scope
-- probably should not be macrolanguage or collection). Currently none of
-- extlangs in IANA registry have defined Scope field. So, no CHECKs are
-- currently provided. Regardless of that, private-use scope has meaning
scope scope_enum
);
CREATE TABLE extlang_descriptions (
id serial PRIMARY KEY,
subtag extlang_subtag NOT NULL
REFERENCES extlangs
ON UPDATE CASCADE ON DELETE CASCADE,
description text NOT NULL
);
CREATE TABLE extlang_comments (
id serial PRIMARY KEY,
subtag extlang_subtag NOT NULL
REFERENCES extlangs
ON UPDATE CASCADE ON DELETE CASCADE,
comment text NOT NULL
);
CREATE TABLE regions (
subtag region_subtag PRIMARY KEY,
added date NOT NULL,
deprecated date,
preferred_value region_subtag
REFERENCES regions
ON UPDATE CASCADE ON DELETE RESTRICT,
CHECK (preferred_value IS NULL OR deprecated IS NOT NULL)
);
CREATE TABLE region_descriptions (
id serial PRIMARY KEY,
subtag region_subtag NOT NULL
REFERENCES regions
ON UPDATE CASCADE ON DELETE CASCADE,
description text NOT NULL
);
CREATE TABLE region_comments (
id serial PRIMARY KEY,
subtag region_subtag NOT NULL
REFERENCES regions
ON UPDATE CASCADE ON DELETE CASCADE,
comment text NOT NULL
);
CREATE TABLE variants (
subtag variant_subtag PRIMARY KEY,
added date NOT NULL,
deprecated date,
preferred_value variant_subtag
REFERENCES variants
ON UPDATE CASCADE ON DELETE RESTRICT,
CHECK (preferred_value IS NULL OR deprecated IS NOT NULL)
);
CREATE TABLE variant_prefixes (
subtag variant_subtag NOT NULL
REFERENCES variants (subtag)
ON UPDATE CASCADE ON DELETE CASCADE,
prefix language_tag NOT NULL, -- TODO: check
PRIMARY KEY (subtag, prefix)
);
CREATE TABLE variant_descriptions (
id serial PRIMARY KEY,
subtag variant_subtag NOT NULL
REFERENCES variants
ON UPDATE CASCADE ON DELETE CASCADE,
description text NOT NULL
);
CREATE TABLE variant_comments (
id serial PRIMARY KEY,
subtag variant_subtag NOT NULL
REFERENCES variants
ON UPDATE CASCADE ON DELETE CASCADE,
comment text NOT NULL
);
CREATE TABLE grandfathereds (
tag grandfathered_tag PRIMARY KEY,
added date NOT NULL,
deprecated date,
preferred_value language_tag, -- TO THINK
CHECK (preferred_value IS NULL OR deprecated IS NOT NULL)
);
CREATE TABLE grandfathered_descriptions (
id serial PRIMARY KEY,
tag grandfathered_tag NOT NULL
REFERENCES grandfathereds
ON UPDATE CASCADE ON DELETE CASCADE,
description text NOT NULL
);
CREATE TABLE grandfathered_comments (
id serial PRIMARY KEY,
tag grandfathered_tag NOT NULL
REFERENCES grandfathereds
ON UPDATE CASCADE ON DELETE CASCADE,
comment text NOT NULL
);
CREATE TABLE redundants (
tag language_tag PRIMARY KEY,
added date NOT NULL,
deprecated date,
preferred_value language_tag, -- TO THINK
CHECK (preferred_value IS NULL OR deprecated IS NOT NULL)
);
CREATE TABLE redundant_descriptions (
id serial PRIMARY KEY,
tag language_tag NOT NULL
REFERENCES redundants
ON UPDATE CASCADE ON DELETE CASCADE,
description text NOT NULL
);
CREATE TABLE redundant_comments (
id serial PRIMARY KEY,
tag language_tag NOT NULL
REFERENCES redundants
ON UPDATE CASCADE ON DELETE CASCADE,
comment text NOT NULL
);
-- Records for extensions are stored in the separate registry,
-- with the following differences from records for other subtags:
-- 1. There is no Deprecated field
-- 2. There is only one Description field
-- 3. There could be at most one Comments field
CREATE TABLE extensions (
identifier extension_identifier PRIMARY KEY, -- <> 'x'
added date NOT NULL,
description text NOT NULL,
comments text,
-- TODO: Add length limits
rfc character varying NOT NULL,
authority character varying NOT NULL,
contacting_email character varying NOT NULL,
mailing_list character varying NOT NULL,
url character varying NOT NULL
);
CREATE TYPE langtag_match_type AS ENUM (
-- TO ADD:
-- Macrolanguage/collection match
'Exact match',
-- 'Partial privateuse match',
'Extension match',
-- 'Partial extension match',
'Variant match',
-- 'Partial variant match',
'Region match',
'Macro region match', -- Except 001 World
'Region-neutral match',
'Orthographic affinity match',
'Preferred region match',
-- According to [BCP47], extensions are orthogonal to language tag matching
-- However, some extensions (namely 't' extension for Transformed Content)
-- is not orthogonal, and sometimes it can be desired to have looked langtag
-- with extension 't' matched compared langtag without it (if application
-- can't exactly provide asked transformation)
'Extension neutral match',
'Any region match',
'Any language match',
'Script mismatch',
'No match'
);
CREATE FUNCTION compare_langtags(langtag1 language_tag, langtag2 language_tag)
RETURNS langtag_match_type
LANGUAGE plpgsql STABLE RETURNS NULL ON NULL INPUT
AS $$
BEGIN
END
$$;
CREATE OPERATOR = (
LEFTARG = language_tag,
RIGHTARG = language_tag,
PROCEDURE = compare_langtags,
COMMUTATOR = =
);
CREATE TABLE orthographic_affinity(
langtag language_tag NOT NULL PRIMARY KEY,
affilangtag language_tag
);
CREATE FUNCTION localize_table(
table_name name,
columns name[],
table_nsp_name name DEFAULT current_schema()
) RETURNS VOID
LANGUAGE plpgsql VOLATILE -- TODO: RETURNS NULL ON NULL INPUT
AS $$
DECLARE
table_oid oid;
table_nsp_oid oid;
i RECORD;
BEGIN
/*
TODOs:
1. Nonexisting columns in `columns`
2. Check of datatype (character varying, text) (DOMAINS ?) (composite types ?)
3. Indices ?
4. Collation ?
*/
-- IF
-- array_length(columns) = 0
-- THEN
SELECT
oid
FROM
pg_namespace
WHERE
nspname = table_nsp_name
INTO STRICT
table_nsp_oid;
SELECT
oid
FROM
pg_class
WHERE
relname = table_name
AND relnamespace = table_nsp_oid
INTO STRICT
table_oid;
-- We rely on PostgreSQL's handling of types (column pg_attribute.atttypmod and function format_type)
-- Especially, number of dimensions in arrays don't checked since ...
EXECUTE '
CREATE TABLE '||COALESCE(quote_ident(table_nsp_name)||'.', '')||quote_ident(table_name||'_l10n')+' (
langtag '||l10n||'.language_tag PRIMARY KEY,
'||(
SELECT
string_agg(column_name_type, ', ')
FROM (
SELECT
quote_ident(attname)||' '|| format_type(atttypid, atttypmod) AS column_name_type
FROM
pg_attribute
WHERE
attrelid = table_oid
AND attname IN (unnest(columns))
ORDER BY
attnum
) AS columns
)
||');'
;
FOR i IN SELECT attname, format_type(atttypid, atttypmod) FROM pg_attribute WHERE attrelid = table_oid AND attname IN (unnest(columns)) ORDER BY attnum LOOP
END LOOP;
END
$$;