-
Notifications
You must be signed in to change notification settings - Fork 0
/
artifacts.py
115 lines (106 loc) · 2.9 KB
/
artifacts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import re
# Generic placeholder tokens: "<SENTINEL_0>" through "<SENTINEL_63>".
placeholder_tokens = [f"<SENTINEL_{number}>" for number in range(64)]

# One token per known composer, plus "<UNKNOWN_COMPOSER>".
# NOTE(review): "<MENDELSSON>" and "<RACHMANIOFF>" are spelled differently
# from the composer names they are mapped from in composer_token_map. They
# are runtime strings and are kept verbatim; presumably they belong to an
# existing vocabulary, so confirm before renaming them.
composer_tokens = [
    "<SCRIABIN>",
    "<FRANCK>",
    "<MOZART>",
    "<CHOPIN>",
    "<MENDELSSON>",
    "<LISZT>",
    "<SCHUBERT>",
    "<BRAHMS>",
    "<HAYDN>",
    "<BEETHOVEN>",
    "<BALAKIREV>",
    "<SCHUMANN>",
    "<RACHMANIOFF>",
    "<UNKNOWN_COMPOSER>",
    "<BACH>",
]

# Complete special-token list. Order matters for anything that indexes into
# it: the literal tokens come first, followed by every composer token and
# then every placeholder token.
special_tokens = [
    "<PAD>",
    "<CLS>",
    "<EOS>",
    # "<SENTINEL_78>" through "<SENTINEL_95>": a second sentinel range that
    # does not overlap the 0-63 range in placeholder_tokens.
    *(f"<SENTINEL_{number}>" for number in range(78, 96)),
    "<SCORES>",
    "<PERFORMANCE>",
    "<CLEAN_TIME>",
    "<CLEAN_EVERYTHING>",
    "<CLEAN_VOLUME>",
    "<CLEAN_PITCH>",
    "<LOW_FROM_MEDIAN>",
    "<HIGH_FROM_MEDIAN>",
    "<ABOVE_LOW_QUARTILE>",
    "<BELOW_LOW_QUARTILE>",
    "<ABOVE_HIGH_QUARTILE>",
    "<BELOW_HIGH_QUARTILE>",
    "<MIDDLE_QUARTILES>",
    "<EXTREME_QUARTILES>",
    "<LOUD>",
    "<SOFT>",
    "<ABOVE_VERY_SOFT>",
    "<VERY_SOFT>",
    "<VERY_LOUD>",
    "<BELOW_VERY_LOUD>",
    "<MODERATE_VOLUME>",
    "<EXTREME_VOLUME>",
    "<CLEAN>",
    "<NOISY_VOLUME>",
    "<NOISY_PITCH>",
    "<NOISY_START_TIME>",
    "<NOISY_TIME>",
    "<NOISY>",
    *composer_tokens,
    *placeholder_tokens,
]

# Full composer name -> composer token. "<UNKNOWN_COMPOSER>" has no entry
# here because it does not correspond to a name.
composer_token_map: dict[str, str] = {
    "Alexander Scriabin": "<SCRIABIN>",
    "César Franck": "<FRANCK>",
    "Wolfgang Amadeus Mozart": "<MOZART>",
    "Frédéric Chopin": "<CHOPIN>",
    "Felix Mendelssohn": "<MENDELSSON>",
    "Franz Liszt": "<LISZT>",
    "Franz Schubert": "<SCHUBERT>",
    "Johannes Brahms": "<BRAHMS>",
    "Joseph Haydn": "<HAYDN>",
    "Ludwig van Beethoven": "<BEETHOVEN>",
    "Mily Balakirev": "<BALAKIREV>",
    "Robert Schumann": "<SCHUMANN>",
    "Sergei Rachmaninoff": "<RACHMANIOFF>",
    "Johann Sebastian Bach": "<BACH>",
}
def create_composer_regex_map() -> dict[re.Pattern, str]:
    """Build a lookup from compiled surname patterns to composer tokens.

    For each entry of ``composer_token_map`` the last whitespace-separated
    word of the full name is treated as the surname and compiled into a
    case-insensitive, whole-word regular expression.

    Returns:
        A dict keyed by the compiled surname patterns, in the iteration
        order of ``composer_token_map``, with the composer tokens as values.
    """

    def _surname_pattern(full_name: str) -> re.Pattern:
        # re.escape keeps any regex metacharacters in a surname literal.
        surname = full_name.split()[-1]
        return re.compile(rf"\b{re.escape(surname)}\b", re.IGNORECASE)

    return {
        _surname_pattern(full_name): token
        for full_name, token in composer_token_map.items()
    }
# Built once at import time; get_composer_token searches these compiled
# patterns on every call.
composer_regex_map: dict[re.Pattern, str] = create_composer_regex_map()
def get_composer_token(composer: str) -> str:
    """Return the composer token for a free-form composer string.

    Every pattern in ``composer_regex_map`` is searched for in ``composer``.

    Args:
        composer: Text to search for a known composer surname.

    Returns:
        The token of the matching composer when exactly one pattern matches.
        ``"<UNKNOWN_COMPOSER>"`` is returned both when nothing matches and
        when several different composers match.
    """
    matched_tokens: list[str] = [
        token
        for pattern, token in composer_regex_map.items()
        if pattern.search(composer)
    ]
    # Zero hits and multiple hits are treated the same way: no single
    # composer can be named, so fall back to the unknown token.
    if len(matched_tokens) != 1:
        return "<UNKNOWN_COMPOSER>"
    return matched_tokens[0]