forked from EricLBuehler/mistral.rs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_uqff_card.py
105 lines (83 loc) · 3.34 KB
/
generate_uqff_card.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# Generate a UQFF Hugging Face model card .md file.
#
# Interactively prompts for:
#   - the original model ID and the display (upload target) model ID,
#   - whether the model is a vision model (and if so, its architecture),
#   - one or more UQFF files, each with its quantization name(s) and, for
#     multi-quantization files, the topology file used to build it,
# then writes a Markdown model card to a user-chosen output file.
msg = "This script is used to generate a Hugging Face model card."
print("-" * len(msg))
print(msg)
print("-" * len(msg))
model_id = input("Please enter the original model ID: ")
display_model_id = input(
    "Please enter the model ID where this model card will be displayed: "
)
is_vision = input("Is this a vision model (yes/no): ").strip().lower() == "yes"
if is_vision:
    arch = input("What is the vision model architecture?: ").strip().lower()
output = f"""---
tags:
- uqff
- mistral.rs
base_model: {model_id}
base_model_relation: quantized
---
<!-- Autogenerated from user input. -->
"""
output += f"# `{model_id}`, UQFF quantization\n\n"
# Fixed list numbering: the final item was mislabeled "3)" in the original.
output += """
Run with [mistral.rs](https://github.com/EricLBuehler/mistral.rs). Documentation: [UQFF docs](https://github.com/EricLBuehler/mistral.rs/blob/master/docs/UQFF.md).
1) **Flexible** 🌀: Multiple quantization formats in *one* file format with *one* framework to run them all.
2) **Reliable** 🔒: Compatibility ensured with *embedded* and *checked* semantic versioning information from day 1.
3) **Easy** 🤗: Download UQFF models *easily* and *quickly* from Hugging Face, or use a local file.
4) **Customizable** 🛠️: Make and publish your own UQFF files in minutes.
"""
print(" NOTE: Getting metadata now, press CTRL-C when you have entered all files")
print(
    " NOTE: If multiple quantizations were used: enter the quantization names, and then in the next prompt, the topology file used."
)
output += "\n## Examples\n"
output += "|Quantization type(s)|Example|\n|--|--|\n"
# Hoisted out of the loop: both values are invariant per run. The original
# rebuilt the arch flag inside the loop as `arch = f"-a {arch}"`, which
# prepended another "-a " on every iteration and corrupted the example
# command for every file after the first.
if is_vision:
    cmd = "vision-plain"
    arch_flag = f"-a {arch}"
else:
    cmd = "plain"
    arch_flag = ""
topologies = {}  # UQFF filename -> path of the topology file that produced it
n = 0  # number of files entered; defined before `try` so the check below is safe
try:
    while True:
        print(
            f" NOTE: Next file. Have processed {n} files. Press CTRL-C now if there are no more."
        )
        file = input("Enter UQFF filename (with extension): ").strip()
        quants = input(
            "Enter quantization NAMES used to make that file (single quantization name, OR if multiple, comma delimited): "
        )
        if "," in quants:
            quant_names = [q.strip().upper() for q in quants.split(",")]
            topology = input(
                "Enter topology used to make UQFF with multiple quantizations: "
            )
            topologies[file] = topology
            # Precomputed join: nesting `",".join(...)` directly inside the
            # f-string reuses the quote character, a syntax error before
            # Python 3.12 (PEP 701).
            joined = ",".join(quant_names)
            output += f"|{joined} (see topology for this file)|"
        else:
            output += f"|{quants.strip().upper()}|"
        output += f"`./mistralrs-server -i {cmd} -m {display_model_id} {arch_flag} --from-uqff {file}`|\n"
        n += 1
        print()
except KeyboardInterrupt:
    # CTRL-C is the documented way to finish entering files.
    pass
if n == 0:
    raise ValueError("Need at least one file")
if topologies:
    output += "\n\n## Topologies\n**The following model topologies were used to generate this UQFF file. Only information pertaining to ISQ is relevant.**\n"
    # Keys are UQFF filenames; values are topology-file paths (the original
    # names `name, file` had these roles reversed-looking).
    for uqff_name, topology_path in topologies.items():
        with open(topology_path, "r") as f:
            output += f"### Used for `{uqff_name}`\n\n"
            output += f"```yml\n{f.read()}\n```\n"
msg = "Done! Please enter the output filename"
print("\n" + "-" * len(msg))
print(msg)
print("-" * len(msg))
out = input("Enter the output filename: ")
# NOTE(review): append mode ("a") preserved from the original — rerunning
# against an existing file appends a second card; confirm that is intended.
with open(out, "a") as f:
    f.write(output)