mkdocs.yml
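# MkDocs configuration for the transformer-deploy documentation site,
# built with the Material for MkDocs theme.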
site_name: transformer-deploy by Lefebvre Dalloz
repo_name: ELS-RD/transformer-deploy/
repo_url: https://github.com/ELS-RD/transformer-deploy/
site_description: Efficient, scalable and enterprise-grade CPU/GPU inference server for Hugging Face transformer models
copyright: Copyright © 2020 - 2021 Lefebvre Dalloz
edit_uri: ""
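# Theme: Material for MkDocs, dark (slate) palette, with HTML overrides in docs/overrides.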
theme:
  name: material
  custom_dir: docs/overrides
  palette:
    scheme: slate
    primary: white
    accent: deep orange
    # toggle:
    #   icon: material/weather-night
    #   name: Switch to light mode
    # - scheme: default
    #   primary: black
    #   accent: deep orange
    #   toggle:
    #     icon: material/weather-sunny
    #     name: Switch to dark mode
  icon:
    repo: fontawesome/brands/github
    logo: material/speedometer
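# Markdown extensions: admonitions, collapsible details, emoji, syntax highlighting,
# snippets, critic/caret/mark/tilde markup, and anchored table-of-contents links.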
markdown_extensions:
  - admonition
  - pymdownx.details
  - pymdownx.superfences
  - pymdownx.emoji:
      emoji_index: !!python/name:materialx.emoji.twemoji
      emoji_generator: !!python/name:materialx.emoji.to_svg
  - pymdownx.highlight:
      anchor_linenums: true
  - pymdownx.inlinehilite
  - pymdownx.snippets
  - pymdownx.superfences
  - toc:
      permalink: "#"
  - pymdownx.critic
  - pymdownx.caret
  - pymdownx.mark
  - pymdownx.tilde
  - abbr
  - attr_list
  - md_in_html
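# Plugins: built-in search, mkdocstrings (API pages generated from Python docstrings),
# include-markdown, mkdocs-jupyter (renders the demo notebooks as pages), gen-files
# (runs resources/gen_doc_stubs.py at build time) and literate-nav (reads the generated
# navigation from SUMMARY.md for the API reference).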
plugins:
  - search
  - mkdocstrings:
      handlers:
        python:
          selection:
            docstring_style: restructured-text
  - include-markdown
  - mkdocs-jupyter:
      theme: dark
      ignore_h1_titles: True
  - gen-files:
      scripts:
        - resources/gen_doc_stubs.py
  - literate-nav:
      nav_file: SUMMARY.md
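# Footer extras: social links; generator: false hides the "Made with Material for MkDocs" notice.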
extra:
  social:
    - icon: fontawesome/brands/twitter
      link: https://twitter.com/pommedeterre33
    - icon: fontawesome/brands/medium
      link: https://medium.com/@pommedeterre33
  generator: false
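# Site navigation, from getting started to benchmarks; the API section under reference/
# is generated by the gen-files and literate-nav plugins configured above.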
nav:
  - Getting started: index.md
  - Installation (local or Docker only): setup_local.md
  - Run (1 command): run.md
  - Which tool to choose for your inference?: compare.md
  - How does ONNX conversion work?: onnx_convert.md
  - Understanding model optimization: optimizations.md
  - Use TensorRT directly from a Python script (no server): python.md
  - GPU quantization for a 2X speed-up:
      - Why use quantization?: quantization/quantization_intro.md
      - Quantization theory: quantization/quantization_theory.md
      - How is it implemented in this library?: quantization/quantization_ast.md
      - PTQ and QAT, what are they?: quantization/quantization_ptq.md
      - End-to-end demo: quantization/quantization.ipynb
  - "From optimization to deployment: an end-to-end demo": demo.md
  - "Accelerate text generation with GPT-2": gpt2.ipynb
  - Benchmarks run on AWS GPU instances: benchmarks.md
  - FAQ: faq.md
  - API: reference/