diff --git a/docs/community/topics/dependencies-js.md b/docs/community/topics/dependencies-js.md index 3887b360d..2e9979c13 100644 --- a/docs/community/topics/dependencies-js.md +++ b/docs/community/topics/dependencies-js.md @@ -8,7 +8,7 @@ There are two kinds of dependency definitions in this theme: To update or add a JS dependency, follow these steps: 1. **Edit `package.json`** by adding or modifying a dependency. -2. **Re-generate `package-lock.json`** in order to create a new set of frozen dependencies for the theme. To do this, run the following command from [the Sphinx Theme Builder](https://github.com/pradyunsg/sphinx-theme-builder). +2. **Re-generate `package-lock.json`** in order to create a new set of frozen dependencies for the theme. To do this, run the following command from the [Sphinx Theme Builder](https://sphinx-theme-builder.readthedocs.io/en/latest/). ``` stb npm install --include=dev diff --git a/docs/community/topics/manual-dev.md b/docs/community/topics/manual-dev.md index 5c952633a..6bf548897 100644 --- a/docs/community/topics/manual-dev.md +++ b/docs/community/topics/manual-dev.md @@ -18,7 +18,7 @@ To do so, use a tool like [conda](https://docs.conda.io/en/latest/), [mamba](htt Before you start, ensure that you have the following installed: - Python >= 3.9 -- [Pandoc](https://pandoc.org/installing.html): we use `nbsphinx` to support notebook (.ipynb) files in the documentation, which requires [installing Pandoc](https://pandoc.org/installing.html) at a system level (or within a Conda environment). +- [Pandoc](https://pandoc.org/): we use `nbsphinx` to support notebook (`.ipynb`) files in the documentation, which requires [installing Pandoc](https://pandoc.org/installing.html) at a system level (or within a Conda environment). 
## Clone the repository locally @@ -66,7 +66,7 @@ To manually open a server to watch your documentation for changes, build them, a $ stb serve docs --open-browser ``` -## Run the tests +## Manually run the tests To manually run the tests for this theme, first set up your environment locally, and then run: diff --git a/docs/user_guide/accessibility.md b/docs/user_guide/accessibility.md index 01ca24329..3fe7d70a7 100644 --- a/docs/user_guide/accessibility.md +++ b/docs/user_guide/accessibility.md @@ -69,7 +69,7 @@ Site maps, usually served from a file called `sitemap.xml` are a broadly-employe approach to telling programs like search engines and assistive technologies where different content appears on a website. -If using a service like [ReadTheDocs](https://readthedocs.com), these files +If using a service like [ReadTheDocs](https://about.readthedocs.com/), these files will be created for you _automatically_, but for some other approaches below, it's handy to generate a `sitemap.xml` locally or in CI with a tool like [sphinx-sitemap](https://pypi.org/project/sphinx-sitemap/). diff --git a/docs/user_guide/indices.rst b/docs/user_guide/indices.rst index 6bbb9c279..cd1d29e83 100644 --- a/docs/user_guide/indices.rst +++ b/docs/user_guide/indices.rst @@ -19,4 +19,4 @@ By design the indices pages are not linked in a documentation generated with thi .. note:: - Don't forget to add back the ``"sidebar-ethical-ads.html"`` template if you are serving your documentation using `ReadTheDocs <https://readthedocs.com>`__. + Don't forget to add back the ``"sidebar-ethical-ads.html"`` template if you are serving your documentation using `ReadTheDocs <https://about.readthedocs.com/>`__. diff --git a/tools/divergent_links.py b/tools/divergent_links.py new file mode 100644 index 000000000..ed78d7892 --- /dev/null +++ b/tools/divergent_links.py @@ -0,0 +1,136 @@ +"""This script helps check for inconsistent links. 
That is to say, links that have the same title but go to different places. 
+This is useful for screen-reader and accessibility devices, where the user may +say "Go to X", but if there are 2 links named "X" this creates ambiguity. + + +Example (links that have the same name, but different URL): + + We have a JavaScript <a href="js/api.html">API</a> and + a Python <a href="python/api.html">API</a>. + +How to fix (give the links different names): + + We have a <a href="js/api.html">JavaScript API</a> and + a <a href="python/api.html">Python API</a>. +""" + +import os +import sys +from collections import defaultdict +from urllib.parse import urljoin + +from bs4 import BeautifulSoup + +# when looking at inconsistent links across pages, +# a number of text strings are recurrent and appear on many pages. +# So we'll ignore these. + +ignores = [ + "#", + "next", + "previous", + "[source]", + "edit on github", + "[docs]", + "read more ...", + "show source", + "module", +] + + +def find_html_files(folder_path): + """Find all html files in given folder.""" + html_files = [] + for root, dirs, files in os.walk(folder_path): + for file in files: + if file.endswith(".html"): + html_files.append(os.path.join(root, file)) + return html_files + + +class Checker: + """Link checker.""" + + links: dict[str, list] + + def __init__(self): + self.links = defaultdict(list) + + def scan(self, html_content, file_path): + """Scan given file for html links.""" + # Parse the HTML content using BeautifulSoup + soup = BeautifulSoup(html_content, "html.parser") + + # Dictionary to store URLs and their corresponding titles + + # Extract all anchor tags + for a_tag in soup.find_all("a", href=True): + url = a_tag["href"] + + # These are usually links into the same page ("see below", or even + # header anchors); we thus exclude those. 
+ if url.startswith("#"): + continue + content = a_tag.text.strip().lower() + if content in ignores: + continue + # Some links are "$Title\nNext", or "$Title\nprev", so we only + # want to look at what is before the `\n` + if content.split("\n")[0] in ignores: + continue + + fullurl = urljoin(file_path, url) + self.links[content].append((fullurl, file_path)) + + def duplicates(self): + """Print potential duplicates.""" + for content, url_pages in self.links.items(): + uniq_url = {u for u, _ in url_pages} + if len(uniq_url) >= 2: + print( + f'The link text "{content!r}" appears {len(url_pages)} times, ' + f"and links to {len(uniq_url)} different URLs, on the following pages:" + ) + dct = defaultdict(list) + for u, p in url_pages: + dct[u].append(p) + for u, ps in dct.items(): + print(" ", u, "in") + for p in ps: + print(" ", p) + + +if len(sys.argv) == 3 and sys.argv[2] == "--all": + c = Checker() + + for file in find_html_files(sys.argv[1]): + with open(file) as f: + data = f.read() + c.scan(data, file) + + c.duplicates() +elif len(sys.argv) == 2: + for file in find_html_files(sys.argv[1]): + with open(file) as f: + data = f.read() + c = Checker() + c.scan(data, file) + c.duplicates() +else: + print( + """ +Check page-wise link consistency +(links with the same name on the same page should go to the same URL) + + python tools/divergent_links.py docs/_build/html/ + +Check site-wide link consistency +(links with the same name across all pages should go to the same URL) + + python tools/divergent_links.py docs/_build/html/ --all + +""" + ) + sys.exit(1)