diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5ed1fee9cd..cb1c92bec1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -29,6 +29,8 @@ jobs: python-version: 3.9 - name: Install Python dependencies uses: py-actions/py-dependency-install@v4 + - name: Install Python libs + run: pip3 install -r ./requirements.txt - uses: ruby/setup-ruby@v1 with: ruby-version: 3.2 diff --git a/.gitmodules b/.gitmodules index 60c9ade065..4532ebbdf2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -6,3 +6,7 @@ path = lib/pico-examples url = https://github.com/raspberrypi/pico-examples.git branch = master + +[submodule "doxygentoasciidoc"] + path = doxygentoasciidoc + url = https://github.com/raspberrypi/doxygentoasciidoc.git diff --git a/Makefile b/Makefile index 2341a582c9..282428ce17 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ PICO_SDK_DIR = lib/pico-sdk PICO_EXAMPLES_DIR = lib/pico-examples ALL_SUBMODULE_CMAKELISTS = $(PICO_SDK_DIR)/CMakeLists.txt $(PICO_EXAMPLES_DIR)/CMakeLists.txt DOXYGEN_PICO_SDK_BUILD_DIR = build-pico-sdk-docs -DOXYGEN_HTML_DIR = $(DOXYGEN_PICO_SDK_BUILD_DIR)/docs/doxygen/html +DOXYGEN_XML_DIR = $(DOXYGEN_PICO_SDK_BUILD_DIR)/combined/docs/doxygen/xml # The pico-sdk here needs to match up with the "from_json" entry in index.json ASCIIDOC_DOXYGEN_DIR = $(ASCIIDOC_DIR)/pico-sdk @@ -50,33 +50,42 @@ $(PICO_SDK_DIR)/CMakeLists.txt $(PICO_SDK_DIR)/docs/index.h: | $(PICO_SDK_DIR) $(PICO_EXAMPLES_DIR)/CMakeLists.txt: | $(PICO_SDK_DIR)/CMakeLists.txt $(PICO_EXAMPLES_DIR) git submodule update --init $(PICO_EXAMPLES_DIR) +# Initialise doxygentoasciidoc submodule +doxygentoasciidoc/__main__.py: + git submodule update --init doxygentoasciidoc + fetch_submodules: $(ALL_SUBMODULE_CMAKELISTS) # Get rid of the submodules clean_submodules: git submodule deinit --all -# Create the pico-sdk Doxygen HTML files -$(DOXYGEN_HTML_DIR): | $(ALL_SUBMODULE_CMAKELISTS) $(DOXYGEN_PICO_SDK_BUILD_DIR) - cmake -S $(PICO_SDK_DIR) -B $(DOXYGEN_PICO_SDK_BUILD_DIR) -DPICO_EXAMPLES_PATH=../$(PICO_EXAMPLES_DIR) - $(MAKE) -C $(DOXYGEN_PICO_SDK_BUILD_DIR) docs - test -d "$@" +# Create the pico-sdk Doxygen XML files +$(DOXYGEN_XML_DIR) $(DOXYGEN_XML_DIR)/index.xml: | $(ALL_SUBMODULE_CMAKELISTS) $(DOXYGEN_PICO_SDK_BUILD_DIR) + cmake -S $(PICO_SDK_DIR) -B $(DOXYGEN_PICO_SDK_BUILD_DIR)/combined -D PICO_EXAMPLES_PATH=../$(PICO_EXAMPLES_DIR) -D PICO_PLATFORM=combined-docs + cmake -S $(PICO_SDK_DIR) -B $(DOXYGEN_PICO_SDK_BUILD_DIR)/PICO_RP2040 -D PICO_EXAMPLES_PATH=../$(PICO_EXAMPLES_DIR) -D PICO_PLATFORM=rp2040 + cmake -S $(PICO_SDK_DIR) -B $(DOXYGEN_PICO_SDK_BUILD_DIR)/PICO_RP2350 -D PICO_EXAMPLES_PATH=../$(PICO_EXAMPLES_DIR) -D PICO_PLATFORM=rp2350 + $(MAKE) -C $(DOXYGEN_PICO_SDK_BUILD_DIR)/combined docs + $(MAKE) -C $(DOXYGEN_PICO_SDK_BUILD_DIR)/PICO_RP2040 docs + $(MAKE) -C $(DOXYGEN_PICO_SDK_BUILD_DIR)/PICO_RP2350 docs + python3 $(SCRIPTS_DIR)/postprocess_doxygen_xml.py $(DOXYGEN_PICO_SDK_BUILD_DIR) -$(DOXYGEN_PICO_SDK_BUILD_DIR)/docs/Doxyfile: | $(DOXYGEN_HTML_DIR) +$(DOXYGEN_PICO_SDK_BUILD_DIR)/combined/docs/Doxyfile: | $(DOXYGEN_XML_DIR) -build_doxygen_html: | $(DOXYGEN_HTML_DIR) +build_doxygen_xml: | $(DOXYGEN_XML_DIR) -# Clean all the Doxygen HTML files -clean_doxygen_html: +# Clean all the Doxygen XML files +clean_doxygen_xml: rm -rf $(DOXYGEN_PICO_SDK_BUILD_DIR) -# Create the Doxygen asciidoc files -# Also need to move index.adoc to a different name, because it conflicts with the autogenerated index.adoc -$(ASCIIDOC_DOXYGEN_DIR)/picosdk_index.json $(ASCIIDOC_DOXYGEN_DIR)/index_doxygen.adoc: 
$(SCRIPTS_DIR)/transform_doxygen_html.py $(PICO_SDK_DIR)/docs/index.h $(DOXYGEN_PICO_SDK_BUILD_DIR)/docs/Doxyfile | $(DOXYGEN_HTML_DIR) $(ASCIIDOC_DOXYGEN_DIR) +# create the sdk adoc and the json file +$(ASCIIDOC_DOXYGEN_DIR)/picosdk_index.json $(ASCIIDOC_DOXYGEN_DIR)/index_doxygen.adoc: $(ASCIIDOC_DOXYGEN_DIR) $(DOXYGEN_XML_DIR)/index.xml doxygentoasciidoc/__main__.py doxygentoasciidoc/cli.py doxygentoasciidoc/nodes.py doxygentoasciidoc/helpers.py | $(BUILD_DIR) $(MAKE) clean_ninja - $< $(DOXYGEN_HTML_DIR) $(ASCIIDOC_DOXYGEN_DIR) $(PICO_SDK_DIR)/docs/index.h $(ASCIIDOC_DOXYGEN_DIR)/picosdk_index.json - cp $(DOXYGEN_HTML_DIR)/*.png $(ASCIIDOC_DOXYGEN_DIR) - mv $(ASCIIDOC_DOXYGEN_DIR)/index.adoc $(ASCIIDOC_DOXYGEN_DIR)/index_doxygen.adoc + python3 -m doxygentoasciidoc -f $(DOXYGEN_XML_DIR)/index.xml > $(ASCIIDOC_DOXYGEN_DIR)/all_groups.adoc + python3 -m doxygentoasciidoc -f $(DOXYGEN_XML_DIR)/indexpage.xml -c > $(ASCIIDOC_DOXYGEN_DIR)/index_doxygen.adoc + python3 -m doxygentoasciidoc -f $(DOXYGEN_XML_DIR)/examples_page.xml -c > $(ASCIIDOC_DOXYGEN_DIR)/examples_page.adoc + python3 $(SCRIPTS_DIR)/postprocess_doxygen_adoc.py $(ASCIIDOC_DOXYGEN_DIR) + -cp $(DOXYGEN_XML_DIR)/*.png $(ASCIIDOC_DOXYGEN_DIR) build_doxygen_adoc: $(ASCIIDOC_DOXYGEN_DIR)/index_doxygen.adoc diff --git a/doxygentoasciidoc b/doxygentoasciidoc new file mode 160000 index 0000000000..b771d544ec --- /dev/null +++ b/doxygentoasciidoc @@ -0,0 +1 @@ +Subproject commit b771d544ec133e1d91be3f617b60183ecff2b0be diff --git a/jekyll-assets/css/style.css b/jekyll-assets/css/style.css index f0d1f4ca74..6f123e9bae 100644 --- a/jekyll-assets/css/style.css +++ b/jekyll-assets/css/style.css @@ -707,6 +707,53 @@ div.videoblock iframe { /* DOXYGEN ELEMENTS */ +.contexttag { + display: inline-block; + font-size: 0.8em; + line-height: 1em; + font-weight: bold; + background-color: orange; + color: #ffffff; + border-radius: 0.5em; + padding-left: 0.5em; + padding-right: 0.5em; + padding-top: 1px; + padding-bottom: 1px; +} + +.contexttag.RP2040 { + background-color: #50C878; +} + +div.listingblock pre.highlight { + margin-top: 0px; + margin-bottom: 0px; +} + +#content div.listingblock table.linenotable { + margin-bottom: 0px; +} + +#content td.hdlist1 { + line-height: 1.5em; +} + +#content td.hdlist2 > p { + margin-bottom: 0px; +} + +#content td.linenos { + padding-right: 10px; +} + +.highlight td.code pre { + background-color: transparent; + margin-top: 0px; + margin-bottom: 0px; +} + +/* OLD DOXYGEN ELEMENTS */ + div.memproto { background-color: #dedede; padding: 7px; diff --git a/jekyll-assets/scripts/copy-to-clipboard.js b/jekyll-assets/scripts/copy-to-clipboard.js index 71021d9de2..b1582ca4d5 100644 --- a/jekyll-assets/scripts/copy-to-clipboard.js +++ b/jekyll-assets/scripts/copy-to-clipboard.js @@ -28,7 +28,7 @@ var hideTooltip = function() { }; var extractDoxygenCode = function(node) { - var lines = node.querySelectorAll("div.line"); + var lines = node.querySelectorAll("div.code"); var preText = ""; for (var i = 0; i < lines.length; i++) { var myText = lines[i].textContent; @@ -45,8 +45,9 @@ for (var i = 0; i < buttons.length; i++) { window.addEventListener('load', function() { var clipboard = new ClipboardJS('.copy-button', { text: function(trigger) { - if (trigger.parentNode.querySelector('div.line')) { - var text = extractDoxygenCode(trigger.parentNode); + if (trigger.parentNode.querySelector('td.code')) { + // var text = extractDoxygenCode(trigger.parentNode); + var text = trigger.parentNode.querySelector('td.code pre').textContent; } 
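+          // Listings without a td.code table (i.e. non-Doxygen code blocks)
+          // fall through to the plain <pre> handling below.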
else { var text = trigger.parentNode.querySelector('pre').textContent; diff --git a/lib/pico-examples b/lib/pico-examples index eca13acf57..7fe60d6b40 160000 --- a/lib/pico-examples +++ b/lib/pico-examples @@ -1 +1 @@ -Subproject commit eca13acf57916a0bd5961028314006983894fc84 +Subproject commit 7fe60d6b4027771e45d97f207532c41b1d8c5418 diff --git a/lib/pico-sdk b/lib/pico-sdk index 6a7db34ff6..efe2103f9b 160000 --- a/lib/pico-sdk +++ b/lib/pico-sdk @@ -1 +1 @@ -Subproject commit 6a7db34ff63345a7badec79ebea3aaef1712f374 +Subproject commit efe2103f9b28458a1615ff096054479743ade236 diff --git a/requirements.txt b/requirements.txt index 1dae939022..f9610bf14a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ pyyaml == 6.0.1 lxml +beautifulsoup4 diff --git a/scripts/create_auto_ninjabuild.py b/scripts/create_auto_ninjabuild.py index 66dc12235a..51ac5cc5a0 100755 --- a/scripts/create_auto_ninjabuild.py +++ b/scripts/create_auto_ninjabuild.py @@ -136,7 +136,7 @@ def add_entire_directory(tab_dir, dir_path, pages_set, src_images, dest_images): ninja.variable('documentation_index', index_json) ninja.variable('output_index', os.path.join(output_dir, "_data", "index.json")) ninja.variable('site_config', config_yaml) - ninja.variable('doxyfile', os.path.join(doxygen_pico_sdk_build_dir, "docs", "Doxyfile")) + ninja.variable('doxyfile', os.path.join(doxygen_pico_sdk_build_dir, "combined", "docs", "Doxyfile")) ninja.newline() targets = [] diff --git a/scripts/create_build_adoc_doxygen.py b/scripts/create_build_adoc_doxygen.py index 1b136fc814..60cb5e33c2 100755 --- a/scripts/create_build_adoc_doxygen.py +++ b/scripts/create_build_adoc_doxygen.py @@ -16,8 +16,9 @@ def check_no_markdown(filename): asciidoc = re.sub(r'----\n.*?\n----', '', asciidoc, flags=re.DOTALL) # strip out pass-through blocks asciidoc = re.sub(r'\+\+\+\+\n.*?\n\+\+\+\+', '', asciidoc, flags=re.DOTALL) - if re.search(r'(?:^|\n)#+', asciidoc): - raise Exception("{} contains a Markdown-style header (i.e. '#' rather than '=')".format(filename)) + # This is messing up the c code blocks + # if re.search(r'(?:^|\n)#+', asciidoc): + # raise Exception("{} contains a Markdown-style header (i.e. '#' rather than '=')".format(filename)) if re.search(r'(\[.+?\]\(.+?\))', asciidoc): raise Exception("{} contains a Markdown-style link (i.e. 
'[title](url)' rather than 'url[title]')".format(filename)) @@ -45,10 +46,7 @@ def check_no_markdown(filename): if 'from_json' in tab and 'directory' in tab and tab['directory'] == output_subdir: filebase = os.path.splitext(adoc_filename)[0] index_title = filebase - if filebase != "index_doxygen": - picosdk_filename = re.sub("_", "__", filebase)+".html" - else: - picosdk_filename = filebase+".html" + picosdk_filename = filebase+".html" for item in picosdk_data: if re.sub("^group__", "", item["html"]) == picosdk_filename: index_title = item['name'] diff --git a/scripts/create_nav.py b/scripts/create_nav.py index 32921d0272..16b1120a69 100755 --- a/scripts/create_nav.py +++ b/scripts/create_nav.py @@ -131,6 +131,7 @@ def read_file_with_includes(filepath, filelevel, mainfile, output_dir=None): newlevel = len(m.group(1)) # Need to compute anchors for *every* header (updates file_headings) heading = strip_adoc(m.group(2)) + heading = re.sub(r"(\[\.contexttag )(\S+)(\]\*\S+\*)", "\\2", heading) anchor = heading_to_anchor(top_level_file, heading, header_id) if anchor in available_anchors[fullpath]: raise Exception("Anchor {} appears twice in {}".format(anchor, fullpath)) diff --git a/scripts/postprocess_doxygen_adoc.py b/scripts/postprocess_doxygen_adoc.py new file mode 100644 index 0000000000..bf6d4fe0a4 --- /dev/null +++ b/scripts/postprocess_doxygen_adoc.py @@ -0,0 +1,149 @@ +import re +import sys +import os +import json + +def cleanup_text_page(adoc_file, output_adoc_path, link_targets): + filename = os.path.basename(adoc_file) + with open(adoc_file) as f: + adoc_content = f.read() + # remove any errant spaces before anchors + adoc_content = re.sub(r'( +)(\[\[[^[]*?\]\])', "\\2", adoc_content) + # collect link targets + for line in adoc_content.split('\n'): + link_targets = collect_link_target(line, filename) + with open(adoc_file, 'w') as f: + f.write(adoc_content) + return link_targets + +def collect_link_target(line, chapter_filename): + # collect a list of all link targets, so we can fix internal links + l = re.search(r'(#)([^,\]]+)([,\]])', line) + if l is not None: + link_targets[l.group(2)] = chapter_filename + return link_targets + +def resolve_links(adoc_file, link_targets): + filename = os.path.basename(adoc_file) + with open(adoc_file) as f: + adoc_content = f.read() + output_content = [] + for line in adoc_content.split('\n'): + # e.g., <<group_hardware_gpio,hardware_gpio>> + m = re.search("(<<)([^,]+)(,?[^>]*>>)", line) + if m is not None: + target = m.group(2) + # only resolve link if it points to another file + if target in link_targets and link_targets[target] != filename: + new_target = link_targets[target]+"#"+target + line = re.sub("(<<)([^,]+)(,?[^>]*>>)", f"\\1{new_target}\\3", line) + output_content.append(line) + with open(adoc_file, 'w') as f: + f.write('\n'.join(output_content)) + return + +def build_json(sections, output_path): + json_path = os.path.join(output_path, "picosdk_index.json") + with open(json_path, 'w') as f: + f.write(json.dumps(sections, indent="\t")) + return + +def tag_content(adoc_content): + # this is dependent on the same order of attributes every time + ids_to_tag = re.findall(r'(\[#)(.*?)(,.*?contextspecific,tag=)(.*?)(,type=)(.*?)(\])', adoc_content) + for this_id in ids_to_tag: + tag = re.sub("PICO_", "", this_id[3]) + img = f" [.contexttag {tag}]*{tag}*" + # `void <<group_hardware_gpio_1ga...,gpio_some_function>> ()`:: An rp2040 function. + adoc_content = re.sub(rf'(\n`.*?<<{this_id[1]},.*?`)(::)', f"\\1{img}\\2", adoc_content) + # |<<group_hardware_base,hardware_base>>\n|Low-level types and (atomic) accessors for memory-mapped hardware registers.
+ adoc_content = re.sub(rf'(\n\|<<{this_id[1]},.*?>>\n\|.*?)(\n)', f"\\1{img}\\2", adoc_content) + # [#group_cyw43_ll_1ga0411cd49bb5b71852cecd93bcbf0ca2d,role=contextspecific,tag=PICO_RP2040,type=PICO_RP2040]\n=== anonymous enum + HEADING_RE = re.compile(r'(\[#.*?role=contextspecific.*?tag=P?I?C?O?_?)(.*?)(,.*?\]\s*?\n\s*=+\s+\S*?)(\n)') + # [#group_cyw43_ll_1ga0411cd49bb5b71852cecd93bcbf0ca2d,role=h6 contextspecific,tag=PICO_RP2040,type=PICO_RP2040]\n*anonymous enum* + H6_HEADING_RE = re.compile(r'(\[#.*?role=h6 contextspecific.*?tag=P?I?C?O?_?)(.*?)(,.*?\]\s*?\n\s*\*\S+.*?)(\n)') + # [#group_cyw43_ll_1ga0411cd49bb5b71852cecd93bcbf0ca2d,role=h6 contextspecific,tag=PICO_RP2040,type=PICO_RP2040]\n---- + NONHEADING_RE = re.compile(r'(\[#.*?role=h?6?\s?contextspecific.*?tag=P?I?C?O?_?)(.*?)(,.*?\]\s*?\n\s*[^=\*])') + adoc_content = re.sub(HEADING_RE, f'\\1\\2\\3 [.contexttag \\2]*\\2*\n', adoc_content) + adoc_content = re.sub(H6_HEADING_RE, f'\\1\\2\\3 [.contexttag \\2]*\\2*\n', adoc_content) + adoc_content = re.sub(NONHEADING_RE, f'[.contexttag \\2]*\\2*\n\n\\1\\2\\3', adoc_content) + return adoc_content + +def postprocess_doxygen_adoc(adoc_file, output_adoc_path, link_targets): + output_path = re.sub(r'[^/]+$', "", adoc_file) + sections = [{ + "group_id": "index_doxygen", + "name": "Introduction", + "description": "An introduction to the Pico SDK", + "html": "index_doxygen.html", + "subitems": [] + }] + with open(adoc_file) as f: + adoc_content = f.read() + # first, let's add any tags + adoc_content = tag_content(adoc_content) + # now split the file into top-level sections: + # toolchain expects all headings to be two levels lower + adoc_content = re.sub(r'(\n==)(=+ \S+)', "\n\\2", adoc_content) + # then make it easier to match the chapter breaks + adoc_content = re.sub(r'(\[#.*?,reftext=".*?"\])(\s*\n)(= )', "\\1\\3", adoc_content) + # find all the chapter descriptions, to use later + descriptions = re.findall(r'(\[#.*?,reftext=".*?"\])(= .*?\n\s*\n)(.*?)(\n)', adoc_content) + CHAPTER_START_RE = re.compile(r'(\[#)(.*?)(,reftext=".*?"\]= )(.*?$)') + # check line by line; if the line matches our chapter break, + # then pull all following lines into the chapter list until a new match.
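+ # For example, a chapter break emitted after the reftext substitution above
+ # looks like this (a hypothetical sample, not taken from real output):
+ #   [#group_hardware_gpio,reftext="hardware_gpio"]= hardware_gpio
+ # CHAPTER_START_RE captures the anchor id ("group_hardware_gpio") and the
+ # title; the leading "group_" prefix is stripped below, so the lines that
+ # follow are collected into hardware_gpio.adoc until the next break matches.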
+ chapter_filename = "all_groups.adoc" + current_chapter = None + chapter_dict = {} + counter = 0 + for line in adoc_content.split('\n'): + link_targets = collect_link_target(line, chapter_filename) + m = CHAPTER_START_RE.match(line) + if m is not None: + # write the previous chapter + if current_chapter is not None: + with open(chapter_path, 'w') as f: + f.write('\n'.join(current_chapter)) + # start the new chapter + current_chapter = [] + # set the data for this chapter + group_id = re.sub("^group_+", "", m.group(2)) + chapter_filename = group_id+".adoc" + chapter_path = os.path.join(output_path, chapter_filename) + chapter_dict = { + "group_id": group_id, + "html": group_id+".html", + "name": m.group(4), + "subitems": [], + "description": descriptions[counter][2] + } + sections.append(chapter_dict) + # re-split the line into 2 + start_line = re.sub("= ", "\n= ", line) + current_chapter.append(start_line) + counter += 1 + else: + current_chapter.append(line) + # write the last chapter + if current_chapter is not None: + with open(chapter_path, 'w') as f: + f.write('\n'.join(current_chapter)) + build_json(sections, output_path) + os.remove(adoc_file) + return link_targets + +if __name__ == '__main__': + output_adoc_path = sys.argv[1] + adoc_files = [f for f in os.listdir(output_adoc_path) if re.search(".adoc", f) is not None] + link_targets = {} + for adoc_file in adoc_files: + adoc_filepath = os.path.join(output_adoc_path, adoc_file) + if re.search("all_groups.adoc", adoc_file) is not None: + link_targets = postprocess_doxygen_adoc(adoc_filepath, output_adoc_path, link_targets) + else: + link_targets = cleanup_text_page(adoc_filepath, output_adoc_path, link_targets) + # now that we have a complete list of all link targets, resolve all internal links + adoc_files = [f for f in os.listdir(output_adoc_path) if re.search(".adoc", f) is not None] + for adoc_file in adoc_files: + adoc_filepath = os.path.join(output_adoc_path, adoc_file) + resolve_links(adoc_filepath, link_targets) diff --git a/scripts/postprocess_doxygen_xml.py b/scripts/postprocess_doxygen_xml.py new file mode 100755 index 0000000000..b0f0b9e165 --- /dev/null +++ b/scripts/postprocess_doxygen_xml.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 + +import sys +import re +import os +import html +from bs4 import BeautifulSoup + +# walk the combined output. +# for each function: +# check if it is in the output for one chip, or both +# if for only one chip, add a role to that section accordingly. 
# instead of searching every xml every time, make a list of available functions in each xml +def compile_id_list(xml_content): + # get any element that has an id + els = xml_content.find_all(id=True) + id_list = [x["id"] for x in els] + return id_list + +def insert_example_code_from_file(combined_content): + els = combined_content.doxygen.find_all("programlisting") + all_examples = {} + # get the examples path + examples_path = re.sub(r"/scripts/.+$", "/lib/pico-examples", os.path.realpath(__file__)) + # get a recursive list of all files in examples + for f in os.walk(examples_path): + for filename in f[2]: + if filename in all_examples: + all_examples[filename].append(os.path.join(f[0], filename)) + else: + all_examples[filename] = [os.path.join(f[0], filename)] + for el in els: + if el.get("filename") is not None: + filename = el.get("filename") + # find the file here or in examples + if filename in all_examples: + with open(all_examples[filename][0]) as f: + example_content = f.read() + example_lines = example_content.split("\n") + for line in example_lines: + codeline = BeautifulSoup("<codeline>"+html.escape(line)+"</codeline>", 'xml') + el.append(codeline) + return combined_content + +def walk_and_tag_xml_tree(el, output_contexts, all_contexts): + """ + Walk an element tree, adding context-specific tags as needed. + + For performance purposes (to avoid traversing multiple dicts for every element), + we use element IDs as the key, and the contexts it belongs to as the value. + Thus, output_contexts will look something like this: + { + "group__hardware__gpio_1gaecd01f57f1cac060abe836793f7bea18": [ + "PICO_RP2040", + "FOO" + ], + "group__hardware__gpio_1ga7becbc8db22ff0a54707029a2c0010e6": [ + "PICO_RP2040" + ], + "group__hardware__gpio_1ga192335a098d40e08b23cc6d4e0513786": [ + "PICO_RP2040" + ], + "group__hardware__gpio_1ga8510fa7c1bf1c6e355631b0a2861b22b": [ + "FOO", + "BAR" + ], + "group__hardware__gpio_1ga5d7dbadb2233e2e6627e9101411beb27": [ + "FOO" + ] + } + """ + targets = [] + if el.get('id') is not None: + myid = el["id"] + if myid in output_contexts: + targets = output_contexts[myid] + # if this content is in all contexts, no label is required + if len(targets) > 0 and len(targets) < len(all_contexts): + el["role"] = "contextspecific" + el["tag"] = ', '.join(targets) + if len(targets) > 1: + el["type"] = "multi" + else: + el["type"] = targets[0] + # only check nested children if the parent has NOT been tagged as context-specific + else: + # for child in el.iterchildren(): + for child in el.find_all(True, recursive=False): + walk_and_tag_xml_tree(child, output_contexts, all_contexts) + else: + for child in el.find_all(True, recursive=False): + walk_and_tag_xml_tree(child, output_contexts, all_contexts) + return + +def postprocess_doxygen_xml_file(combined_xmlfile, xmlfiles, output_context_paths): + """ + Process an individual xml file, adding context-specific tags as needed.
+ + xmlfiles will look something like this: + { + "PICO_RP2040": "/path/to/PICO_RP2040/myfilename.xml", + "FOO": "/path/to/FOO/myfilename.xml" + } + """ + output_contexts = {} + for item in xmlfiles: + label = item + # parse the xml file + with open(xmlfiles[item], encoding="utf-8") as f: + xml_content = BeautifulSoup(f, 'xml') + # compile a list of all element ids within the file + id_list = compile_id_list(xml_content.doxygen) + # create the map of ids and their contexts (see example above) + for myid in id_list: + if myid in output_contexts: + output_contexts[myid].append(label) + else: + output_contexts[myid] = [label] + with open(combined_xmlfile, encoding="utf-8") as f: + combined_content = BeautifulSoup(f, 'xml') + # start with top-level children, and then walk the tree as appropriate + els = combined_content.doxygen.find_all(True, recursive=False) + for el in els: + walk_and_tag_xml_tree(el, output_contexts, list(output_context_paths.keys())) + combined_content = insert_example_code_from_file(combined_content) + return str(combined_content) + +def postprocess_doxygen_xml(xml_path): + """ + Expectation is that xml for each context will be generated + within a subfolder titled with the context name, e.g.: + - doxygen_build/ + - combined/ + - PICO_RP2040/ + - FOO/ + """ + # collect a list of all context-specific subdirs + skip = ["index.xml", "Doxyfile.xml"] + output_context_paths = {} + combined_output_path = None + for item in list(filter(lambda x: os.path.isdir(os.path.join(xml_path, x)), os.listdir(xml_path))): + if item == "combined": + # if doxygen ever changes the output path for the xml, this will need to be updated + combined_output_path = os.path.join(xml_path, item, "docs", "doxygen", "xml") + else: + # same as above + output_context_paths[item] = os.path.join(xml_path, item, "docs", "doxygen", "xml") + # we need to process all generated xml files + for combined_xmlfile in list(filter(lambda x: re.search(r'\.xml$', x) is not None, os.listdir(combined_output_path))): + # skip the index -- it's just a listing + if combined_xmlfile not in skip: + xmlfiles = {} + # get all context-specific versions of this file + for context in output_context_paths: + if os.path.isfile(os.path.join(output_context_paths[context], combined_xmlfile)): + xmlfiles[context] = os.path.join(output_context_paths[context], combined_xmlfile) + combined_content = postprocess_doxygen_xml_file(os.path.join(combined_output_path, combined_xmlfile), xmlfiles, output_context_paths) + # write the output + with open(os.path.join(combined_output_path, combined_xmlfile), 'w') as f: + f.write(combined_content) + return + +if __name__ == '__main__': + xml_path = sys.argv[1] + file_path = os.path.realpath(__file__) + # splitting these subs into two parts to make testing easier + # xml_path = re.sub(r'/documentation-toolchain/.*?$', "/"+xml_path, re.sub(r'/lib/', "/", file_path)) + postprocess_doxygen_xml(xml_path) diff --git a/scripts/tests/test_doxygen_adoc.py b/scripts/tests/test_doxygen_adoc.py index 15faeca960..e4ce20b5e2 100644 --- a/scripts/tests/test_doxygen_adoc.py +++ b/scripts/tests/test_doxygen_adoc.py @@ -2,8 +2,6 @@ import re import unittest from pathlib import Path -from transform_doxygen_html import parse_individual_file -from transform_doxygen_html import compile_json_mappings # to run: on the command line, from the /scripts dir: python3 -m unittest tests.test_doxygen_adoc @@ -16,25 +14,6 @@ def setUp(self): def tearDown(self): pass - def test_parse_individual_file(self): - updated_links = {} - html_path
= os.path.join(self.current_dir, "fixtures") - adoc_fixture = os.path.join(html_path, "expected_adoc.adoc") - html_file = "group__hardware__dma.html" - json_dir = os.path.join(self.parent_dir, "doxygen_json_mappings") - json_files = os.listdir(json_dir) - json_files = [f for f in json_files if re.search(".json", f) is not None] - complete_json_mappings = compile_json_mappings(json_dir, json_files) - h_json = [{'group_id': 'hardware', 'name': 'Hardware APIs', 'description': 'This group of libraries provides a thin and efficient C API / abstractions to access the RP2040 hardware without having to read and write hardware registers directly. ', 'html': 'group__hardware.html', 'subitems': [{'name': 'hardware_dma', 'file': 'group__hardware__dma.adoc', 'html': 'group__hardware__dma.html', 'subitems': []}]}] - adoc, h_json = parse_individual_file(html_path, html_file, complete_json_mappings, updated_links, h_json) - adoc_cleaned = re.sub("rpip[a-zA-Z0-9]+", "", adoc) - expected_json = [{'group_id': 'hardware', 'name': 'Hardware APIs', 'description': 'This group of libraries provides a thin and efficient C API / abstractions to access the RP2040 hardware without having to read and write hardware registers directly. ', 'html': 'group__hardware.html', 'subitems': [{'name': 'hardware_dma', 'file': 'group__hardware__dma.adoc', 'html': 'group__hardware__dma.html', 'subitems': [{'name': 'group__channel__config', 'file': 'group__channel__config.adoc', 'html': 'group__channel__config.html', 'subitems': []}]}]}] - with open(adoc_fixture) as f: - expected_adoc = f.read() - expected_adoc_cleaned = re.sub("rpip[a-zA-Z0-9]+", "", expected_adoc) - self.assertEqual(expected_json, h_json) - self.assertEqual(expected_adoc_cleaned, adoc_cleaned) - def test_doxygen_adoc_variables(self): # run AFTER the content has been built; # test will fail if ANY of the below are different or missing diff --git a/scripts/transform_doxygen_html.py b/scripts/transform_doxygen_html.py deleted file mode 100755 index cb61654dd9..0000000000 --- a/scripts/transform_doxygen_html.py +++ /dev/null @@ -1,845 +0,0 @@ -#!/usr/bin/env python3 - -import json -import os -import sys -import re -import random -import string -import copy -import hashlib - -from lxml import etree - -# TO DO: -# do internal href links need to be updated? 
- -def add_next_with_tail(target, inserted): - if target.tail is not None: - inserted.tail = inserted.tail + target.tail if inserted.tail is not None else target.tail - target.tail = None - target.addnext(inserted) - -def get_all_text(node): - text = node.text if node.text else None - if text: - yield text - for child in node: - yield from get_all_text(child) - tail = node.tail if node.tail else None - if tail: - yield tail - -def stringify(lxml_content): - html_string = etree.tostring(lxml_content, pretty_print=True, encoding='UTF-8').decode('utf-8') - return html_string - -def write_output(filepath, content): - f = open(filepath, 'w') - f.write(content) - f.close() - return - -def make_hash(string): - hash_object = hashlib.sha1(bytes(string, 'utf-8')) - new_hash = hash_object.hexdigest() - if len(new_hash) > 20: - new_hash = new_hash[:20] - return new_hash - -def add_ids(root, html_file): - els = root.xpath(".//body//*[not(@id)]") - counter = 0 - for el in els: - hash_string = str(counter)+html_file+''.join(get_all_text(el)) - newid = make_hash(hash_string) - newid = "rpip" + newid - el.set("id", newid) - counter += 1 - return root - -def strip_attribute(att, root): - els = root.xpath(".//*[@"+att+"]") - for el in els: - el.attrib.pop(att) - return root - -def make_attribute_selector(sel, item): - try: - atts = [] - for att in item["attributes"]: - # if we've got a wildcard, this should be a "contains" selector, - # e.g. [contains(@class,'foo')]: - contains = False - for att_value in att["value"]: - if "*" in att_value: - contains = True - if contains == True: - val = re.sub(r"\*", "", " ".join(att["value"])) - atts.append("contains(@" + att["name"] + ",'" + val + "')") - else: - # otherwise it's a normal attribute selector - atts.append("@" + att["name"] + "='" + " ".join(att["value"]) + "'") - if len(atts) > 0: - att_string = " and ".join(atts) - sel = sel + "[" + att_string + "]" - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return sel - -def make_parent_selector(sel, item): - try: - if len(item["parents"]) > 0: - # sort the parents by level - # add each parent to the selector based on level - parent_sel = "" - sorted_parents = list(reversed(sorted(item["parents"], key=lambda d: d['level']))) - for ix, parent in enumerate(sorted_parents): - # now add the parent element to the selector - parent_sel = parent_sel + parent["element"] - parent_sel = make_attribute_selector(parent_sel, parent) - if len(sorted_parents) > ix+1: - next_ix = ix+1 - level = parent["level"] - sorted_parents[next_ix]["level"] - 1 - else: - level = parent["level"] - if level > 0: - for i in range(level): - parent_sel = parent_sel + "/*" - parent_sel = parent_sel + "/" - sel = sel + parent_sel - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return sel - -def make_selector(item, is_child=False): - sel = None - try: - if is_child == True: - sel = "./" - else: - sel = ".//" - # add parent selectors - sel = make_parent_selector(sel, item) - sel = sel + item["element"] - sel = make_attribute_selector(sel, item) - sel = sel + "[not(@data-processed='true')]" - # add child selectors - # TO DO - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return sel - -def make_new_element(item): - new_el = None - # construct the new element and children from the mapping - try: - new_el = etree.Element(item["element"]) - for att in 
item["attributes"]: - new_el.set(att["name"], ' '.join(att["value"])) - new_el.set("data-processed", "true") - sorted_children = sorted(item["children"], key=lambda d: d['position']) - for child in sorted_children: - new_child = make_new_element(child) - new_el.append(new_child) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return new_el - -def make_tree(item): - new_tree = None - # construct the complete element tree from the mapping - try: - tree = item["output"]["tree"] - if len(tree) > 0: - sorted_tree = sorted(tree, key=lambda d: d['position']) - # build an element - for tree_el in sorted_tree: - new_tree = make_new_element(tree_el) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return new_tree - -def add_content_to_tree(new_tree, match): - try: - # preserve the same id, just in case - new_tree.set("id", match.get("id")) - # also preserve the original parent id - parent = match.getparent() - if parent is not None and parent.get("id") is not None: - new_tree.set("data-parent-id", parent.get("id")) - # figure out where to insert any children - # (this is configured in the json mapping) - target = new_tree.find(".//*[@data-target='true']") - if target is None: - target = new_tree - target.text = match.text - target.tail = match.tail - target.set("data-target-for", match.get("id")) - # children will get processed separately - # add any children inside the target - for child in match.findall("./*"): - target.append(child) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return new_tree - -def transform_element(item, root, is_child=False): - try: - # build the selector for the xpath - sel = make_selector(item["input"], is_child) - if sel is not None: - matches = root.xpath(sel) - for match in matches: - # first process any mapped children - if "child_mappings" in item["input"] and len(item["input"]["child_mappings"]) > 0: - for child_item in item["input"]["child_mappings"]: - match = transform_element(child_item, match, True) - new_tree = make_tree(item) - if new_tree is not None: - # set attributes, add text/tail, and add children - new_tree = add_content_to_tree(new_tree, match) - # add the new tree to the document - add_next_with_tail(match, new_tree) - # remove the old element - match.getparent().remove(match) - else: - # if there is no tree, the element should be removed - # first, preserve any children: - for child in reversed(match.findall("./*")): - add_next_with_tail(match, child) - # handle the tail if needed - if match.tail is not None and re.search(r"\S", match.tail) is not None: - prev = match.getprevious() - if prev is not None: - prev.tail = prev.tail + match.tail if prev.tail is not None else match.tail - else: - parent = match.getparent() - parent.text = parent.text + match.tail if parent.text is not None else match.tail - # then remove the element - match.getparent().remove(match) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return root - -def fix_duplicate_ids(root, html_file): - try: - existing = [] - matches = root.xpath(".//*[contains(@id, 'rpip')]") - counter = 0 - for match in matches: - myid = match.get("id") - if myid in existing: - id_string = str(counter)+html_file+''.join(get_all_text(match)) - newid = make_hash(id_string) - newid = "rpip"+newid - match.set("id", newid) - existing.append(newid) - counter 
+= 1 - else: - existing.append(myid) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return root - -def fix_internal_links(root, html_file, updated_links): - try: - # first let's make sure internal links are all unique - matches = root.xpath(".//a[contains(@href, '#') and not(@data-adjusted)]") - while len(matches) > 0: - match = matches[0] - href = match.get("href") - if re.match("^#", href) is not None and len(href) < 30: - # make a new hash string - hash_string = html_file+''.join(get_all_text(match))+match.get("href") - newid = make_hash(hash_string) - newid = "ga" + newid - updated_links[html_file+href] = html_file+"#"+newid - match.set("href", "#"+newid) - match.set("data-adjusted", "true") - links = root.xpath(".//a[@href='#"+href+"']") - for link in links: - link.set("href", "#"+newid) - link.set("data-adjusted", "true") - anchor_id = re.sub("^#", "", href) - anchors = root.xpath(".//*[@id='"+anchor_id+"']") - for anchor in anchors: - anchor.set("id", newid) - else: - match.set("data-adjusted", "true") - matches = root.xpath(".//a[contains(@href, '#') and not(@data-adjusted)]") - # then we'll adjust them - matches = root.xpath(".//a[contains(@href, '"+html_file+"#')]") - for match in matches: - href = match.get("href") - new_href = re.sub(html_file, "", href) - match.set("href", new_href) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return root, updated_links - -def make_filename_id(filename): - my_id = filename - try: - my_id = re.sub(".html$", "", my_id) - my_id = re.sub("^group__", "", my_id) - my_id = re.sub("__", "_", my_id) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return my_id - -def find_item_in_toc(h_json, filename): - try: - found = False - matching_file = None - for item in h_json: - if found == False: - if "html" in item and item["html"] == filename: - matching_file = item["html"] - found = True - elif "subitems" in item: - matching_file, found = find_item_in_toc(item["subitems"], filename) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - -def fix_external_links(adoc, h_json): - try: - matches = re.findall(r"(href=[\"'])([^\s>]*?)([\"'])", adoc) - for match in matches: - href = match[1] - # href = match.get("href") - if re.match("^https?:", href) is None and re.match("^#", href) is None: - filename = href - target_id = None - if "#" in href: - filename = href.split("#")[0] - target_id = href.split("#")[1] - # walk the toc data to find the main html file - val, parent_tree = find_toc_item(h_json, filename, []) - if val is not None: - parent_file = h_json[parent_tree[0]]["html"] - # parent_file, found = find_item_in_toc(h_json, filename) - if parent_file is not None: - parent_file_dest = re.sub("^group__", "", parent_file) - new_href = parent_file_dest - if filename != parent_file: - if target_id is None: - my_id = make_filename_id(filename) - new_href = new_href + "#" + my_id - else: - new_href = new_href + "#" + target_id - new_href = re.sub("__", "_", new_href) - adoc = re.sub(href, new_href, adoc) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return adoc - -def merge_lists(list_type, root): - try: - # merge contiguous lists that came from the same original parent - matches = root.findall(".//"+list_type+"[@data-parent-id]") - for match in 
matches: - my_ref = match.get("data-parent-id") - next_el = match.getnext() - if next_el is not None: - next_ref = next_el.get("data-parent-id") - while next_el is not None and next_el.tag == list_type and next_ref is not None and next_ref == my_ref: - for child in next_el.findall("./*"): - match.append(child) - next_el.getparent().remove(next_el) - next_el = match.getnext() - if next_el is not None: - next_ref = next_el.get("data-parent-id") - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return root - -def wrap_list_items(root): - try: - matches = root.xpath(".//li[not(./p)]") - for match in matches: - newp = etree.Element("p") - newp.text = match.text - match.text = None - for child in match.findall("./*"): - newp.append(child) - match.append(newp) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return root - -def make_cell_para(el): - try: - newp = etree.Element("p") - newp.text = el.text - el.text = None - for child in el.findall("./*"): - newp.append(child) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return newp - -def merge_note_paras(root): - try: - matches = root.xpath(".//div[@class='admonitionblock note' and count(.//td[@class='content']) > 1]") - for match in matches: - first_cell = match.find(".//td[@class='content']") - newp = make_cell_para(first_cell) - first_cell.append(newp) - next = first_cell.getnext() - while next is not None: - newp = make_cell_para(next) - first_cell.append(newp) - next.getparent().remove(next) - next = first_cell.getnext() - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return root - -def fix_heading_levels(root): - try: - all_heads = root.xpath(".//p[contains(@class, 'adoc-h2')]|.//p[contains(@class, 'adoc-h3')]") - if len(all_heads) > 0: - head = all_heads[0] - myclass = head.get("class") - if "adoc-h3" in myclass: - head.set("class", "adoc-h2") - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return root - -def get_document_title(root, html_file): - title_text = re.sub(".html", "", html_file) - try: - title = root.find(".//div[@class='headertitle']/div[@class='title']") - if title is not None: - title_categories = title.find("./div[@class='ingroups']") - if title_categories is not None: - # move to the document contents - contents = root.find(".//div[@class='contents']") - if contents is not None: - contents.insert(0, title_categories) - title_categories.text = "Part of: " + title_categories.text if title_categories.text is not None else "Part of: " - title_text = ''.join(get_all_text(title)) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return title_text - -def retag_heading(head, headtype): - try: - text = ''.join(get_all_text(head)) - newel = etree.Element("p") - newel.set("class", "adoc-"+headtype) - anchors = head.xpath("./a[@class='anchor' and @id]") - if len(anchors) > 0: - anchor = anchors[0] - else: - anchor = None - if anchor is not None and anchor.text is None: - newel.set("id", anchor.get("id")) - else: - newel.set("id", head.get("id")) - newel.text = text - add_next_with_tail(head, newel) - head.getparent().remove(head) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return - -def 
prep_for_adoc(root): - try: - h2s = root.xpath(".//div[@class='contents']/h2|.//div[@class='contents']/div[@class='textblock']/h2") - for head in h2s: - retag_heading(head, "h2") - h3s = root.xpath(".//div[@class='contents']/h3|.//div[@class='contents']/div[@class='textblock']/h3") - for head in h3s: - retag_heading(head, "h3") - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return root - -def make_adoc(root_string, title_text, filename): - try: - my_id = make_filename_id(filename) - root_string = re.sub(r"<\/div>\s*?$", "", root_string, flags=re.S) - root_string = re.sub(r'
', "", root_string) - root_string = "[["+my_id+"]]\n== " + title_text + "\n\n++++\n" + root_string - root_string = re.sub(r'(]+class="adoc-h2"[^>]*id=")([^"]+)("[^>]*>\s*)(.*?)(<\/p>)', '\n++++\n\n[[\\2]]\n=== \\4\n\n++++\n', root_string, flags=re.S) - root_string = re.sub(r'(]+class="adoc-h3"[^>]*id=")([^"]+)("[^>]*>\s*)(.*?)(<\/p>)', '\n++++\n\n[[\\2]]\n==== \\4\n\n++++\n', root_string, flags=re.S) - root_string = root_string + "\n++++\n" - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return root_string - -def decrease_heading_levels(adoc): - try: - adoc = re.sub(r"\n==", "\n=", adoc, flags=re.S) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return adoc - -def traverse_subitems(subitems, toc_list): - for item in subitems: - if "html" in item: - toc_list.append(item["html"]) - if "subitems" in item: - toc_list = traverse_subitems(item["subitems"], toc_list) - return toc_list - -def parse_toc(h_json, toc_list): - try: - for item in h_json: - if "filename" in item: - toc_list.append(item["filename"]) - elif "subitems" in item: - toc_list = traverse_subitems(item["subitems"], toc_list) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return h_json, toc_list - -def parse_header(header_path): - h_json = [ - { 'group_id': 'index_doxygen', 'name': 'Introduction', 'description': 'An introduction to the Pico SDK', 'html': 'index_doxygen.html', 'subitems': [] } - ] - try: - with open(header_path) as h: - content = h.read() - blocks = re.findall(r"^(\s*)(\*|\/\*\*)(\s*)(\s)(\*)(\s)(\\)(defgroup)([^}]*)(\@\})", content, re.M) - for (a, b, c, d, e, f, g, h, i, j) in blocks: - items = i.split(r"\defgroup") - group_id = None - for item in items: - if group_id is None: # must be the first item in the list - m = re.match(r"(\s*)(\S*)(\s*)([^*]*)(.*?)(@\{)", item, re.S) - group_id = m.group(2) - group_filename = "group_"+group_id+".html" - group_filename = re.sub("_", "__", group_filename) - group_name = m.group(4) - group_name = re.sub(r"\s*$", "", group_name, re.M) - group_desc = m.group(5) - group_desc = re.sub(r"\n", "", group_desc, re.M) - group_desc = re.sub(r"\*", "", group_desc, re.M) - group_desc = re.sub(r"^\s", "", group_desc, re.M) - group_json = { 'group_id': group_id, 'name': group_name, 'description': group_desc, 'html': group_filename, 'subitems': [] } - h_json.append(group_json) - else: - cleaned = item - cleaned = re.sub(r"\n*", "", cleaned, re.M) - cleaned = re.sub(r"^\s*", "", cleaned, re.M) - cleaned = re.sub(r"\s*\*\s*$", "", cleaned, re.M) - val = cleaned.split(" ")[0] - filename = re.sub("_", "__", val) - filename = "group__" + filename - group_json['subitems'].append({ 'name': val, 'file': filename + ".adoc", 'html': filename + ".html", 'subitems': [] }) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return h_json - -def compile_json_mappings(json_dir, json_files): - try: - compiled = [] - skip = ["table_memname.json"] - for json_file in sorted(json_files): - if json_file not in skip: - # read the json - file_path = os.path.join(json_dir, json_file) - with open(file_path) as f: - data = json.load(f) - compiled.append(data) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return compiled - -def compile_includes(my_adoc, output_path, subitems): - try: - for item in 
subitems: - # append includes directly to the parent file - adoc_filename = item["file"] - full_adoc_path = os.path.join(output_path, adoc_filename) - # read the adoc - included_content = "" - with open(full_adoc_path) as f: - included_content = f.read() - my_adoc += "\n\n" - my_adoc += included_content - if "subitems" in item and len(item["subitems"]) > 0: - my_adoc = compile_includes(my_adoc, output_path, item["subitems"]) - os.remove(full_adoc_path) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return my_adoc - -def walk_json(item, group_adoc, output_path): - try: - filename = item["file"] - group_adoc = group_adoc + "include::" + filename + "[]\n\n" - if "subitems" in item and len(item["subitems"]) > 0: - # compile includes into a single file - my_adoc = "" - my_adoc_path = os.path.join(output_path, filename) - with open(my_adoc_path) as f: - my_adoc = f.read() - my_adoc = compile_includes(my_adoc, output_path, item["subitems"]) - # write the new file - write_output(my_adoc_path, my_adoc) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return group_adoc - -def walk_nested_adoc(item, output_path, level): - try: - # only adjust nested items - if level > 1: - # read the adoc file - # not all items in the json have an adoc path - adoc_path = re.sub(".html$", ".adoc", item["html"]) - filepath = os.path.join(output_path, adoc_path) - with open(filepath) as f: - content = f.read() - subs = "=" - for i in range(level-1): - subs = subs + "=" - content = re.sub("^=", subs, content, flags=re.M) - write_output(filepath, content) - # adjust the heading levels - if "subitems" in item: - for subitem in item["subitems"]: - newlevel = level + 1 - newlevel = walk_nested_adoc(subitem, output_path, newlevel) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return level - -def find_toc_item(subitems, path, parent_tree): - try: - val = None - original_tree = parent_tree.copy() - for ix, item in enumerate(subitems): - if val is None: - parent_tree.append(ix) - if "html" in item and item["html"] == path: - val = item - elif "subitems" in item: - val, parent_tree = find_toc_item(item["subitems"], path, parent_tree) - if val is None: - parent_tree = original_tree.copy() - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return val, parent_tree - -def check_toc_level(h_json, html_file, root): - try: - # check for the Modules table - tables = root.xpath(".//table[@class='memberdecls' and ./tr/td/h2[contains(text(),'Modules')]]") - if len(tables) > 0: - table = tables[0] - modules = table.xpath(".//tr[contains(@class, 'memitem:')]//a") - modules = [f.get("href") for f in modules] - # also collect this file's parents - header = root.find(".//div[@class='headertitle']") - outer_parents = [] - if header is not None: - h_parents = header.findall(".//div[@class='ingroups']/a") - for h_item in h_parents: - outer_parents.append(h_item.get("href")) - outer_parents.append(html_file) - - # first check the outer parents to find our starting point - level = h_json - for ix, parent in enumerate(outer_parents): - #for toc_item in level: - val, parent_tree = find_toc_item(level, parent, []) - if val is not None: - for n in parent_tree: - level = level[n] - if "subitems" not in level: - level["subitems"] = [] - level = level["subitems"] - # create each toc level as needed - elif 
ix > 0: - new_subitem = {'name': re.sub(".html", "", parent), 'file': re.sub(".html", ".adoc", parent), 'html': parent, 'subitems': []} - level.append(new_subitem) - level = new_subitem["subitems"] - - # then check all the modules - for ix, module in enumerate(modules): - found = False - for toc_item in level: - if "html" in toc_item and toc_item["html"] == module: - found = True - break - if found == False: - level.append({'name': re.sub(".html", "", module), 'file': re.sub(".html", ".adoc", module), 'html': module, 'subitems': []}) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return h_json - -def parse_individual_file(html_path, html_file, complete_json_mappings, updated_links, h_json): - try: - # create the full path - this_path = os.path.join(html_path, html_file) - # read the input root - with open(this_path) as h: - html_content = h.read() - html_content = re.sub(r'<\!DOCTYPE html PUBLIC "-\/\/W3C\/\/DTD XHTML 1\.0 Transitional\/\/EN" "https:\/\/www\.w3\.org\/TR\/xhtml1\/DTD\/xhtml1-transitional\.dtd">', '', html_content) - html_content = re.sub('rel="stylesheet">', 'rel="stylesheet"/>', html_content) - html_content = re.sub('&display=swap"', '"', html_content) - html_content = re.sub(r'Raspberry Pi', '', html_content) - html_content = re.sub(r'Raspberry Pi', '', html_content) - html_content = re.sub(r"<\!-- HTML header for doxygen \S*?-->", '', html_content) - html_content = re.sub(' xmlns="http://www.w3.org/1999/xhtml"', '', html_content) - root = etree.HTML(html_content) - - # give everything an id - root = add_ids(root, html_file) - # first check to see if this should be in the toc list - h_json = check_toc_level(h_json, html_file, root) - # loop over each json file - skip = ["table_memname.json"] - for mapping in complete_json_mappings: - for item in mapping: - root = transform_element(item, root) - # fix links - root, updated_links = fix_internal_links(root, html_file, updated_links) - # cleanup - root = merge_lists("ul", root) - root = merge_lists("ol", root) - root = wrap_list_items(root) - # combine multi-para notes into one container - root = merge_note_paras(root) - # add some extra items to help with the adoc conversion - root = prep_for_adoc(root) - # fix some heading levels - root = fix_heading_levels(root) - root = fix_duplicate_ids(root, html_file) - # cleanup - root = strip_attribute("data-processed", root) - # get the document title - title_text = get_document_title(root, html_file) - # get only the relevant content - contents = root.find(".//div[@class='contents']") - if contents is not None: - # prep and write the adoc - final_output = stringify(contents) - adoc = make_adoc(final_output, title_text, html_file) - else: - adoc = None - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return adoc, h_json - -def handler(html_path, output_path, header_path, output_json): - try: - dir_path = os.path.dirname(os.path.realpath(__file__)) - json_dir = os.path.join(dir_path, "doxygen_json_mappings") - html_dir = os.path.realpath(html_path) - output_dir = os.path.realpath(output_path) - # get the file order and groupings - h_json = parse_header(header_path) - # read the json transform mappings: - # get all the json files within a specified directory - json_files = os.listdir(json_dir) - # filter for just json files - json_files = [f for f in json_files if re.search(".json", f) is not None] - complete_json_mappings = compile_json_mappings(json_dir, 
json_files) - # get a list of all the html files - html_files = os.listdir(html_dir) - html_files = [f for f in html_files if re.search(".html", f) is not None] - # sort the files ascending - html_files.sort() - # process every html file - updated_links = {} - - for html_file in html_files: - this_output_path = os.path.join(output_path, html_file) - # parse the file - adoc, h_json = parse_individual_file(html_path, html_file, complete_json_mappings, updated_links, h_json) - # write the final adoc file - adoc_path = re.sub(".html$", ".adoc", this_output_path) - if adoc is not None: - write_output(adoc_path, adoc) - print("Generated " + adoc_path) - else: - print("--------- SKIPPED " + adoc_path) - - toc_list = [] - toc_list = parse_toc(h_json, toc_list) - - # adjust nested adoc headings - for item in h_json: - level = 0 - # walk the tree and adjust as necessary - level = walk_nested_adoc(item, output_path, level) - - # fix any links that were updated from other files - adoc_files = os.listdir(output_path) - adoc_files = [f for f in adoc_files if re.search(".adoc", f) is not None] - for adoc_file in adoc_files: - this_path = os.path.join(output_path, adoc_file) - with open(this_path) as h: - content = h.read() - # fix links - content = fix_external_links(content, h_json) - # fix heading levels for non-included pages - src_html_file = re.sub(".adoc", ".html", adoc_file) - if src_html_file not in toc_list: - adoc = decrease_heading_levels(adoc) - for link in updated_links: - content = re.sub(link, updated_links[link], content) - write_output(this_path, content) - - # make the group adoc files - # include::micropython/what-board.adoc[] - for item in h_json: - group_adoc = "= " + item['name'] + "\n\n" - group_adoc = group_adoc + item['description'] + "\n\n" - if 'html' in item: - item_filename = item['html'] - for toc_item in item["subitems"]: - group_adoc = walk_json(toc_item,group_adoc,output_path) - group_output_path = os.path.join(output_path, item["group_id"] + ".adoc") - write_output(group_output_path, group_adoc) - # write the json structure file as well - write_output(output_json, json.dumps(h_json, indent="\t")) - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - print("ERROR: ", e, exc_tb.tb_lineno) - return - -if __name__ == "__main__": - html_path = sys.argv[1] - output_path = sys.argv[2] - header_path = sys.argv[3] - output_json = sys.argv[4] - handler(html_path, output_path, header_path, output_json)
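For local debugging, the new postprocessing steps can also be exercised outside of make. The sketch below mirrors the Makefile recipes added above (the indexpage.xml and examples_page.xml conversions are omitted for brevity); it assumes the repository root as the working directory, that the three cmake/doxygen builds have already populated build-pico-sdk-docs/, and that asciidoc_dir is a hypothetical stand-in for $(ASCIIDOC_DOXYGEN_DIR), whose value is defined outside this diff:

    import subprocess

    # hypothetical output directory; substitute your $(ASCIIDOC_DOXYGEN_DIR)
    asciidoc_dir = "documentation/asciidoc/pico-sdk"

    # 1. tag the combined Doxygen XML with per-chip context (rewrites the XML in place)
    subprocess.run(["python3", "scripts/postprocess_doxygen_xml.py", "build-pico-sdk-docs"], check=True)

    # 2. convert the combined XML index into a single asciidoc file
    with open(f"{asciidoc_dir}/all_groups.adoc", "w") as f:
        subprocess.run(["python3", "-m", "doxygentoasciidoc", "-f",
                        "build-pico-sdk-docs/combined/docs/doxygen/xml/index.xml"],
                       stdout=f, check=True)

    # 3. split all_groups.adoc into per-group chapters, build picosdk_index.json,
    #    and resolve cross-file links
    subprocess.run(["python3", "scripts/postprocess_doxygen_adoc.py", asciidoc_dir], check=True)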