]+class="adoc-h2"[^>]*id=")([^"]+)("[^>]*>\s*)(.*?)(<\/p>)', '\n++++\n\n[[\\2]]\n=== \\4\n\n++++\n', root_string, flags=re.S) - root_string = re.sub(r'(
# NOTE(review): in the collapsed source this line opened with the tail of a
# function whose `def` lies above this section (it finishes the statement
# begun at the end of the previous line). It cannot be reconstructed from
# here, so it is preserved verbatim below, one original statement per line;
# re-attach it to that function when restoring the file.
#
#   ]+class="adoc-h3"[^>]*id=")([^"]+)("[^>]*>\s*)(.*?)(<\/p>)', '\n++++\n\n[[\\2]]\n==== \\4\n\n++++\n', root_string, flags=re.S)
#   root_string = root_string + "\n++++\n"
#   except Exception as e:
#   exc_type, exc_obj, exc_tb = sys.exc_info()
#   print("ERROR: ", e, exc_tb.tb_lineno)
#   return root_string


def _report_error(error):
    """Print *error* with the line number at which it surfaced in the caller.

    The functions below log failures this way and carry on rather than
    raising.
    """
    print("ERROR: ", error, error.__traceback__.tb_lineno)


def _new_toc_entry(html_name):
    """Return a fresh table-of-contents entry for the page *html_name*."""
    return {
        'name': re.sub(r"\.html", "", html_name),
        'file': re.sub(r"\.html", ".adoc", html_name),
        'html': html_name,
        'subitems': [],
    }


def decrease_heading_levels(adoc):
    """Return *adoc* with one "=" removed from each line that starts with "==".

    Only lines preceded by a newline are affected, so the very first line of
    the text is never changed.
    """
    try:
        adoc = re.sub(r"\n==", "\n=", adoc, flags=re.S)
    except Exception as e:
        _report_error(e)
    return adoc


def traverse_subitems(subitems, toc_list):
    """Append the "html" value of every item in *subitems*, depth first.

    Items without an "html" key are skipped, but their "subitems" are still
    visited. *toc_list* is extended in place and also returned.
    """
    for item in subitems:
        if "html" in item:
            toc_list.append(item["html"])
        if "subitems" in item:
            toc_list = traverse_subitems(item["subitems"], toc_list)
    return toc_list


def parse_toc(h_json, toc_list):
    """Collect the pages referenced by the table of contents *h_json*.

    A top-level item contributes its "filename" value when it has one;
    otherwise its "subitems" are walked with traverse_subitems().

    Returns:
        The tuple ``(h_json, toc_list)``. Callers that only need the list
        must unpack it.
    """
    try:
        for item in h_json:
            if "filename" in item:
                toc_list.append(item["filename"])
            elif "subitems" in item:
                toc_list = traverse_subitems(item["subitems"], toc_list)
    except Exception as e:
        _report_error(e)
    return h_json, toc_list


def parse_header(header_path):
    r"""Build the initial table of contents from the ``\defgroup`` header file.

    Each comment block that starts with ``\defgroup`` and ends with ``@}``
    becomes one top-level group. The first ``\defgroup`` in the block supplies
    the group's id, name and description (the text up to ``@{``); every
    further ``\defgroup`` becomes a subitem of that group.

    Args:
        header_path: path of the header file to read.

    Returns:
        A list of group dictionaries, always starting with the fixed
        "Introduction" entry. If reading or parsing fails, the error is
        printed and whatever was collected so far is returned.
    """
    h_json = [
        {
            'group_id': 'index_doxygen',
            'name': 'Introduction',
            'description': 'An introduction to the Pico SDK',
            'html': 'index_doxygen.html',
            'subitems': [],
        }
    ]
    try:
        with open(header_path) as header_file:
            content = header_file.read()
        blocks = re.findall(
            r"^(\s*)(\*|\/\*\*)(\s*)(\s)(\*)(\s)(\\)(defgroup)([^}]*)(\@\})",
            content,
            re.M,
        )
        for block in blocks:
            # The ninth capture is everything between "\defgroup" and "@}".
            items = block[8].split(r"\defgroup")
            group_id = None
            for item in items:
                if group_id is None:  # must be the first item in the list
                    m = re.match(r"(\s*)(\S*)(\s*)([^*]*)(.*?)(@\{)", item, re.S)
                    group_id = m.group(2)
                    group_filename = ("group_" + group_id + ".html").replace("_", "__")
                    # re.M must be passed as ``flags=``: the fourth positional
                    # argument of re.sub() is ``count``, which would cap the
                    # number of replacements at 8.
                    group_name = re.sub(r"\s*$", "", m.group(4), flags=re.M)
                    group_desc = m.group(5)
                    group_desc = re.sub(r"\n", "", group_desc, flags=re.M)
                    group_desc = re.sub(r"\*", "", group_desc, flags=re.M)
                    group_desc = re.sub(r"^\s", "", group_desc, flags=re.M)
                    group_json = {
                        'group_id': group_id,
                        'name': group_name,
                        'description': group_desc,
                        'html': group_filename,
                        'subitems': [],
                    }
                    h_json.append(group_json)
                else:
                    cleaned = re.sub(r"\n*", "", item, flags=re.M)
                    cleaned = re.sub(r"^\s*", "", cleaned, flags=re.M)
                    cleaned = re.sub(r"\s*\*\s*$", "", cleaned, flags=re.M)
                    # The group id is the first word after "\defgroup".
                    val = cleaned.split(" ")[0]
                    filename = "group__" + val.replace("_", "__")
                    group_json['subitems'].append({
                        'name': val,
                        'file': filename + ".adoc",
                        'html': filename + ".html",
                        'subitems': [],
                    })
    except Exception as e:
        _report_error(e)
    return h_json


def compile_json_mappings(json_dir, json_files):
    """Load the JSON mapping files named in *json_files* from *json_dir*.

    Files are read in sorted name order and "table_memname.json" is skipped.

    Returns:
        A list with the parsed content of each file. If a file cannot be
        read or parsed, the error is printed and the mappings loaded up to
        that point are returned.
    """
    compiled = []
    skip = ["table_memname.json"]
    try:
        for json_file in sorted(json_files):
            if json_file in skip:
                continue
            file_path = os.path.join(json_dir, json_file)
            with open(file_path) as f:
                compiled.append(json.load(f))
    except Exception as e:
        _report_error(e)
    return compiled


def compile_includes(my_adoc, output_path, subitems):
    """Append the adoc file of every subitem (recursively) to *my_adoc*.

    Each subitem's file is read from *output_path*, appended after a blank
    line, followed by its own subitems, and then deleted from disk.

    Returns:
        The combined text. On error the message is printed and the text
        built so far is returned.
    """
    try:
        for item in subitems:
            full_adoc_path = os.path.join(output_path, item["file"])
            with open(full_adoc_path) as f:
                included_content = f.read()
            my_adoc += "\n\n"
            my_adoc += included_content
            if "subitems" in item and len(item["subitems"]) > 0:
                my_adoc = compile_includes(my_adoc, output_path, item["subitems"])
            # The content now lives in the parent, so drop the separate file.
            os.remove(full_adoc_path)
    except Exception as e:
        _report_error(e)
    return my_adoc


def walk_json(item, group_adoc, output_path):
    """Add an ``include::`` line for *item* and flatten its nested files.

    When *item* has subitems, their adoc files are merged into the item's own
    file through compile_includes() and the result is written back with
    write_output() (defined outside this section).

    Returns:
        *group_adoc* with the include directive appended.
    """
    try:
        filename = item["file"]
        group_adoc = group_adoc + "include::" + filename + "[]\n\n"
        if "subitems" in item and len(item["subitems"]) > 0:
            # compile includes into a single file
            my_adoc_path = os.path.join(output_path, filename)
            with open(my_adoc_path) as f:
                my_adoc = f.read()
            my_adoc = compile_includes(my_adoc, output_path, item["subitems"])
            write_output(my_adoc_path, my_adoc)
    except Exception as e:
        _report_error(e)
    return group_adoc


def walk_nested_adoc(item, output_path, level):
    """Deepen the headings of nested adoc files to match their toc depth.

    For items at *level* 2 or more, the first "=" of every line that starts
    with one is replaced by *level* "=" characters. Subitems are then
    processed at ``level + 1``.

    Returns:
        *level*, unchanged.
    """
    try:
        # only adjust nested items
        if level > 1:
            # not all items in the json have an adoc path
            adoc_path = re.sub(r"\.html$", ".adoc", item["html"])
            filepath = os.path.join(output_path, adoc_path)
            with open(filepath) as f:
                content = f.read()
            content = re.sub("^=", "=" * level, content, flags=re.M)
            write_output(filepath, content)
        if "subitems" in item:
            for subitem in item["subitems"]:
                walk_nested_adoc(subitem, output_path, level + 1)
    except Exception as e:
        _report_error(e)
    return level


def find_toc_item(subitems, path, parent_tree):
    """Search *subitems* depth first for the entry whose "html" equals *path*.

    Args:
        subitems: list of toc entries to search.
        path: html file name to look for.
        parent_tree: index path accumulated so far; it is appended to in
            place while searching.

    Returns:
        ``(entry, index_path)`` where *index_path* lists the index taken at
        each nesting level, or ``(None, <parent_tree as passed in>)`` when
        nothing matches.
    """
    val = None
    try:
        original_tree = parent_tree.copy()
        for ix, item in enumerate(subitems):
            if val is None:
                parent_tree.append(ix)
                if "html" in item and item["html"] == path:
                    val = item
                elif "subitems" in item:
                    val, parent_tree = find_toc_item(item["subitems"], path, parent_tree)
                if val is None:
                    # Dead end: forget the indices tried for this branch.
                    parent_tree = original_tree.copy()
    except Exception as e:
        _report_error(e)
    return val, parent_tree


def check_toc_level(h_json, html_file, root):
    """Make sure the modules listed on a page exist in the toc *h_json*.

    Nothing happens unless the page contains a "Modules" member table. The
    page's "ingroups" parents plus the page itself are located in the toc
    (missing levels other than the first are created), and every module
    linked from the table is added beneath that position if absent.

    Args:
        h_json: toc structure, modified in place.
        html_file: file name of the page being processed.
        root: parsed HTML tree of the page (queried with xpath/find).

    Returns:
        *h_json*.
    """
    try:
        tables = root.xpath(".//table[@class='memberdecls' and ./tr/td/h2[contains(text(),'Modules')]]")
        if len(tables) > 0:
            links = tables[0].xpath(".//tr[contains(@class, 'memitem:')]//a")
            modules = [link.get("href") for link in links]
            # also collect this file's parents
            header = root.find(".//div[@class='headertitle']")
            outer_parents = []
            if header is not None:
                for h_item in header.findall(".//div[@class='ingroups']/a"):
                    outer_parents.append(h_item.get("href"))
            outer_parents.append(html_file)

            # first check the outer parents to find our starting point
            level = h_json
            for ix, parent in enumerate(outer_parents):
                val, parent_tree = find_toc_item(level, parent, [])
                if val is not None:
                    for n in parent_tree:
                        level = level[n].setdefault("subitems", [])
                # create each toc level as needed
                elif ix > 0:
                    new_subitem = _new_toc_entry(parent)
                    level.append(new_subitem)
                    level = new_subitem["subitems"]

            # then check all the modules
            for module in modules:
                known = any(
                    "html" in toc_item and toc_item["html"] == module
                    for toc_item in level
                )
                if not known:
                    level.append(_new_toc_entry(module))
    except Exception as e:
        _report_error(e)
    return h_json


def parse_individual_file(html_path, html_file, complete_json_mappings, updated_links, h_json):
    """Convert one Doxygen HTML page to AsciiDoc text.

    The page is cleaned up, parsed, registered in the toc, run through every
    transform in *complete_json_mappings* and a series of helper passes
    (all defined outside this section), and finally rendered by make_adoc().

    Returns:
        ``(adoc, h_json)``. *adoc* is None when the page has no
        ``div.contents`` element or when any step fails; the failure is
        printed so the caller can skip the page and continue.
    """
    # Bound up front so the return below is safe even if an early step raises.
    adoc = None
    try:
        this_path = os.path.join(html_path, html_file)
        with open(this_path) as h:
            html_content = h.read()
        html_content = re.sub(r'<\!DOCTYPE html PUBLIC "-\/\/W3C\/\/DTD XHTML 1\.0 Transitional\/\/EN" "https:\/\/www\.w3\.org\/TR\/xhtml1\/DTD\/xhtml1-transitional\.dtd">', '', html_content)
        html_content = re.sub('rel="stylesheet">', 'rel="stylesheet"/>', html_content)
        html_content = re.sub('&display=swap"', '"', html_content)
        # NOTE(review): the next two patterns are empty in this copy of the
        # source, which makes both calls no-ops; presumably the original
        # patterns were lost in extraction -- confirm against upstream.
        html_content = re.sub(r'', '', html_content)
        html_content = re.sub(r'', '', html_content)
        html_content = re.sub(r"<\!-- HTML header for doxygen \S*?-->", '', html_content)
        html_content = re.sub(' xmlns="http://www.w3.org/1999/xhtml"', '', html_content)
        root = etree.HTML(html_content)

        # give everything an id
        root = add_ids(root, html_file)
        # first check to see if this should be in the toc list
        h_json = check_toc_level(h_json, html_file, root)
        # apply every transform from every mapping file
        for mapping in complete_json_mappings:
            for item in mapping:
                root = transform_element(item, root)
        # fix links
        root, updated_links = fix_internal_links(root, html_file, updated_links)
        # cleanup
        root = merge_lists("ul", root)
        root = merge_lists("ol", root)
        root = wrap_list_items(root)
        # combine multi-para notes into one container
        root = merge_note_paras(root)
        # add some extra items to help with the adoc conversion
        root = prep_for_adoc(root)
        # fix some heading levels
        root = fix_heading_levels(root)
        root = fix_duplicate_ids(root, html_file)
        # cleanup
        root = strip_attribute("data-processed", root)
        # get the document title
        title_text = get_document_title(root, html_file)
        # get only the relevant content
        contents = root.find(".//div[@class='contents']")
        if contents is not None:
            final_output = stringify(contents)
            adoc = make_adoc(final_output, title_text, html_file)
    except Exception as e:
        _report_error(e)
    return adoc, h_json


def handler(html_path, output_path, header_path, output_json):
    """Convert a directory of Doxygen HTML pages into AsciiDoc files.

    Args:
        html_path: directory containing the Doxygen ``.html`` files.
        output_path: directory that receives the generated ``.adoc`` files.
        header_path: header file with the ``defgroup`` hierarchy, read by
            parse_header().
        output_json: path the final toc structure is written to as JSON.

    Any failure is printed; nothing is raised or returned.
    """
    try:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        json_dir = os.path.join(dir_path, "doxygen_json_mappings")
        html_dir = os.path.realpath(html_path)
        # get the file order and groupings
        h_json = parse_header(header_path)
        # read the json transform mappings
        json_files = [f for f in os.listdir(json_dir) if re.search(r"\.json", f) is not None]
        complete_json_mappings = compile_json_mappings(json_dir, json_files)
        # get a sorted list of all the html files
        html_files = sorted(f for f in os.listdir(html_dir) if re.search(r"\.html", f) is not None)
        # process every html file
        updated_links = {}

        for html_file in html_files:
            this_output_path = os.path.join(output_path, html_file)
            adoc, h_json = parse_individual_file(html_path, html_file, complete_json_mappings, updated_links, h_json)
            # write the final adoc file
            adoc_path = re.sub(r"\.html$", ".adoc", this_output_path)
            if adoc is not None:
                write_output(adoc_path, adoc)
                print("Generated " + adoc_path)
            else:
                print("--------- SKIPPED " + adoc_path)

        # parse_toc() returns (h_json, toc_list); only the list is needed.
        _, toc_list = parse_toc(h_json, [])

        # adjust nested adoc headings
        for item in h_json:
            walk_nested_adoc(item, output_path, 0)

        # fix any links that were updated from other files
        adoc_files = [f for f in os.listdir(output_path) if re.search(r"\.adoc", f) is not None]
        for adoc_file in adoc_files:
            this_path = os.path.join(output_path, adoc_file)
            with open(this_path) as h:
                content = h.read()
            # fix links
            content = fix_external_links(content, h_json)
            # fix heading levels for non-included pages
            src_html_file = re.sub(r"\.adoc", ".html", adoc_file)
            if src_html_file not in toc_list:
                content = decrease_heading_levels(content)
            for link in updated_links:
                content = re.sub(link, updated_links[link], content)
            write_output(this_path, content)

        # make the group adoc files
        # include::micropython/what-board.adoc[]
        for item in h_json:
            group_adoc = "= " + item['name'] + "\n\n"
            group_adoc = group_adoc + item['description'] + "\n\n"
            for toc_item in item["subitems"]:
                group_adoc = walk_json(toc_item, group_adoc, output_path)
            group_output_path = os.path.join(output_path, item["group_id"] + ".adoc")
            write_output(group_output_path, group_adoc)
        # write the json structure file as well
        write_output(output_json, json.dumps(h_json, indent="\t"))
    except Exception as e:
        _report_error(e)
    return


if __name__ == "__main__":
    html_path = sys.argv[1]
    output_path = sys.argv[2]
    header_path = sys.argv[3]
    output_json = sys.argv[4]
    handler(html_path, output_path, header_path, output_json)