From f04bcd3d85a3ad692d5e9b677809027064e17254 Mon Sep 17 00:00:00 2001 From: Bruno Dias Date: Wed, 26 Jun 2024 13:14:44 -0300 Subject: [PATCH] update wiki registry. --- clib.json | 2 +- deps/wiki-registry/package.json | 4 +-- deps/wiki-registry/wiki-registry.c | 53 ++++++++++++++++-------------- test/search-basic.sh | 4 +-- 4 files changed, 33 insertions(+), 30 deletions(-) diff --git a/clib.json b/clib.json index dba3adaa..df88687b 100644 --- a/clib.json +++ b/clib.json @@ -9,7 +9,7 @@ "which": "0.1.3", "stephenmathieson/str-flatten.c": "0.0.4", "commander": "1.3.2", - "stephenmathieson/wiki-registry.c": "0.0.4", + "clibs/wiki-registry.c": "0.1.2", "stephenmathieson/case.c": "0.1.3", "jwerle/fs.c": "0.2.0", "stephenmathieson/str-replace.c": "0.0.6", diff --git a/deps/wiki-registry/package.json b/deps/wiki-registry/package.json index 6ab5362e..19bf5a82 100644 --- a/deps/wiki-registry/package.json +++ b/deps/wiki-registry/package.json @@ -1,7 +1,7 @@ { "name": "wiki-registry", - "version": "0.0.4", - "repo": "stephenmathieson/wiki-registry.c", + "version": "0.1.2", + "repo": "clibs/wiki-registry.c", "description": "Turn a GitHub Wiki page into a package registry", "keywords": [ "registry", "github", "wiki" ], "license": "MIT", diff --git a/deps/wiki-registry/wiki-registry.c b/deps/wiki-registry/wiki-registry.c index e06e8c5a..069ebc67 100644 --- a/deps/wiki-registry/wiki-registry.c +++ b/deps/wiki-registry/wiki-registry.c @@ -1,4 +1,3 @@ - // // wiki-registry.c // @@ -96,31 +95,37 @@ wiki_registry_parse(const char *html) { GumboNode *body = gumbo_get_element_by_id("wiki-body", output->root); if (body) { - // grab all category `

`s - list_t *h2s = gumbo_get_elements_by_tag_name("h2", body); - list_node_t *heading_node; - list_iterator_t *heading_iterator = list_iterator_new(h2s, LIST_HEAD); - while ((heading_node = list_iterator_next(heading_iterator))) { - GumboNode *heading = (GumboNode *) heading_node->val; - char *category = gumbo_text_content(heading); - // die if we failed to parse a category, as it's - // almost certinaly a malloc error - if (!category) break; - trim(case_lower(category)); - GumboVector *siblings = &heading->parent->v.element.children; - size_t pos = heading->index_within_parent; - - // skip elements until the UL - // TODO: don't hardcode position here - // 2: - // 1 - whitespace - // 2 - actual node - GumboNode *ul = siblings->data[pos + 2]; - if (GUMBO_TAG_UL != ul->v.element.tag) { - free(category); + GumboNode* markdown_body = ((GumboNode*)((GumboVector)body->v.element.children).data[1]); + GumboVector children = (GumboVector)markdown_body->v.element.children; + + size_t count = children.length - 1; + + for (size_t index = 0; index < count; index++) { + GumboNode *heading = (GumboNode *)children.data[index]; + GumboNode *ul = NULL; + + if (heading->v.element.tag != GUMBO_TAG_DIV) { + continue; + } + + GumboAttribute *node_id = gumbo_get_attribute(&heading->v.element.attributes, "class"); + if (node_id == NULL || strncmp(node_id->value, "markdown-heading", 16) != 0) { continue; } + for (; index < count; index++) { + ul = (GumboNode *)children.data[index]; + + if (ul->v.element.tag == GUMBO_TAG_UL) { + break; + } + } + + list_t *h2 = gumbo_get_elements_by_tag_name("h2", heading); + char *category = gumbo_text_content(h2->head->val); + if (!category) break; + trim(case_lower(category)); + list_t *lis = gumbo_get_elements_by_tag_name("li", ul); list_iterator_t *li_iterator = list_iterator_new(lis, LIST_HEAD); list_node_t *li_node; @@ -138,8 +143,6 @@ wiki_registry_parse(const char *html) { list_destroy(lis); free(category); } - list_iterator_destroy(heading_iterator); - list_destroy(h2s); } gumbo_destroy_output(&kGumboDefaultOptions, output); diff --git a/test/search-basic.sh b/test/search-basic.sh index 4aa84537..7343cf88 100755 --- a/test/search-basic.sh +++ b/test/search-basic.sh @@ -3,14 +3,14 @@ CACHE=$TMPDIR/clib-search.cache rm -f "$CACHE" 2> /dev/null -N=$(clib search | wc -l) +N=$(./clib-search | wc -l) # lame check for more than 100 lines of output [ "$N" -lt 100 ] && { echo >&2 "Expected \`clib search\` to return at least 100 results" exit 1 } -TRIM=$(clib search trim) +TRIM=$(./clib-search trim) case "$TRIM" in *"stephenmathieson/trim.c"*) :