From 6858978b99aa027653ce7f7d08bebb1ac22c54f2 Mon Sep 17 00:00:00 2001
From: nachtjasmin <nachtjasmin@posteo.de>
Date: Sun, 16 Jul 2023 10:52:07 +0200
Subject: [PATCH 1/3] Refactor pronoun/username extraction with named groups

---
 src/libs/pronouns.js | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/src/libs/pronouns.js b/src/libs/pronouns.js
index 7385e59..2b10f88 100644
--- a/src/libs/pronouns.js
+++ b/src/libs/pronouns.js
@@ -2,9 +2,9 @@ import sanitizeHtml from "sanitize-html";
 
 const fieldMatchers = [/pro.*nouns?/i, "pronomen"];
 const knownPronounUrls = [
-	/pronouns\.page\/:?([\w/@]+)/,
-	/pronouns\.within\.lgbt\/([\w/]+)/,
-	/pronouns\.cc\/pronouns\/([\w/]+)/,
+	/pronouns\.page\/(?:@(?<username>\w+)|:?(?<pronouns>[\w/][\w/:]*))/, // "@username" or pronouns must follow the slash
+	/pronouns\.within\.lgbt\/(?<pronouns>[\w/]+)/,
+	/pronouns\.cc\/pronouns\/(?<pronouns>[\w/]+)/,
 ];
 
 /**
@@ -58,12 +58,14 @@ async function extractFromField(field) {
 	// If one of pronoun URLs matches, overwrite the current known value.
 	for (const knownUrlRe of knownPronounUrls) {
 		if (!knownUrlRe.test(pronounsRaw)) continue;
-		text = pronounsRaw.match(knownUrlRe)[1];
-	}
+		const { pronouns, username } = pronounsRaw.match(knownUrlRe).groups;
+
+		// For now, only the pronouns.page regexp has a username value, so we can be sure
+		// that we don't query the wrong API.
+		if (username) {
+			return await queryUserFromPronounsPage(username);
+		}
 
-	// Right now, only the pronoun.page regex matches the @usernames.
-	if (text.charAt(0) === "@") {
-		text = await queryPronounsFromPronounsPage(text.substring(1));
 	}
 
 	if (!text) return null;
@@ -71,11 +73,11 @@ async function extractFromField(field) {
 }
 
 /**
- * Queries the pronouns from the pronouns.page API.
- * @param {string} username The username of the person.
- * @returns {Promise<string|null>} The pronouns that have set the "yes" opinion.
+ * Queries the pronouns for a given user from the pronouns.page API.
+ * @param {string} username The username of the person, without the leading "@".
+ * @returns {Promise<string|null>} The first pronouns for which the user has set the "yes" or "meh" opinion.
  */
-async function queryPronounsFromPronounsPage(username) {
+async function queryUserFromPronounsPage(username) {
 	// Example page: https://en.pronouns.page/api/profile/get/andrea?version=2
 	const resp = await fetch(`https://en.pronouns.page/api/profile/get/${username}?version=2`);
 	if (resp.status >= 400) {

From 83aa9b41b0858501157c1edae445e138313afb59 Mon Sep 17 00:00:00 2001
From: nachtjasmin <nachtjasmin@posteo.de>
Date: Sun, 16 Jul 2023 10:53:16 +0200
Subject: [PATCH 2/3] Add pronouns.page pronoun normalization

Especially for single-word pronouns.page URLs (e.g.
https://en.pronouns.page/it), we want to show the possessive pronoun as
well. This is implemented by querying the API once more and asking for
the full pronoun forms.

Closes #44.
---
 src/libs/pronouns.js          | 41 +++++++++++++++++++++++++++++------
 src/manifest.json             |  2 +-
 tests/extractPronouns.spec.js |  3 ++-
 3 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/src/libs/pronouns.js b/src/libs/pronouns.js
index 2b10f88..3f9fb59 100644
--- a/src/libs/pronouns.js
+++ b/src/libs/pronouns.js
@@ -66,6 +66,13 @@ async function extractFromField(field) {
 			return await queryUserFromPronounsPage(username);
 		}
 
+		// In case that we have single-word pronoun.page values, like "https://en.pronouns.page/it",
+		// we want to normalize that to include the possessive pronoun as well.
+		if (pronounsRaw.includes("pronouns.page") && !pronouns.includes("/")) {
+			return await normalizePronounPagePronouns(pronouns)
+		}
+
+		text = pronouns;
 	}
 
 	if (!text) return null;
@@ -107,20 +114,40 @@ async function queryUserFromPronounsPage(username) {
 	if (!pronouns) pronouns = profiles[0].pronouns;
 
 	let val = pronouns.find((x) => x.opinion === "yes" || x.opinion === "meh").value;
-	val = sanitizePronounPageValue(val);
+	val = await normalizePronounPagePronouns(val);
 	return val;
 }
 
 /**
  * @param {string} val
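+ * @returns {Promise<string>} The pronouns without any pronouns.page URL prefix; single-word values are expanded to "subject/possessive" via the pronouns.page API, falling back to the unexpanded value if that lookup fails.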
  */
-function sanitizePronounPageValue(val) {
-	if (!val.startsWith("https://")) return val;
+async function normalizePronounPagePronouns(val) {
+	const match = val.match(/pronouns\.page\/(.+)/);
+	if (match) val = match[1];
 
-	val = val.replace(/https?:\/\/.+\.pronouns\.page\/:?/, "");
+	if (val.includes("/")) return val;
 
-	if (val === "no-pronouns") val = "no pronouns";
-	return val;
+	if (val === "no-pronouns") return "no pronouns";
+
+	const pronounNameResp = await fetch("https://en.pronouns.page/api/pronouns/" + val);
+	if (!pronounNameResp.ok) {
+		// In case the request fails, better show the likely pronouns than nothing at all.
+		return val;
+	}
+
+	// If we query the pronouns.page API with invalid values, an empty body is returned, still with status code 200.
+	// Therefore, we just try to parse the JSON and if it does not work, we return the "val" from earlier and don't
+	// do further processing.
+	try {
+		const {
+			morphemes: { pronoun_subject, possessive_pronoun },
+		} = await pronounNameResp.json();
+
+		return [pronoun_subject, possessive_pronoun].join("/");
+	} catch {
+		return val;
+	}
 }
 
 /**
@@ -153,7 +180,7 @@ function sanitizePronouns(str) {
 		.join(" ");
 
 	// Remove trailing characters that are used as separators.
-	str = str.replace(/[-| /]+$/, "");
+	str = str.replace(/[-| :/]+$/, "");
 
 	// Finally, remove leading and trailing whitespace.
 	str = str.trim();
diff --git a/src/manifest.json b/src/manifest.json
index 171e378..690205f 100644
--- a/src/manifest.json
+++ b/src/manifest.json
@@ -10,7 +10,7 @@
 
 	"description": "puts pronouns next to usernames on mastodon",
 	"homepage_url": "https://github.com/ItsVipra/ProToots",
-	"permissions": ["storage", "https://en.pronouns.page/api/profile/get/*"],
+	"permissions": ["storage", "https://en.pronouns.page/api/*"],
 
 	"browser_action": {
 		"default_icon": "icons/icon small_size/icon small_size.png",
diff --git a/tests/extractPronouns.spec.js b/tests/extractPronouns.spec.js
index ab08d7f..8f65e77 100644
--- a/tests/extractPronouns.spec.js
+++ b/tests/extractPronouns.spec.js
@@ -57,13 +57,14 @@ const valueExtractionTests = [
 		null,
 	], // 404 errors
 	[`<a href="https://de.pronouns.page/:Katze"></a>`, "Katze"], // custom pronouns
-	[`<a href="https://de.pronouns.page/@benaryorg"></a>`, "Katze"], // custom pronouns in profile
+	[`<a href="https://de.pronouns.page/@benaryorg"></a>`, "Katze/Katze's"], // custom pronouns in profile
 	[`:theythem:`, null], // emojis shortcodes used for pronouns
 	[
 		// This is an actual example from a Mastodon field, with example.com redirecting to pronouns.page.
 		`dey/denen, es/ihm - <a href="https://example.com" rel="nofollow noopener noreferrer" target="_blank"><span class="invisible">https://</span><span class="">example.com</span><span class="invisible"></span></a>`,
 		"dey/denen, es/ihm",
 	],
+	["https://en.pronouns.page/it", "it/its"], // single-word pronoun pages
 ];
 for (const [input, expects] of valueExtractionTests) {
 	valueExtractionSuite(input, async () => {

From 7f7231836cd93e5d7435d7c377ccc50dc55738fd Mon Sep 17 00:00:00 2001
From: nachtjasmin <nachtjasmin@users.noreply.github.com>
Date: Sun, 16 Jul 2023 09:05:39 +0000
Subject: [PATCH 3/3] Format files using prettier

---
 src/libs/pronouns.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/libs/pronouns.js b/src/libs/pronouns.js
index 3f9fb59..a70fbea 100644
--- a/src/libs/pronouns.js
+++ b/src/libs/pronouns.js
@@ -69,7 +69,7 @@ async function extractFromField(field) {
 		// In case that we have single-word pronoun.page values, like "https://en.pronouns.page/it",
 		// we want to normalize that to include the possessive pronoun as well.
 		if (pronounsRaw.includes("pronouns.page") && !pronouns.includes("/")) {
-			return await normalizePronounPagePronouns(pronouns)
+			return await normalizePronounPagePronouns(pronouns);
 		}
 
 		text = pronouns;