Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Normalize pronoun.page URLs #47

Merged
merged 3 commits into from
Jul 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 48 additions & 19 deletions src/libs/pronouns.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ import sanitizeHtml from "sanitize-html";

const fieldMatchers = [/pro.*nouns?/i, "pronomen"];
const knownPronounUrls = [
/pronouns\.page\/:?([\w/@]+)/,
/pronouns\.within\.lgbt\/([\w/]+)/,
/pronouns\.cc\/pronouns\/([\w/]+)/,
/pronouns\.page\/(@(?<username>\w+))?:?(?<pronouns>[\w/:]+)?/,
/pronouns\.within\.lgbt\/(?<pronouns>[\w/]+)/,
/pronouns\.cc\/pronouns\/(?<pronouns>[\w/]+)/,
];

/**
Expand Down Expand Up @@ -58,24 +58,33 @@ async function extractFromField(field) {
// If one of pronoun URLs matches, overwrite the current known value.
for (const knownUrlRe of knownPronounUrls) {
if (!knownUrlRe.test(pronounsRaw)) continue;
text = pronounsRaw.match(knownUrlRe)[1];
}
const { pronouns, username } = pronounsRaw.match(knownUrlRe).groups;

// For now, only the pronouns.page regexp captures a username, so we can be sure
// that we don't query the wrong API.
if (username) {
return await queryUserFromPronounsPage(username);
}

// Right now, only the pronoun.page regex matches the @usernames.
if (text.charAt(0) === "@") {
text = await queryPronounsFromPronounsPage(text.substring(1));
// For single-word pronouns.page values, like "https://en.pronouns.page/it",
// we want to normalize the value so that it includes the possessive pronoun as well.
if (pronounsRaw.includes("pronouns.page") && !pronouns.includes("/")) {
return await normalizePronounPagePronouns(pronouns);
}

text = pronouns;
}

if (!text) return null;
return text;
}

/**
* Queries the pronouns from the pronouns.page API.
* @param {string} username The username of the person.
* @returns {Promise<string|null>} The pronouns that have set the "yes" opinion.
* Queries the pronouns for a given user from the pronouns.page API.
* @param {string} username The username of the person, without the leading "@".
* @returns {Promise<string|null>} The first pronouns whose opinion is set to "yes" or "meh".
*/
async function queryPronounsFromPronounsPage(username) {
async function queryUserFromPronounsPage(username) {
// Example page: https://en.pronouns.page/api/profile/get/andrea?version=2
const resp = await fetch(`https://en.pronouns.page/api/profile/get/${username}?version=2`);
if (resp.status >= 400) {
Expand Down Expand Up @@ -105,20 +114,40 @@ async function queryPronounsFromPronounsPage(username) {
if (!pronouns) pronouns = profiles[0].pronouns;

let val = pronouns.find((x) => x.opinion === "yes" || x.opinion === "meh").value;
val = sanitizePronounPageValue(val);
val = await normalizePronounPagePronouns(val);
return val;
}

/**
* @param {string} val
* @returns {Promise<string>}
*/
function sanitizePronounPageValue(val) {
if (!val.startsWith("https://")) return val;
async function normalizePronounPagePronouns(val) {
const match = val.match(/pronouns\.page\/(.+)/);
if (match) val = match[1];

val = val.replace(/https?:\/\/.+\.pronouns\.page\/:?/, "");
if (val.includes("/")) return val;

if (val === "no-pronouns") val = "no pronouns";
return val;
if (val === "no-pronouns") return "no pronouns";

const pronounNameResp = await fetch("https://en.pronouns.page/api/pronouns/" + val);
if (!pronounNameResp.ok) {
// In case the request fails, better show the likely pronouns than nothing at all.
return val;
}

// If we query the pronouns.page API with invalid values, an empty body is returned, still with status code 200.
// Therefore, we just try to parse the JSON and if it does not work, we return the "val" from earlier and don't
// do further processing.
try {
const {
morphemes: { pronoun_subject, possessive_pronoun },
} = await pronounNameResp.json();

return [pronoun_subject, possessive_pronoun].join("/");
} catch {
return val;
}
}

/**
Expand Down Expand Up @@ -151,7 +180,7 @@ function sanitizePronouns(str) {
.join(" ");

// Remove trailing characters that are used as separators.
str = str.replace(/[-| /]+$/, "");
str = str.replace(/[-| :/]+$/, "");

// Finally, remove leading and trailing whitespace.
str = str.trim();
Expand Down
2 changes: 1 addition & 1 deletion src/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

"description": "puts pronouns next to usernames on mastodon",
"homepage_url": "https://github.com/ItsVipra/ProToots",
"permissions": ["storage", "https://en.pronouns.page/api/profile/get/*"],
"permissions": ["storage", "https://en.pronouns.page/api/*"],

"browser_action": {
"default_icon": "icons/icon small_size/icon small_size.png",
Expand Down
3 changes: 2 additions & 1 deletion tests/extractPronouns.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,14 @@ const valueExtractionTests = [
null,
], // 404 errors
[`<a href="https://de.pronouns.page/:Katze"></a>`, "Katze"], // custom pronouns
[`<a href="https://de.pronouns.page/@benaryorg"></a>`, "Katze"], // custom pronouns in profile
[`<a href="https://de.pronouns.page/@benaryorg"></a>`, "Katze/Katze's"], // custom pronouns in profile
[`:theythem:`, null], // emojis shortcodes used for pronouns
[
// This is an actual example from a Mastodon field, with example.com redirecting to pronouns.page.
`dey/denen, es/ihm - <a href="https://example.com" rel="nofollow noopener noreferrer" target="_blank"><span class="invisible">https://</span><span class="">example.com</span><span class="invisible"></span></a>`,
"dey/denen, es/ihm",
],
["https://en.pronouns.page/it", "it/its"], // single-word pronoun pages
];
for (const [input, expects] of valueExtractionTests) {
valueExtractionSuite(input, async () => {
Expand Down