Skip to content

Commit

Permalink
Allow extraction of pronouns from bio/note
Browse files Browse the repository at this point in the history
Closes #1
  • Loading branch information
nachtjasmin committed Jul 11, 2023
1 parent bee6830 commit 1405dc5
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 12 deletions.
1 change: 0 additions & 1 deletion src/libs/fetchPronouns.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ export async function fetchPronouns(dataID, accountName, type) {
let pronouns = await extractFromStatus(status);
if (!pronouns) {
pronouns = "null";
//TODO: if no field check bio
info(`no pronouns found for ${accountName}, cached null`);
}
await cachePronouns(accountName, pronouns);
Expand Down
150 changes: 139 additions & 11 deletions src/libs/pronouns.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,39 @@ const knownPronounUrls = [
export async function extractFromStatus(status) {
  // Resolves the pronouns of the status' author.
  // Lookup order: profile fields first (first field that yields a value wins),
  // then the bio (`note`) as a fallback. Resolves to a falsy value
  // (`undefined` or `null`) when nothing was found.

  // `fields` and `note` are declared exactly once; a second `const fields`
  // declaration in the same scope would be a SyntaxError.
  const { fields, note } = status.account;
  let pronouns;

  if (fields) {
    for (const f of fields) {
      pronouns = await extractFromField(f);
      // Stop at the first field that produced pronouns.
      if (pronouns) break;
    }
  }

  // Only consult the bio when no field matched and a bio is present.
  if (!pronouns && note) {
    pronouns = extractFromBio(note);
  }

  return pronouns;
}

/**
* @param {{name: string, value: string}} field The field value
* @returns {Promise<string|null>} The pronouns or null.
*/
async function extractFromField(field) {
let pronounsRaw;
for (const field of fields) {
// TODO: add ranking of fields
if (pronounsRaw) break;

for (const matcher of fieldMatchers) {
if (typeof matcher === "string" && field.name.toLowerCase().includes(matcher)) {
pronounsRaw = field.value;
} else if (field.name.match(matcher)) {
pronounsRaw = field.value;
}
for (const matcher of fieldMatchers) {
if (typeof matcher === "string" && field.name.toLowerCase().includes(matcher)) {
pronounsRaw = field.value;
break;
} else if (field.name.match(matcher)) {
pronounsRaw = field.value;
break;
}
}

if (!pronounsRaw) return null;
let text = sanitizeHtml(pronounsRaw, { allowedTags: [], allowedAttributes: {} });
// If one of pronoun URLs matches, overwrite the current known value.
Expand Down Expand Up @@ -101,3 +119,113 @@ function sanitizePronounPageValue(val) {
if (val === "no-pronouns") val = "no pronouns";
return val;
}

// Lowercase pronoun forms that are accepted as part of an "x/y" pair.
// Entries are kept in alphabetical order; each row holds one pronoun family.
const pronounAllowlist = [
  "ae", "aer", "aers", "aerself",
  "co", "co's", "cos", "coself",
  "e", "eir", "eirs", "em", "ems", "emself", "es", "ey",
  "fae", "faer", "faers", "faerself",
  "he", "her", "hers", "herself", "him", "himself", "hir", "hirs", "hirself", "his",
  "hu", "hum", "hus", "huself",
  "it", "its", "itself",
  "ne", "nem", "nemself", "nir", "nirs", "nirself",
  "one", "one's", "oneself",
  "per", "pers", "perself",
  "s/he", "she",
  "their", "theirs", "them", "themself", "themselves", "they",
  "thon", "thon's", "thons", "thonself",
  "ve", "ver", "vers", "verself",
  "vi", "vim", "vims", "vimself", "vir", "virs", "virself", "vis",
  "xe", "xem", "xemself", "xyr", "xyrs",
  "ze", "zhe", "zher", "zhers", "zherself", "zir", "zirs", "zirself",
];

/**
 * Tries to find pronouns in the free-text bio of an account.
 *
 * Two heuristics are applied in order:
 * 1. The first `word/word` pair whose two parts are both on the pronoun
 *    allowlist (compared case-insensitively), e.g. "she/her" or "They/Them".
 * 2. The first token following a "pronoun:" / "pronouns:" label. This one is
 *    accepted without an allowlist check, e.g. "pronouns: any".
 *
 * @param {string} bio The bio
 * @returns {string|null} The pronouns exactly as written in the bio, or null if none were found.
 */
function extractFromBio(bio) {
  for (const [match, subjective, objective] of bio.matchAll(/(\w+)\/(\w+)/gi)) {
    // Every allowlist entry is lowercase, so normalise before the lookup;
    // otherwise capitalised pronouns such as "She/Her" would be rejected.
    const isKnownPair =
      pronounAllowlist.includes(subjective.toLowerCase()) &&
      pronounAllowlist.includes(objective.toLowerCase());
    if (isKnownPair) {
      return match;
    }
  }

  // Only the first labelled occurrence is of interest, so a single exec suffices.
  const labelled = /pronouns?:\W+([\w/+]+)/i.exec(bio);
  return labelled ? labelled[1] : null;
}
18 changes: 18 additions & 0 deletions tests/extractPronouns.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,21 @@ for (const [input, expects] of valueExtractionTests) {
}

valueExtractionSuite.run();

// Bio extraction: every case passes a bare account note through
// extractFromStatus and compares the outcome with the expected pronouns.
const bioExtractSuite = suite("bio extraction");
const bioExtractTests = [
  // exact match against the allowlist
  ["I'm cute and my pronouns are she/her", "she/her"],
  // pair is not on the allowlist
  ["my pronouns are helicopter/joke", null],
  // value following a "pronouns:" label
  ["pronouns: uwu/owo", "uwu/owo"],
  // single word following a "pronouns:" label
  ["pronouns: any", "any"],
];

bioExtractTests.forEach(([input, expects]) => {
  bioExtractSuite(input, async () => {
    const status = { account: { note: input } };
    const result = await pronouns.extractFromStatus(status);
    assert.equal(result, expects);
  });
});

bioExtractSuite.run();

0 comments on commit 1405dc5

Please sign in to comment.