From d9f5f04b7d951d71a0dfa5fbd1896717008c1f5e Mon Sep 17 00:00:00 2001
From: Michael Kohler
Date: Sat, 26 Jun 2021 13:13:52 +0000
Subject: [PATCH] fix: migrate pa to pa-IN and remove CV -> SC language mapping

---
Review notes (text between the "---" line and the first "diff --git" line is
not part of the commit message):
- The line structure of this patch was restored; context-line indentation is
  reconstructed and should be checked against the target tree.
- migrate-pa-users: the original last-entry statement used
  REPLACE(languages, ',pa', ',pa-IN'), which rewrites every ",pa" substring of
  a matched row, so "en,pap-AW,pa" became "en,pa-INp-AW,pa-IN". The first- and
  last-entry statements now change only the entry matched by their WHERE
  clause.
- migrate-pa-to-pa-IN: SQL string literals use single quotes instead of double
  quotes.
- Hunk sizes and the diffstat were updated for the edited migrations. The blob
  ids of the two new files were dropped because they described the previous
  content.
- CONCAT and SUBSTRING assume a MySQL-compatible database - TODO confirm.

 scripts/exporter.js                                  | 12 +++---------
 server/lib/languages.js                              | 12 ++++++------
 .../migrations/20210626134900-migrate-pa-to-pa-IN.js | 14 ++++++++++++++
 server/migrations/20210626134930-migrate-pa-users.js | 19 +++++++++++++++++++
 4 files changed, 42 insertions(+), 15 deletions(-)
 create mode 100644 server/migrations/20210626134900-migrate-pa-to-pa-IN.js
 create mode 100644 server/migrations/20210626134930-migrate-pa-users.js

diff --git a/scripts/exporter.js b/scripts/exporter.js
index 5f92c660..6c0625bd 100644
--- a/scripts/exporter.js
+++ b/scripts/exporter.js
@@ -12,11 +12,6 @@ const cleanup = require('../server/lib/cleanup');
 const CV_LANGUAGES_URL = 'https://raw.githubusercontent.com/mozilla/common-voice/main/locales/all.json';
 const OUTPUT_TXT = 'sentence-collector.txt';
 
-// Mapping from PONTOON locale -> SC locale code
-const LANGUAGE_MAPPING = {
-  'pa-IN': 'pa',
-};
-
 const {
   API_BASE_URL,
   COMMON_VOICE_PATH,
@@ -48,9 +43,8 @@ async function startExport() {
 async function exportLanguage(languageCode) {
   console.log(`Starting export for ${languageCode}..`);
 
-  const dbLanguageCode = LANGUAGE_MAPPING[languageCode] || languageCode;
   const cvPath = `${exportPath}/${languageCode}`;
-  const approvedSentencesUrl = `${API_BASE_URL}/sentences/text/approved/${dbLanguageCode}`;
+  const approvedSentencesUrl = `${API_BASE_URL}/sentences/text/approved/${languageCode}`;
   const approvedSentencesResponse = await fetch(approvedSentencesUrl);
   const approvedSentencesText = await approvedSentencesResponse.text();
 
@@ -64,8 +58,8 @@ async function exportLanguage(languageCode) {
   prepareExport(cvPath);
 
   console.log(` - Cleaning up sentences`);
-  const cleanedUpSentences = cleanup.cleanupSentences(dbLanguageCode, approvedSentences);
-  const dedupedSentences = dedupeSentences(dbLanguageCode, cleanedUpSentences, cvPath);
+  const cleanedUpSentences = cleanup.cleanupSentences(languageCode, approvedSentences);
+  const dedupedSentences = dedupeSentences(languageCode, cleanedUpSentences, cvPath);
   writeExport(cvPath, dedupedSentences);
 }
 
diff --git a/server/lib/languages.js b/server/lib/languages.js
index 99c774f9..19aaeb6e 100644
--- a/server/lib/languages.js
+++ b/server/lib/languages.js
@@ -2,14 +2,8 @@ const ISO6391 = require('iso-639-1');
 
 const FALLBACK_LOCALE = 'en';
 
-const LANGUAGE_MAPPING = {
-  // CV - Sentence Collector
-  'pa-IN': 'pa',
-};
-
 module.exports = {
   FALLBACK_LOCALE,
-  LANGUAGE_MAPPING,
   getAllLanguages,
 };
 
@@ -164,6 +158,11 @@ const ADDITIONAL_LANGUAGES = [
     name: 'Norwegian',
     nativeName: 'Norsk nynorsk',
   },
+  {
+    id: 'pa-IN',
+    name: 'Panjabi',
+    nativeName: 'ਪੰਜਾਬੀ',
+  },
   { // https://github.com/common-voice/common-voice/issues/3044
     id: 'pap-AW',
     name: 'Papiamento - Aruba',
@@ -257,6 +256,7 @@ const LANGUAGES_TO_REMOVE = [
   'ga', // covered by ga-IE
   'sv', // covered by sv-SE
   'ne', // covered by ne-NP
+  'pa', // covered by pa-IN
 ];
 
 const isoLanguages = ISO6391.getLanguages(ISO6391.getAllCodes());
diff --git a/server/migrations/20210626134900-migrate-pa-to-pa-IN.js b/server/migrations/20210626134900-migrate-pa-to-pa-IN.js
new file mode 100644
--- /dev/null
+++ b/server/migrations/20210626134900-migrate-pa-to-pa-IN.js
@@ -0,0 +1,14 @@
+'use strict';
+
+// Moves all sentences stored under the `pa` locale to `pa-IN`.
+module.exports = {
+  up: (queryInterface) => {
+    return queryInterface.sequelize.query(`
+      UPDATE Sentences
+      SET localeId = 'pa-IN'
+      WHERE localeId = 'pa'
+    `);
+  },
+  // Intentionally a no-op; the rename is not rolled back.
+  down: () => Promise.resolve(),
+};
diff --git a/server/migrations/20210626134930-migrate-pa-users.js b/server/migrations/20210626134930-migrate-pa-users.js
new file mode 100644
--- /dev/null
+++ b/server/migrations/20210626134930-migrate-pa-users.js
@@ -0,0 +1,19 @@
+'use strict';
+
+// Renames the `pa` entry in each user's comma-separated `languages` value to
+// `pa-IN`. Every statement rewrites only the entry its WHERE clause matched, so
+// other entries that merely start with "pa" (e.g. `pap-AW`) stay untouched.
+module.exports = {
+  up: async (queryInterface) => {
+    // `pa` is the only entry.
+    await queryInterface.sequelize.query("UPDATE Users SET languages = REPLACE(languages, 'pa', 'pa-IN') WHERE languages='pa'");
+    // `pa` is the first entry: swap just the two-character prefix.
+    await queryInterface.sequelize.query("UPDATE Users SET languages = CONCAT('pa-IN', SUBSTRING(languages, 3)) WHERE languages LIKE 'pa,%'");
+    // `pa` is a middle entry: ",pa," is delimited on both sides, so REPLACE is exact.
+    await queryInterface.sequelize.query("UPDATE Users SET languages = REPLACE(languages, ',pa,', ',pa-IN,') WHERE languages LIKE '%,pa,%'");
+    // `pa` is the last entry: append the missing region suffix.
+    await queryInterface.sequelize.query("UPDATE Users SET languages = CONCAT(languages, '-IN') WHERE languages LIKE '%,pa'");
+  },
+  // Intentionally a no-op; the rename is not rolled back.
+  down: () => Promise.resolve(),
+};