Skip to content

Commit

Permalink
feat(util): add normalizeIdentifier function (#606)
Browse files Browse the repository at this point in the history
Signed-off-by: Matt Roberts <[email protected]>
  • Loading branch information
mttrbrts authored Feb 21, 2023
1 parent ffdcd6a commit 690ce26
Show file tree
Hide file tree
Showing 5 changed files with 203 additions and 3 deletions.
8 changes: 6 additions & 2 deletions packages/concerto-util/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ const TypedStack = require('./lib/typedstack');
// Label
const Label = require('./lib/label');

// Identifiers
const Identifiers = require('./lib/identifiers');

module.exports = {
BaseException,
BaseFileException,
Expand All @@ -63,5 +66,6 @@ module.exports = {
ModelWriter,
Logger,
TypedStack,
Label
};
Label,
Identifiers
};
74 changes: 74 additions & 0 deletions packages/concerto-util/lib/identifiers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

// Conforms to Concerto Spec for identifiers:
//  - first character: a letter (Lu, Ll, Lt, Lm, Lo), a letter-number (Nl), `$`, `_`
//    or a literal `\uXXXX` escape sequence;
//  - following characters: any of the above, plus combining marks (Mn, Mc),
//    decimal digits (Nd), connector punctuation (Pc), ZWNJ (U+200C) and ZWJ (U+200D).
const ID_REGEX = /^(\p{Lu}|\p{Ll}|\p{Lt}|\p{Lm}|\p{Lo}|\p{Nl}|\$|_|\\u[0-9A-Fa-f]{4})(?:\p{Lu}|\p{Ll}|\p{Lt}|\p{Lm}|\p{Lo}|\p{Nl}|\$|_|\\u[0-9A-Fa-f]{4}|\p{Mn}|\p{Mc}|\p{Nd}|\p{Pc}|\u200C|\u200D)*$/u;

/**
 * Function that attempts to normalize arbitrary strings
 * into valid Concerto identifiers.
 *
 * The steps are applied in order:
 *  1. a leading character that is only legal *inside* an identifier (decimal digit,
 *     combining mark, connector punctuation other than `_`) gets a `_` prefix;
 *  2. common separators, whitespace and zero-width joiners become `_`;
 *  3. every remaining invalid character, and every UTF-16 surrogate, is replaced
 *     by `_` followed by its lowercase hexadecimal code;
 *  4. the result is optionally truncated.
 *
 * @param {string|null|undefined} identifier - the input value. `null` and `undefined`
 * are converted to the strings 'null' and 'undefined'.
 * @param {number} [truncateLength] - Length (in UTF-16 code units of the escaped
 * output) at which to truncate the identifier. Values <= 0 disable truncation.
 * @returns {string} - An identifier that meets the Concerto specification
 * @throws {Error} if the input is neither a string, null nor undefined
 * @throws {Error} if the normalized value is still not a valid identifier
 * (for example when the input is the empty string)
 */
function normalizeIdentifier(identifier, truncateLength = -1) {
    // Escapes the captured text as `_<hex>` once per code point. When the regex
    // captures a lone surrogate code unit, that unit's own value is emitted.
    const replacer = (_match, group1) => {
        let escapedChar = '';
        // Loop through characters with multiple code points
        for (const codePoint of group1) {
            escapedChar += `_${codePoint.codePointAt(0).toString(16)}`;
        }
        return escapedChar;
    };

    // Stringify null & undefined values
    let result = identifier ?? String(identifier);

    if (typeof result !== 'string'){
        throw new Error(`Unsupported identifier type, '${typeof result}'.`);
    }

    result = result
        // 1. If the identifier begins with a character that is valid inside an
        //    identifier but not at its start (digit, combining mark, or connector
        //    punctuation other than '_'), add a leading underscore. Without this,
        //    such inputs pass step 3a untouched and then fail the validity check.
        .replace(/^(?!_)[\p{Nd}\p{Mn}\p{Mc}\p{Pc}]/u, '_$&')

        // 2. Substitute separators, joiners and whitespace.
        //    \u2010 is HYPHEN, \u2212 is MINUS SIGN, \u200c / \u200d are ZWNJ / ZWJ.
        .replace(/[-\u2010\u2212@#:;><|/\\\u200c\u200d]/g, '_')
        .replace(/\s/g, '_')

        // 3a. Replace Invalid Characters.
        //     Backslashes were already replaced in step 2, so the `\uXXXX`
        //     alternative below can no longer match at this point.
        .replace(/(?!\p{Lu}|\p{Ll}|\p{Lt}|\p{Lm}|\p{Lo}|\p{Nl}|\$|_|\p{Mn}|\p{Mc}|\p{Nd}|\p{Pc}|\u200C|\u200D|\\u[0-9A-Fa-f]{4})(.)/gu, replacer)

        // 3b. Escape Surrogate Pairs. No `u` flag on purpose: each remaining
        //     surrogate code unit is matched and escaped individually.
        .replace(/([\uD800-\uDFFF])/g, replacer);

    // 4. Optionally truncate the identifier. Every surrogate has been escaped by
    //    now, so cutting at a code-unit boundary cannot split a character.
    if (truncateLength > 0){
        result = result.substring(0, truncateLength);
    }

    // Check validity
    if (!ID_REGEX.test(result)){
        throw new Error(`Unexpected error. Not able to escape identifier '${result}'.`);
    }
    return result;
}

// Public API of this module: the normalizer function and the regular
// expression it uses to validate its own output.
module.exports = {
normalizeIdentifier,
ID_REGEX
};
111 changes: 111 additions & 0 deletions packages/concerto-util/test/identifiers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

const { normalizeIdentifier } = require('../lib/identifiers');

require('chai').should();

describe('Identifiers', function () {

    describe('normalizeIdentifier', function() {
        // Each entry is [input, expectedValue]. When expectedValue is omitted the
        // input is expected to be returned unchanged. Every case is run with a
        // truncation length of 30.
        const ids = [
            // No-op Values
            ['a'], // Letter, lowercase
            ['ՠ'], // Letter, lowercase. Unicode 11.0
            ['A'], // Letter, uppercase
            ['ĦĔĽĻŎ'], // Letter, uppercase
            ['ǅ'], // Letter, titlecase
            ['ᾩ'], // Letter, titlecase
            ['〱〱〱〱'], // Letter, modifier
            ['जावास्क्रिप्ट'], // Letter, other
            ['Ⅶ'], // Number, letter
            ['$class'], // leading $
            ['_class'], // leading _
            ['\u03C9'], // Escaped Unicode Code Point, ω
            ['abc'], // Letter, lowercase
            ['a123'], // Number, digit
            ['foo$bar'], // $ separator
            ['foo_bar'], // _ separator
            ['αβγδεζηθ'], // Letter, lowercase
            ['foo\u03C9bar'], // Escaped Unicode Code Point, fooωbar
            ['foo\u03c9bar'], // Escaped Unicode Code Point lowercase, fooωbar
            ['foo‿bar'], // Punctuation, connector
            ['पः'], // Mark, combining character
            ['CharlesⅢ'], // Number, letter
            ['true'], // reserved words
            ['false'],
            ['null'],
            ['while'],
            ['for'],
            ['nully'], // leading reserved word
            ['こんにちは世界'], // Japanese
            // The zero-width characters are written as escapes so that they are
            // visible in the source; the runtime strings are unchanged.
            ['foo\u200Cbar', 'foo_bar'], // zero-width non-joiner
            ['foo\u200Dbar', 'foo_bar'], // zero-width joiner

            // Bad Identifiers
            ['123', '_123'],
            ['1st', '_1st'],
            ['foo bar', 'foo_bar'],
            ['foo\u0020bar', 'foo_bar'], // Escaped Unicode, space
            ['foo\x3Dbar', 'foo_3dbar'], // Escaped Hex Sequence, foo=bar
            ['\u200Dfoo', '_foo'], // leading zero-width joiner
            ['foo-bar', 'foo_bar'],
            ['foo‐bar', 'foo_bar'], // U+2010 HYPHEN
            ['foo−bar', 'foo_bar'], // U+2212 MINUS
            ['foo|bar', 'foo_bar'],
            ['foo@bar', 'foo_bar'],
            ['foo#bar', 'foo_bar'],
            ['foo/bar', 'foo_bar'],
            ['foo>bar', 'foo_bar'],
            ['\x3D', '_3d'], // Escaped Hex Sequence, =
            ['😄', '_1f604'], // Surrogate pair, Emoji
            ['\u{1F604}', '_1f604'], // Escaped surrogate pair, Emoji
            // Surrogate pairs, Hanifi Rohingya RTL. Only the first three of the
            // five characters appear because the output is truncated to 30.
            ['𐴓𐴠𐴑𐴤𐴝', '_d803_dd13_d803_dd20_d803_dd11'],
            [null, 'null'],
            [undefined, 'undefined'],
        ];
        ids.forEach(([id, expectedValue]) => {
            it(`'${id}' should equal '${expectedValue ?? id}'`, function() {
                normalizeIdentifier(id, 30).should.equal(expectedValue ?? id);
            });
        });

        it('should throw for empty string', () => {
            (() => normalizeIdentifier('')).should.throw(/Unexpected error/);
        });

        it('should not normalize non string identifiers', () => {
            (() => normalizeIdentifier({ a: 1 })).should.throw(/Unsupported identifier type/);
            (() => normalizeIdentifier(Symbol.for('a'))).should.throw(/Unsupported identifier type/);
            (() => normalizeIdentifier(false)).should.throw(/Unsupported identifier type/);
            (() => normalizeIdentifier(true)).should.throw(/Unsupported identifier type/);
            (() => normalizeIdentifier(1)).should.throw(/Unsupported identifier type/);
            (() => normalizeIdentifier(1.112345678987654)).should.throw(/Unsupported identifier type/);
            (() => normalizeIdentifier(3.1e2)).should.throw(/Unsupported identifier type/);
        });

        it('should truncate identifiers', () => {
            normalizeIdentifier('a', 2).should.equal('a');
            normalizeIdentifier('aaa', 2).should.equal('aa');
            // Zero and negative lengths disable truncation
            normalizeIdentifier('aaa', 0).should.equal('aaa');
            normalizeIdentifier('aaa', -1).should.equal('aaa');
            normalizeIdentifier('$a', 1).should.equal('$');
            // Truncation is applied to the escaped output, not to the input
            normalizeIdentifier('😄', 2).should.equal('_1');
            normalizeIdentifier('𐴓', 2).should.equal('_d'); // surrogate pair character
        });
    });
});
3 changes: 2 additions & 1 deletion packages/concerto-util/types/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ import ModelWriter = require("./lib/modelwriter");
import Logger = require("./lib/logger");
import TypedStack = require("./lib/typedstack");
import Label = require("./lib/label");
export { BaseException, BaseFileException, FileDownloader, CompositeFileLoader, DefaultFileLoader, GitHubFileLoader, HTTPFileLoader, Writer, FileWriter, InMemoryWriter, ModelWriter, Logger, TypedStack, Label };
import Identifiers = require("./lib/identifiers");
export { BaseException, BaseFileException, FileDownloader, CompositeFileLoader, DefaultFileLoader, GitHubFileLoader, HTTPFileLoader, Writer, FileWriter, InMemoryWriter, ModelWriter, Logger, TypedStack, Label, Identifiers };
10 changes: 10 additions & 0 deletions packages/concerto-util/types/lib/identifiers.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/**
 * Function that attempts to normalize arbitrary strings
 * into valid Concerto identifiers
 *
 * @param {string | null | undefined} identifier - the input value; `null` and
 * `undefined` are accepted and converted to the strings 'null' and 'undefined'
 * @param {number} [truncateLength] - Length at which to truncate the identifier
 * @returns {string} - An identifier that meets the Concerto specification
 * @throws {Error} if the input is of any other type, or if the normalized
 * value is still not a valid identifier (for example for the empty string)
 */
export function normalizeIdentifier(identifier: string | null | undefined, truncateLength?: number): string;
/** Regular expression used by normalizeIdentifier to validate its result. */
export const ID_REGEX: RegExp;

0 comments on commit 690ce26

Please sign in to comment.