196 lines
No EOL
7.5 KiB
JavaScript
196 lines
No EOL
7.5 KiB
JavaScript
"use strict";
|
|
/**
|
|
* @file **unraw** | Convert raw escape sequences to their respective characters
|
|
* (undo `String.raw`).
|
|
* @author Ian Sanders
|
|
* @copyright 2019 Ian Sanders
|
|
* @license MIT
|
|
*/
|
|
// Define an `__esModule: true` property on the CommonJS `exports` object.
// NOTE(review): presumably emitted by the TypeScript compiler (the file ends
// with a source map reference) as an ES-module interop marker — confirm
// against the build configuration.
Object.defineProperty(exports, "__esModule", { value: true });
|
|
const errors_1 = require("./errors");
|
|
// Re-export `ErrorType` from "./errors" so it is reachable through this
// module's exports. The functions in this file use its members as keys into
// `errorMessages`.
exports.ErrorType = errors_1.ErrorType;
|
|
// Re-export `errorMessages` from "./errors". Code in this file reads the text
// of every `SyntaxError` it throws from this object via `.get(...)`.
exports.errorMessages = errors_1.errorMessages;
|
|
/**
 * Strictly convert a hexadecimal string into an integer. Unlike a bare
 * `parseInt` call, any character outside `0-9` / `a-f` (case-insensitive),
 * such as "+", "-", or ".", invalidates the whole input.
 * @param hex The text to interpret as base-16 digits.
 * @returns The integer value, or `NaN` when `hex` is empty or contains a
 * character that is not a hex digit.
 */
function parseHexToInt(hex) {
  const firstInvalidIndex = hex.search(/[^a-f0-9]/i);
  if (firstInvalidIndex !== -1) {
    return NaN;
  }
  // An empty string has no invalid characters; `parseInt` turns it into `NaN`.
  return parseInt(hex, 16);
}
|
|
/**
 * Parse a hexadecimal code strictly, optionally requiring an exact number of
 * digits, and fail loudly when the input does not qualify.
 * @param hex The string to validate and parse.
 * @param errorName Key passed to `errorMessages.get` to obtain the message of
 * the `SyntaxError` thrown for invalid input.
 * @param enforcedLength When provided, `hex` must be exactly this many
 * characters long.
 * @returns The numeric value of `hex`.
 * @throws {SyntaxError} If `hex` is not purely hexadecimal (this includes the
 * empty string) or its length differs from `enforcedLength`.
 */
function validateAndParseHex(hex, errorName, enforcedLength) {
  const value = parseHexToInt(hex);
  const hasWrongLength =
    enforcedLength !== undefined && hex.length !== enforcedLength;
  if (Number.isNaN(value) || hasWrongLength) {
    throw new SyntaxError(errors_1.errorMessages.get(errorName));
  }
  return value;
}
|
|
/**
 * Turn the payload of a `\xHH` escape into the character it denotes.
 * @param code The text following `\x`; it must be exactly two hex digits.
 * @returns A one-code-unit string for the parsed value.
 * @throws {SyntaxError} If `code` is not valid hex or is not two characters
 * long.
 */
function parseHexadecimalCode(code) {
  return String.fromCharCode(
    validateAndParseHex(code, errors_1.ErrorType.MalformedHexadecimal, 2)
  );
}
|
|
/**
 * Turn the payload of a `\uHHHH` escape, optionally followed by a second
 * `\uHHHH` escape, into the corresponding string.
 * @param code The four hex digits of the first escape.
 * @param surrogateCode Optional four hex digits of an immediately following
 * escape, treated as the other half of a surrogate pair.
 * @returns A string built from the one or two parsed UTF-16 code units.
 * @throws {SyntaxError} If either code is not valid hex or is not exactly four
 * characters long.
 */
function parseUnicodeCode(code, surrogateCode) {
  const codeUnits = [
    validateAndParseHex(code, errors_1.ErrorType.MalformedUnicode, 4)
  ];
  if (surrogateCode !== undefined) {
    codeUnits.push(
      validateAndParseHex(surrogateCode, errors_1.ErrorType.MalformedUnicode, 4)
    );
  }
  return String.fromCharCode(...codeUnits);
}
|
|
/**
 * Report whether a string opens with `{` and closes with `}`.
 * @param text Text to check.
 * @returns `true` if the first character is `{` and the last character is `}`;
 * a lone brace or an empty string yields `false`.
 */
function isCurlyBraced(text) {
  return text.startsWith("{") && text.endsWith("}");
}
|
|
/**
 * Turn the payload of a `\u{...}` code point escape into its character.
 * @param codePoint The text following `\u`, including the surrounding curly
 * braces.
 * @returns The string for the parsed code point.
 * @throws {SyntaxError} If the braces are missing or unbalanced, if the text
 * between them is empty or not valid hex, or if the value is rejected by
 * `String.fromCodePoint` as out of range.
 */
function parseUnicodeCodePointCode(codePoint) {
  if (!isCurlyBraced(codePoint)) {
    throw new SyntaxError(
      errors_1.errorMessages.get(errors_1.ErrorType.MalformedUnicode)
    );
  }
  // Drop the leading "{" and the trailing "}".
  const digits = codePoint.substring(1, codePoint.length - 1);
  const value = validateAndParseHex(digits, errors_1.ErrorType.MalformedUnicode);
  try {
    return String.fromCodePoint(value);
  } catch (err) {
    // Report an out-of-range code point as a syntax problem in the escape;
    // anything else is rethrown untouched.
    if (err instanceof RangeError) {
      throw new SyntaxError(
        errors_1.errorMessages.get(errors_1.ErrorType.CodePointLimit)
      );
    }
    throw err;
  }
}
|
|
/**
 * Turn the digits of an octal escape (e.g. the `101` of `\101`) into the
 * character they denote, or reject the escape outright.
 *
 * NOTE(review): the original comment here mentions an overload taking a plain
 * boolean for when the compiler cannot tell whether `error` is `true` or
 * `false`; that presumably refers to overload signatures in the TypeScript
 * source, which are not visible in this compiled output.
 * @param code The octal digits following the backslash.
 * @param error When truthy, throw instead of parsing.
 * @returns A one-code-unit string for the parsed value.
 * @throws {SyntaxError} If `error` is truthy.
 */
function parseOctalCode(code, error = false) {
  if (!error) {
    // `escapeMatch` only captures octal digits for this group, so a lenient
    // `parseInt` is sufficient; the length is not enforced for octals.
    return String.fromCharCode(parseInt(code, 8));
  }
  throw new SyntaxError(
    errors_1.errorMessages.get(errors_1.ErrorType.OctalDeprecation)
  );
}
|
|
/**
 * Lookup table from the letter that follows a backslash to the special
 * character that escape produces. Characters that simply stand for themselves
 * (for example "\'") are deliberately left out.
 */
const singleCharacterEscapes = new Map()
  .set("b", "\b")
  .set("f", "\f")
  .set("n", "\n")
  .set("r", "\r")
  .set("t", "\t")
  .set("v", "\v")
  .set("0", "\0");
|
|
/**
 * Resolve a one-character escape to the character it stands for.
 * @param code The single character that followed the backslash.
 * @returns The special character registered in `singleCharacterEscapes`, or
 * `code` itself when there is no entry for it.
 */
function parseSingleCharacterCode(code) {
  const mapped = singleCharacterEscapes.get(code);
  if (mapped) {
    return mapped;
  }
  return code;
}
|
|
/**
|
|
* Matches every escape sequence possible, including invalid ones.
|
|
*
|
|
* All capture groups (described below) are unique (only one will match), except
|
|
* for 4, which can only potentially match if 3 does.
|
|
*
|
|
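* **Capture Groups** (numbered from 0 in this list; the regex's own capture
* group index is one higher, since regex group 0 is the whole match):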
|
|
* 0. A single backslash
|
|
* 1. Hexadecimal code
|
|
* 2. Unicode code point code with surrounding curly braces
|
|
* 3. Unicode escape code with surrogate
|
|
* 4. Surrogate code
|
|
* 5. Unicode escape code without surrogate
|
|
* 6. Octal code _NOTE: includes "0"._
|
|
* 7. A single character (will never be \, x, u, or 0-7)
|
|
*/
|
|
// Alternatives are tried left to right, so the two-escape (surrogate pair)
// form of `\u` is attempted before the plain `\u` form, and the catch-all
// single character is only reached when no earlier alternative applies.
// The final `$` alternative matches a backslash that is the very last
// character of the input, leaving every capture group undefined.
// The `g` flag is needed because `unraw` hands this regex to
// `String.prototype.replace` to process every escape in the input.
const escapeMatch = /\\(?:(\\)|x([\s\S]{0,2})|u(\{[^}]*\}?)|u([\s\S]{4})\\u([^{][\s\S]{0,3})|u([\s\S]{0,4})|([0-3]?[0-7]{1,2})|([\s\S])|$)/g;
|
|
/**
 * Convert the raw escape sequences in a string into the characters they
 * denote.
 * @param raw A string in which escapes appear as raw text, e.g. `\'` rather
 * than `'`.
 * @param allowOctals If `true`, the now-deprecated octal escape sequences
 * (ie, `\111`) are processed; otherwise they are rejected.
 * @returns The processed string, with every escape sequence replaced by the
 * character(s) it represents.
 * @throws {SyntaxError} If an escape is malformed, if an octal escape other
 * than `\0` appears while `allowOctals` is falsy, or if the string ends with a
 * lone backslash.
 */
function unraw(raw, allowOctals = false) {
  const cookEscape = (
    wholeMatch,
    backslash,
    hex,
    codePoint,
    unicodeWithSurrogate,
    surrogate,
    unicode,
    octal,
    singleCharacter
  ) => {
    // Groups are tested against `undefined` rather than for truthiness: an
    // empty capture (such as the one for a bare `\u`) must reach its own
    // parser and fail with its own error instead of being treated like a
    // trailing `\`.
    if (backslash !== undefined) return "\\";
    if (hex !== undefined) return parseHexadecimalCode(hex);
    if (codePoint !== undefined) return parseUnicodeCodePointCode(codePoint);
    if (unicodeWithSurrogate !== undefined) {
      return parseUnicodeCode(unicodeWithSurrogate, surrogate);
    }
    if (unicode !== undefined) return parseUnicodeCode(unicode);
    if (octal !== undefined) {
      // A lone `\0` is the NUL escape and is accepted even when octal escapes
      // are disallowed.
      return octal === "0" ? "\0" : parseOctalCode(octal, !allowOctals);
    }
    if (singleCharacter !== undefined) {
      return parseSingleCharacterCode(singleCharacter);
    }
    // No group captured anything: the match was a backslash at end of input.
    throw new SyntaxError(
      errors_1.errorMessages.get(errors_1.ErrorType.EndOfString)
    );
  };
  return raw.replace(escapeMatch, cookEscape);
}
|
|
// Expose `unraw` as a named export of this module.
exports.unraw = unraw;
|
|
// Expose the same `unraw` function as the module's `default` export.
exports.default = unraw;
|
|
//# sourceMappingURL=index.js.map
|