urltomarkdown/node_modules/tr46/index.js

341 lines
8.5 KiB
JavaScript

"use strict";
const punycode = require("punycode/");
const regexes = require("./lib/regexes.js");
const mappingTable = require("./lib/mappingTable.json");
const { STATUS_MAPPING } = require("./lib/statusMapping.js");
/**
 * Reports whether a string holds at least one code point above U+007F.
 *
 * @param {string} str - Text to inspect.
 * @returns {boolean} `true` when any non-ASCII code point is present.
 */
function containsNonASCII(str) {
  const nonASCIIPattern = /[^\x00-\x7F]/u;
  return nonASCIIPattern.test(str);
}
/**
 * Binary-searches `mappingTable` for the row covering a code point.
 *
 * Each row starts with either a single code point or an inclusive
 * `[min, max]` pair, followed by a status and any further values.
 *
 * @param {number} val - Code point to look up.
 * @param {{ useSTD3ASCIIRules: boolean }} options - When `useSTD3ASCIIRules`
 *   is truthy, both `disallowed_STD3_*` statuses are reported as `disallowed`;
 *   otherwise they are reported as `valid` / `mapped` respectively.
 * @returns {Array|null} `[status, ...rest]` for the matching row, or `null`
 *   when no row covers `val`.
 */
function findStatus(val, { useSTD3ASCIIRules }) {
  let low = 0;
  let high = mappingTable.length - 1;

  while (low <= high) {
    const mid = Math.floor((low + high) / 2);
    const entry = mappingTable[mid];
    const range = entry[0];
    const isRange = Array.isArray(range);
    const first = isRange ? range[0] : range;
    const last = isRange ? range[1] : range;

    // Row lies entirely above the target: keep searching the lower half.
    if (first > val) {
      high = mid - 1;
      continue;
    }
    // Row does not contain the target: keep searching the upper half.
    if (!(first <= val && last >= val)) {
      low = mid + 1;
      continue;
    }

    const status = entry[1];
    const isSTD3Valid = status === STATUS_MAPPING.disallowed_STD3_valid;
    const isSTD3Mapped = status === STATUS_MAPPING.disallowed_STD3_mapped;

    // Ordinary statuses are returned exactly as stored in the table.
    if (!isSTD3Valid && !isSTD3Mapped) {
      return entry.slice(1);
    }

    const rest = entry.slice(2);
    if (useSTD3ASCIIRules) {
      return [STATUS_MAPPING.disallowed, ...rest];
    }
    return [isSTD3Valid ? STATUS_MAPPING.valid : STATUS_MAPPING.mapped, ...rest];
  }

  return null;
}
/**
 * Applies the per-code-point mapping step to a domain name.
 *
 * Each code point is looked up with `findStatus` and then kept, dropped or
 * replaced according to the status that comes back.
 *
 * @param {string} domainName - Input to map.
 * @param {{ useSTD3ASCIIRules: boolean, transitionalProcessing: boolean }} options -
 *   `useSTD3ASCIIRules` is forwarded to `findStatus`; `transitionalProcessing`
 *   selects the mapped form of deviation characters and of "ẞ".
 * @returns {string} The mapped string.
 */
function mapChars(domainName, { useSTD3ASCIIRules, transitionalProcessing }) {
  const lookupOptions = { useSTD3ASCIIRules };
  let output = "";

  for (const ch of domainName) {
    const [status, mapping] = findStatus(ch.codePointAt(0), lookupOptions);

    if (status === STATUS_MAPPING.disallowed) {
      // Kept here unchanged.
      output += ch;
    } else if (status === STATUS_MAPPING.ignored) {
      // Ignored code points contribute nothing to the output.
    } else if (status === STATUS_MAPPING.mapped) {
      // Capital sharp s is special-cased to "ss" under transitional processing.
      const useSharpSFallback = transitionalProcessing && ch === "ẞ";
      output += useSharpSFallback ? "ss" : mapping;
    } else if (status === STATUS_MAPPING.deviation) {
      output += transitionalProcessing ? mapping : ch;
    } else if (status === STATUS_MAPPING.valid) {
      output += ch;
    }
  }

  return output;
}
/**
 * Checks a single label against the label validity criteria.
 *
 * @param {string} label - The label to check (already mapped/decoded by the caller).
 * @param {object} options
 * @param {boolean} options.checkHyphens - Enforce the hyphen position rules.
 * @param {boolean} options.checkBidi - Enforce the bidi rules (only when `isBidi`).
 * @param {boolean} options.checkJoiners - Enforce the ContextJ rules for U+200C / U+200D.
 * @param {boolean} options.transitionalProcessing - Accept only `valid` code
 *   points instead of `valid` or `deviation`.
 * @param {boolean} options.useSTD3ASCIIRules - Forwarded to `findStatus`.
 * @param {boolean} options.isBidi - Whether the whole domain is a bidi domain name.
 * @returns {boolean} `true` when every enabled check passes.
 */
function validateLabel(label, {
  checkHyphens,
  checkBidi,
  checkJoiners,
  transitionalProcessing,
  useSTD3ASCIIRules,
  isBidi
}) {
  // The criteria only apply to non-empty labels.
  if (label.length === 0) {
    return true;
  }

  // Criterion 1: the label has to be in NFC already.
  if (label !== label.normalize("NFC")) {
    return false;
  }

  const chars = Array.from(label);
  const firstChar = chars[0];

  // Criteria 2 and 3 (CheckHyphens): no "--" in the third and fourth
  // positions, and no hyphen at either end.
  if (checkHyphens) {
    const hyphensInThirdAndFourth = chars[2] === "-" && chars[3] === "-";
    const hyphenAtEdge = label.startsWith("-") || label.endsWith("-");
    if (hyphensInThirdAndFourth || hyphenAtEdge) {
      return false;
    }
  }

  // Criterion 4 (no "xn--" prefix when CheckHyphens is off) is deliberately
  // not enforced while https://github.com/whatwg/url/issues/803 is open.

  // Criterion 5: no U+002E FULL STOP inside a label.
  if (label.includes(".")) {
    return false;
  }

  // Criterion 6: the label must not start with a combining mark.
  if (regexes.combiningMarks.test(firstChar)) {
    return false;
  }

  // Criterion 7: every code point needs an acceptable status. Transitional
  // processing (deprecated) accepts only `valid`; nontransitional processing
  // also accepts `deviation`.
  const lookupOptions = { useSTD3ASCIIRules };
  for (const ch of chars) {
    const [status] = findStatus(ch.codePointAt(0), lookupOptions);
    const acceptable = status === STATUS_MAPPING.valid ||
      (!transitionalProcessing && status === STATUS_MAPPING.deviation);
    if (!acceptable) {
      return false;
    }
  }

  // Criterion 8 (CheckJoiners): ContextJ rules,
  // https://tools.ietf.org/html/rfc5892#appendix-A
  if (checkJoiners) {
    let segmentStart = 0;
    for (let i = 0; i < chars.length; ++i) {
      const ch = chars[i];
      if (ch !== "\u200C" && ch !== "\u200D") {
        continue;
      }
      // A joiner can never be the first code point.
      if (i === 0) {
        return false;
      }
      // Either joiner is fine directly after a virama.
      if (regexes.combiningClassVirama.test(chars[i - 1])) {
        continue;
      }
      // Without a virama, only ZWNJ has a further rule that can accept it.
      if (ch !== "\u200C") {
        return false;
      }
      // Test the stretch from the last accepted ZWNJ up to the next one.
      const nextZWNJ = chars.indexOf("\u200C", i + 1);
      const segment = nextZWNJ < 0 ?
        chars.slice(segmentStart) :
        chars.slice(segmentStart, nextZWNJ);
      if (!regexes.validZWNJ.test(segment.join(""))) {
        return false;
      }
      segmentStart = i + 1;
    }
  }

  // Criterion 9 (CheckBidi, bidi domain names only):
  // https://tools.ietf.org/html/rfc5893#section-2
  if (checkBidi && isBidi) {
    if (regexes.bidiS1LTR.test(firstChar)) {
      // Left-to-right label: rules 5 and 6.
      if (!regexes.bidiS5.test(label) || !regexes.bidiS6.test(label)) {
        return false;
      }
    } else if (regexes.bidiS1RTL.test(firstChar)) {
      // Right-to-left label: rules 2 to 4.
      const failsRTLRules = !regexes.bidiS2.test(label) ||
        !regexes.bidiS3.test(label) ||
        (regexes.bidiS4EN.test(label) && regexes.bidiS4AN.test(label));
      if (failsRTLRules) {
        return false;
      }
    } else {
      // Rule 1: the first character fits neither direction.
      return false;
    }
  }

  return true;
}
/**
 * Decides whether a list of labels forms a bidi domain name.
 *
 * Labels starting with "xn--" are Punycode-decoded first; a label that fails
 * to decode is treated as an empty string. The labels are then joined with
 * "." and tested against `regexes.bidiDomain`.
 *
 * @param {string[]} labels - The domain split into labels.
 * @returns {boolean} Result of the `regexes.bidiDomain` test.
 */
function isBidiDomain(labels) {
  const toUnicodeLabel = label => {
    if (!label.startsWith("xn--")) {
      return label;
    }
    try {
      return punycode.decode(label.substring(4));
    } catch {
      return "";
    }
  };

  const domain = labels.map(toUnicodeLabel).join(".");
  return regexes.bidiDomain.test(domain);
}
/**
 * Runs the shared processing pipeline: map, normalize, break into labels,
 * then convert/validate each label.
 *
 * @param {string} domainName - Domain name to process.
 * @param {object} options - Passed to `mapChars` and `validateLabel`; this
 *   function itself reads `transitionalProcessing` and `ignoreInvalidPunycode`.
 * @returns {{ string: string, error: boolean }} The processed labels joined
 *   with "." and a flag that is `true` if any label failed.
 */
function processing(domainName, options) {
  // Steps 1 and 2: map, then normalize to NFC.
  const normalized = mapChars(domainName, options).normalize("NFC");

  // Step 3: break into labels.
  const labels = normalized.split(".");
  const isBidi = isBidiDomain(labels);

  // Step 4: convert and validate every label.
  let error = false;
  for (let i = 0; i < labels.length; ++i) {
    let label = labels[i];
    let transitionalForLabel = options.transitionalProcessing;

    if (label.startsWith("xn--")) {
      if (containsNonASCII(label)) {
        error = true;
        continue;
      }

      try {
        label = punycode.decode(label.substring(4));
      } catch {
        if (!options.ignoreInvalidPunycode) {
          error = true;
          continue;
        }
        // Otherwise the undecoded label is kept and validated below.
      }

      labels[i] = label;
      // Labels that arrived as "xn--" are always validated nontransitionally.
      transitionalForLabel = false;
    }

    // Once an error is recorded, later labels are still decoded above but
    // are no longer validated.
    if (error) {
      continue;
    }

    const isValid = validateLabel(label, {
      ...options,
      transitionalProcessing: transitionalForLabel,
      isBidi
    });
    if (!isValid) {
      error = true;
    }
  }

  return {
    string: labels.join("."),
    error
  };
}
/**
 * Converts a domain name to its ASCII form.
 *
 * The name is run through `processing`, then every label that still contains
 * non-ASCII code points is Punycode-encoded and prefixed with "xn--".
 *
 * @param {string} domainName - Domain name to convert.
 * @param {object} [options]
 * @param {boolean} [options.checkHyphens=false]
 * @param {boolean} [options.checkBidi=false]
 * @param {boolean} [options.checkJoiners=false]
 * @param {boolean} [options.useSTD3ASCIIRules=false]
 * @param {boolean} [options.verifyDNSLength=false] - Also require a total
 *   length of 1 to 253 and a length of 1 to 63 for every label.
 * @param {boolean} [options.transitionalProcessing=false]
 * @param {boolean} [options.ignoreInvalidPunycode=false]
 * @returns {string|null} The ASCII domain name, or `null` if any step failed.
 */
function toASCII(domainName, {
  checkHyphens = false,
  checkBidi = false,
  checkJoiners = false,
  useSTD3ASCIIRules = false,
  verifyDNSLength = false,
  transitionalProcessing = false,
  ignoreInvalidPunycode = false
} = {}) {
  const result = processing(domainName, {
    checkHyphens,
    checkBidi,
    checkJoiners,
    useSTD3ASCIIRules,
    transitionalProcessing,
    ignoreInvalidPunycode
  });

  const labels = result.string.split(".").map(label => {
    if (!containsNonASCII(label)) {
      return label;
    }
    try {
      return `xn--${punycode.encode(label)}`;
    } catch {
      // Record the failure and keep the label unencoded.
      result.error = true;
      return label;
    }
  });

  if (verifyDNSLength) {
    const totalLength = labels.join(".").length;
    if (totalLength > 253 || totalLength === 0) {
      result.error = true;
    }

    const hasBadLabelLength = labels.some(label => label.length > 63 || label.length === 0);
    if (hasBadLabelLength) {
      result.error = true;
    }
  }

  return result.error ? null : labels.join(".");
}
/**
 * Converts a domain name to its Unicode form.
 *
 * Unlike `toASCII`, the processed string is returned even when an error was
 * recorded, so callers must inspect `error` themselves.
 *
 * @param {string} domainName - Domain name to convert.
 * @param {object} [options]
 * @param {boolean} [options.checkHyphens=false]
 * @param {boolean} [options.checkBidi=false]
 * @param {boolean} [options.checkJoiners=false]
 * @param {boolean} [options.useSTD3ASCIIRules=false]
 * @param {boolean} [options.transitionalProcessing=false]
 * @param {boolean} [options.ignoreInvalidPunycode=false]
 * @returns {{ domain: string, error: boolean }} The processed domain and
 *   whether `processing` reported an error.
 */
function toUnicode(domainName, {
  checkHyphens = false,
  checkBidi = false,
  checkJoiners = false,
  useSTD3ASCIIRules = false,
  transitionalProcessing = false,
  ignoreInvalidPunycode = false
} = {}) {
  const processingOptions = {
    checkHyphens,
    checkBidi,
    checkJoiners,
    useSTD3ASCIIRules,
    transitionalProcessing,
    ignoreInvalidPunycode
  };
  const { string: domain, error } = processing(domainName, processingOptions);
  return { domain, error };
}
// Public API: only the two conversion entry points are exported.
module.exports = { toASCII, toUnicode };