urltomarkdown/node_modules/parse5/lib/serializer/index.js

177 lines
5.1 KiB
JavaScript

'use strict';
const defaultTreeAdapter = require('../tree-adapters/default');
const mergeOptions = require('../utils/merge-options');
const doctype = require('../common/doctype');
const HTML = require('../common/html');
//Aliases
const $ = HTML.TAG_NAMES;
const NS = HTML.NAMESPACES;
//Default serializer options
const DEFAULT_OPTIONS = {
treeAdapter: defaultTreeAdapter
};
//Escaping regexes
const AMP_REGEX = /&/g;
const NBSP_REGEX = /\u00a0/g;
const DOUBLE_QUOTE_REGEX = /"/g;
const LT_REGEX = /</g;
const GT_REGEX = />/g;
//Serializer
class Serializer {
constructor(node, options) {
this.options = mergeOptions(DEFAULT_OPTIONS, options);
this.treeAdapter = this.options.treeAdapter;
this.html = '';
this.startNode = node;
}
//API
serialize() {
this._serializeChildNodes(this.startNode);
return this.html;
}
//Internals
_serializeChildNodes(parentNode) {
const childNodes = this.treeAdapter.getChildNodes(parentNode);
if (childNodes) {
for (let i = 0, cnLength = childNodes.length; i < cnLength; i++) {
const currentNode = childNodes[i];
if (this.treeAdapter.isElementNode(currentNode)) {
this._serializeElement(currentNode);
} else if (this.treeAdapter.isTextNode(currentNode)) {
this._serializeTextNode(currentNode);
} else if (this.treeAdapter.isCommentNode(currentNode)) {
this._serializeCommentNode(currentNode);
} else if (this.treeAdapter.isDocumentTypeNode(currentNode)) {
this._serializeDocumentTypeNode(currentNode);
}
}
}
}
_serializeElement(node) {
const tn = this.treeAdapter.getTagName(node);
const ns = this.treeAdapter.getNamespaceURI(node);
this.html += '<' + tn;
this._serializeAttributes(node);
this.html += '>';
if (
tn !== $.AREA &&
tn !== $.BASE &&
tn !== $.BASEFONT &&
tn !== $.BGSOUND &&
tn !== $.BR &&
tn !== $.COL &&
tn !== $.EMBED &&
tn !== $.FRAME &&
tn !== $.HR &&
tn !== $.IMG &&
tn !== $.INPUT &&
tn !== $.KEYGEN &&
tn !== $.LINK &&
tn !== $.META &&
tn !== $.PARAM &&
tn !== $.SOURCE &&
tn !== $.TRACK &&
tn !== $.WBR
) {
const childNodesHolder =
tn === $.TEMPLATE && ns === NS.HTML ? this.treeAdapter.getTemplateContent(node) : node;
this._serializeChildNodes(childNodesHolder);
this.html += '</' + tn + '>';
}
}
_serializeAttributes(node) {
const attrs = this.treeAdapter.getAttrList(node);
for (let i = 0, attrsLength = attrs.length; i < attrsLength; i++) {
const attr = attrs[i];
const value = Serializer.escapeString(attr.value, true);
this.html += ' ';
if (!attr.namespace) {
this.html += attr.name;
} else if (attr.namespace === NS.XML) {
this.html += 'xml:' + attr.name;
} else if (attr.namespace === NS.XMLNS) {
if (attr.name !== 'xmlns') {
this.html += 'xmlns:';
}
this.html += attr.name;
} else if (attr.namespace === NS.XLINK) {
this.html += 'xlink:' + attr.name;
} else {
this.html += attr.prefix + ':' + attr.name;
}
this.html += '="' + value + '"';
}
}
_serializeTextNode(node) {
const content = this.treeAdapter.getTextNodeContent(node);
const parent = this.treeAdapter.getParentNode(node);
let parentTn = void 0;
if (parent && this.treeAdapter.isElementNode(parent)) {
parentTn = this.treeAdapter.getTagName(parent);
}
if (
parentTn === $.STYLE ||
parentTn === $.SCRIPT ||
parentTn === $.XMP ||
parentTn === $.IFRAME ||
parentTn === $.NOEMBED ||
parentTn === $.NOFRAMES ||
parentTn === $.PLAINTEXT ||
parentTn === $.NOSCRIPT
) {
this.html += content;
} else {
this.html += Serializer.escapeString(content, false);
}
}
_serializeCommentNode(node) {
this.html += '<!--' + this.treeAdapter.getCommentNodeContent(node) + '-->';
}
_serializeDocumentTypeNode(node) {
const name = this.treeAdapter.getDocumentTypeNodeName(node);
this.html += '<' + doctype.serializeContent(name, null, null) + '>';
}
}
// NOTE: used in tests and by rewriting stream
Serializer.escapeString = function(str, attrMode) {
str = str.replace(AMP_REGEX, '&amp;').replace(NBSP_REGEX, '&nbsp;');
if (attrMode) {
str = str.replace(DOUBLE_QUOTE_REGEX, '&quot;');
} else {
str = str.replace(LT_REGEX, '&lt;').replace(GT_REGEX, '&gt;');
}
return str;
};
module.exports = Serializer;