Files
exceljs_practice/scripts/rewrite-pattern.js
Cody Zhang ff9710dc08 add scripts
2020-10-08 17:31:39 +08:00

397 lines
12 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

'use strict';
/**
 * Babel helper that backs transpiled `for...of` loops.
 *
 * The returned object exposes the four hooks the generated loop code drives:
 * `s()` starts iteration, `n()` produces the next `{ done, value }` step,
 * `e(err)` reports an error raised by the loop body, and `f()` finalizes.
 *
 * @param {*} o - The value to iterate over.
 * @param {boolean} [allowArrayLike] - When truthy, any object with a numeric
 *   `length` is accepted and walked by index.
 * @returns {{s: Function, n: Function, e: Function, f: Function}}
 * @throws {TypeError} If `o` is neither iterable nor walkable by index.
 */
function _createForOfIteratorHelper(o, allowArrayLike) {
  var it;
  var usesIteratorProtocol = typeof Symbol !== "undefined" && o[Symbol.iterator] != null;

  if (usesIteratorProtocol) {
    var normalCompletion = true;
    var didErr = false;
    var err;
    return {
      s: function s() {
        it = o[Symbol.iterator]();
      },
      n: function n() {
        var step = it.next();
        normalCompletion = step.done;
        return step;
      },
      // Only remember the error here; `f()` rethrows it after cleanup.
      e: function e(_e2) {
        didErr = true;
        err = _e2;
      },
      f: function f() {
        try {
          // Close the iterator when the loop stopped before exhausting it.
          if (!normalCompletion && it.return != null) it.return();
        } finally {
          if (didErr) throw err;
        }
      }
    };
  }

  // No iterator protocol available: fall back to walking by index.
  var canIndex = Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number";
  if (!canIndex) {
    throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.");
  }
  if (it) o = it;
  var i = 0;
  var F = function F() {};
  return {
    s: F,
    n: function n() {
      return i >= o.length ? { done: true } : { done: false, value: o[i++] };
    },
    e: function e(_e) {
      throw _e;
    },
    f: F
  };
}
/**
 * Babel helper: converts a handful of known non-array values to a real array.
 *
 * Strings, `arguments` objects and Int/Uint typed arrays are copied through
 * `_arrayLikeToArray`; `Map` and `Set` go through `Array.from`.
 *
 * @param {*} o - Candidate value.
 * @param {number} [minLen] - Forwarded to `_arrayLikeToArray` as its length cap.
 * @returns {Array|undefined} The array copy, or `undefined` when `o` is falsy
 *   or of an unrecognized kind.
 */
function _unsupportedIterableToArray(o, minLen) {
  if (!o) return;
  if (typeof o === "string") return _arrayLikeToArray(o, minLen);

  // "[object Foo]" -> "Foo"
  var tag = Object.prototype.toString.call(o).slice(8, -1);
  // Plain-object tag: fall back to the constructor's name instead.
  if (tag === "Object" && o.constructor) tag = o.constructor.name;

  var isKeyedCollection = tag === "Map" || tag === "Set";
  if (isKeyedCollection) return Array.from(o);

  var isArrayLike = tag === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(tag);
  return isArrayLike ? _arrayLikeToArray(o, minLen) : undefined;
}
/**
 * Babel helper: copies the leading elements of an indexable value into a new
 * array.
 *
 * @param {{length: number}} arr - Source value read by index.
 * @param {number} [len] - Number of elements to copy; when omitted or larger
 *   than `arr.length`, `arr.length` is used.
 * @returns {Array} A fresh array holding the copied elements.
 */
function _arrayLikeToArray(arr, len) {
  var count = len == null || len > arr.length ? arr.length : len;
  var arr2 = new Array(count);
  var i = 0;
  while (i < count) {
    arr2[i] = arr[i];
    i++;
  }
  return arr2;
}
var generate = require('regjsgen').generate;
var parse = require('regjsparser').parse;
var regenerate = require('regenerate');
var unicodeMatchProperty = require('unicode-match-property-ecmascript');
var unicodeMatchPropertyValue = require('unicode-match-property-value-ecmascript');
var iuMappings = require('./data/iu-mappings.js');
var ESCAPE_SETS = require('./data/character-class-escape-sets.js'); // Prepare a Regenerate set containing all code points, used for negative
// character classes (if any).
var UNICODE_SET = regenerate().addRange(0x000000, 0x10FFFF);

// Without the `u` flag, the range stops at 0xFFFF.
// https://mths.be/es6#sec-pattern-semantics
var BMP_SET = regenerate().addRange(0x0000, 0xFFFF);

// Prepare a Regenerate set containing all code points that are supposed to be
// matched by `/./u` (https://mths.be/es6#sec-atom): every Unicode code point
// minus the `LineTerminator`s (https://mths.be/es6#sec-line-terminators).
var DOT_SET_UNICODE = UNICODE_SET.clone()
  .remove(0x000A) // Line Feed <LF>
  .remove(0x000D) // Carriage Return <CR>
  .remove(0x2028) // Line Separator <LS>
  .remove(0x2029); // Paragraph Separator <PS>
/**
 * Looks up the precomputed set for a character class escape.
 *
 * Which table of `ESCAPE_SETS` is consulted depends on the flags: `REGULAR`
 * without `unicode`, otherwise `UNICODE_IGNORE_CASE` or `UNICODE` depending
 * on `ignoreCase`.
 *
 * @param {string} character - Key passed to the table's `get`.
 * @param {*} unicode - Truthy to use one of the Unicode tables.
 * @param {*} ignoreCase - Truthy to prefer the case-insensitive Unicode table.
 * @returns {*} Whatever the selected table's `get(character)` returns.
 */
var getCharacterClassEscapeSet = function getCharacterClassEscapeSet(character, unicode, ignoreCase) {
  var table;
  if (!unicode) {
    table = ESCAPE_SETS.REGULAR;
  } else {
    table = ignoreCase ? ESCAPE_SETS.UNICODE_IGNORE_CASE : ESCAPE_SETS.UNICODE;
  }
  return table.get(character);
};
/**
 * Picks the shared set used to rewrite `.` in Unicode mode.
 *
 * @param {*} dotAll - Truthy when `.` must also match line terminators.
 * @returns {*} `UNICODE_SET` when `dotAll` is truthy, else `DOT_SET_UNICODE`.
 *   The shared instance is returned as-is, not a copy.
 */
var getUnicodeDotSet = function getUnicodeDotSet(dotAll) {
  if (dotAll) {
    return UNICODE_SET;
  }
  return DOT_SET_UNICODE;
};
var getUnicodePropertyValueSet = function getUnicodePropertyValueSet(property, value) {
var path = value ? "".concat(property, "/").concat(value) : "Binary_Property/".concat(property);
try {
return require("regenerate-unicode-properties/".concat(path, ".js"));
} catch (exception) {
throw new Error("Failed to recognize value `".concat(value, "` for property ") + "`".concat(property, "`."));
}
};
/**
 * Resolves a property escape that has no `=` in it, e.g. `\p{Lu}` or
 * `\p{ASCII}`.
 *
 * The name is first tried as a `General_Category` value; if any step of that
 * attempt throws, it is treated as a binary property instead.
 *
 * @param {string} value - The lone name found inside the escape.
 * @returns {*} The result of `getUnicodePropertyValueSet`.
 * @throws If the binary-property lookup fails as well.
 */
var handleLoneUnicodePropertyNameOrValue = function handleLoneUnicodePropertyNameOrValue(value) {
  var generalCategory = 'General_Category';
  try {
    // Note: `unicodeMatchPropertyValue` throws on invalid values.
    return getUnicodePropertyValueSet(generalCategory, unicodeMatchPropertyValue(generalCategory, value));
  } catch (exception) {
    // It's not a `General_Category` value, so fall through and check whether
    // it's a binary property.
  }
  // Note: `unicodeMatchProperty` throws on invalid properties.
  return getUnicodePropertyValueSet(unicodeMatchProperty(value));
};
/**
 * Builds the set matched by a Unicode property escape.
 *
 * @param {string} value - The escape's contents, either a lone name or
 *   `Property=Value`. Only the first two `=`-separated parts are read.
 * @param {*} isNegative - Truthy for the negated form: the result is then
 *   `UNICODE_SET` minus the property's set.
 * @returns {*} A set that is safe to mutate (a clone in both cases).
 */
var getUnicodePropertyEscapeSet = function getUnicodePropertyEscapeSet(value, isNegative) {
  var segments = value.split('=');
  var set;
  if (segments.length === 1) {
    set = handleLoneUnicodePropertyNameOrValue(segments[0]);
  } else {
    // The pattern consists of two parts, i.e. `Property=Value`.
    var canonicalProperty = unicodeMatchProperty(segments[0]);
    var canonicalValue = unicodeMatchPropertyValue(canonicalProperty, segments[1]);
    set = getUnicodePropertyValueSet(canonicalProperty, canonicalValue);
  }
  return isNegative ? UNICODE_SET.clone().remove(set) : set.clone();
};
// Given a range of code points, add any case-folded code points in that range
// to a set.
regenerate.prototype.iuAddRange = function (min, max) {
  // Walk the range one code point at a time. `min` itself is always
  // processed, even if it is greater than `max`.
  var codePoint = min;
  do {
    var folded = caseFold(codePoint);
    if (folded) {
      this.add(folded);
    }
    codePoint++;
  } while (codePoint <= max);
  // Return the set itself so calls can be chained.
  return this;
};
/**
 * Replaces the contents of an AST node with the parse result of `pattern`.
 *
 * `pattern` is parsed with the `u` flag only when `config.useUnicodeFlag` is
 * set. Unless the parsed root is a `characterClass`, `group` or `value`, it is
 * first wrapped in a non-capturing group. The result is copied onto `item`
 * with `Object.assign`, so properties of `item` that the new tree does not
 * define are left in place.
 *
 * @param {Object} item - Node to overwrite (mutated).
 * @param {string} pattern - Replacement pattern source.
 * @returns {undefined}
 */
var update = function update(item, pattern) {
  var parseFlags = config.useUnicodeFlag ? 'u' : '';
  var tree = parse(pattern, parseFlags);
  var kind = tree.type;
  var isSelfContained = kind === 'characterClass' || kind === 'group' || kind === 'value';
  if (!isSelfContained) {
    // Wrap the pattern in a non-capturing group.
    tree = wrap(tree, pattern);
  }
  Object.assign(item, tree);
};
/**
 * Builds a non-capturing group node around an existing tree.
 *
 * @param {Object} tree - Node that becomes the group's only body element.
 * @param {string} pattern - Source text of `tree`, used for the `raw` field.
 * @returns {Object} A new `group` node with `behavior: 'ignore'`.
 */
var wrap = function wrap(tree, pattern) {
  var group = {};
  group.type = 'group';
  group.behavior = 'ignore';
  group.body = [tree];
  group.raw = "(?:".concat(pattern, ")");
  return group;
};
/**
 * Looks up the case-folded counterpart of a code point in `iuMappings`.
 *
 * @param {number} codePoint - Code point to look up.
 * @returns {*} The mapped entry, or `false` when there is none (or it is
 *   falsy).
 */
var caseFold = function caseFold(codePoint) {
  var mapped = iuMappings.get(codePoint);
  return mapped ? mapped : false;
};
/**
 * Rewrites a `characterClass` node: collects everything the class lists into
 * one regenerate set and replaces the node with the pattern generated from it.
 *
 * For values and ranges, case-folded code points are added as well when the
 * pattern is case-insensitive and Unicode but the output will not use the
 * `u` flag. A negated class is emitted as a negative lookahead for the
 * collected set followed by `[\s\S]`.
 *
 * NOTE(review): `update` parses the replacement without `u` unless
 * `config.useUnicodeFlag` is set, in which case `[\s\S]` consumes a single
 * UTF-16 code unit - confirm negated classes behave as intended for astral
 * input in Unicode mode.
 *
 * @param {Object} characterClassItem - The node to rewrite (mutated).
 * @param {Object} regenerateOptions - Passed to the set's `toString`.
 * @returns {Object} `characterClassItem`.
 * @throws {Error} If the class body contains a node of unknown type.
 */
var processCharacterClass = function processCharacterClass(characterClassItem, regenerateOptions) {
  var set = regenerate();
  var foldCase = config.ignoreCase && config.unicode && !config.useUnicodeFlag;
  var _iterator = _createForOfIteratorHelper(characterClassItem.body),
      _step;
  try {
    for (_iterator.s(); !(_step = _iterator.n()).done;) {
      var member = _step.value;
      var kind = member.type;
      if (kind === 'value') {
        set.add(member.codePoint);
        if (foldCase) {
          var folded = caseFold(member.codePoint);
          if (folded) {
            set.add(folded);
          }
        }
      } else if (kind === 'characterClassRange') {
        var min = member.min.codePoint;
        var max = member.max.codePoint;
        set.addRange(min, max);
        if (foldCase) {
          set.iuAddRange(min, max);
        }
      } else if (kind === 'characterClassEscape') {
        set.add(getCharacterClassEscapeSet(member.value, config.unicode, config.ignoreCase));
      } else if (kind === 'unicodePropertyEscape') {
        set.add(getUnicodePropertyEscapeSet(member.value, member.negative));
      } else {
        // This branch is only here as a safeguard; it should never be
        // reached. Code coverage tools should ignore it.
        /* istanbul ignore next */
        throw new Error("Unknown term type: ".concat(member.type));
      }
    }
  } catch (err) {
    _iterator.e(err);
  } finally {
    _iterator.f();
  }
  var source = set.toString(regenerateOptions);
  if (characterClassItem.negative) {
    source = "(?!".concat(source, ")[\\s\\S]");
  }
  update(characterClassItem, source);
  return characterClassItem;
};
/**
 * Turns a named back-reference node into a numbered one, in place.
 *
 * @param {Object} item - The `reference` node; its `name` is removed.
 * @param {number} index - Value stored as the node's `matchIndex`.
 * @returns {undefined}
 */
var updateNamedReference = function updateNamedReference(item, index) {
  item.matchIndex = index;
  delete item.name;
};
/**
 * Fails if any named reference was never matched to a group.
 *
 * @param {Object} groups - Bookkeeping object; only the keys of its
 *   `unmatchedReferences` map are inspected.
 * @returns {undefined}
 * @throws {Error} Listing every group name still present in
 *   `unmatchedReferences`.
 */
var assertNoUnmatchedReferences = function assertNoUnmatchedReferences(groups) {
  var pendingNames = Object.keys(groups.unmatchedReferences);
  if (pendingNames.length === 0) {
    return;
  }
  throw new Error("Unknown group names: ".concat(pendingNames));
};
/**
 * Rewrites one node of the parsed pattern in place and returns it. Container
 * nodes (`group`, `alternative`, `disjunction`, `quantifier`) are walked
 * recursively through their `body`.
 *
 * What happens per node type depends on the module-level `config`:
 * - `dot`, `characterClass`, `unicodePropertyEscape`, `characterClassEscape`
 *   and `value` nodes are replaced (via `update`) by a pattern generated from
 *   a regenerate set, where the configuration asks for it.
 * - Capturing groups (`behavior === 'normal'`) advance `groups.lastIndex`.
 *   With `config.namedGroup`, a named group loses its name, is recorded in
 *   `groups.names`, is reported to `groups.onNamedGroup` if present, and
 *   resolves any references that were seen before it.
 * - Named `reference` nodes are converted to numbered ones once their group
 *   is known; otherwise they are queued in `groups.unmatchedReferences`.
 * - `anchor` and `empty` nodes are left untouched.
 *
 * @param {Object} item - The node to process (mutated).
 * @param {Object} regenerateOptions - Passed to every regenerate `toString`.
 * @param {Object} groups - Bookkeeping with `lastIndex`, `names`,
 *   `unmatchedReferences` and an optional `onNamedGroup` callback (mutated).
 * @returns {Object} `item`.
 * @throws {Error} On a repeated group name or an unknown node type.
 */
var processTerm = function processTerm(item, regenerateOptions, groups) {
  switch (item.type) {
    case 'dot':
      if (config.useDotAllFlag) {
        // `useDotAllFlag` is set: leave `.` exactly as written.
        break;
      } else if (config.unicode) {
        update(item, getUnicodeDotSet(config.dotAll).toString(regenerateOptions));
      } else if (config.dotAll) {
        // TODO: consider changing this at the regenerate level.
        update(item, '[\\s\\S]');
      }
      break;
    case 'characterClass':
      item = processCharacterClass(item, regenerateOptions);
      break;
    case 'unicodePropertyEscape':
      if (config.unicodePropertyEscape) {
        update(item, getUnicodePropertyEscapeSet(item.value, item.negative).toString(regenerateOptions));
      }
      break;
    case 'characterClassEscape':
      update(item, getCharacterClassEscapeSet(item.value, config.unicode, config.ignoreCase).toString(regenerateOptions));
      break;
    case 'group':
      // Only capturing groups take part in the numbering.
      if (item.behavior === 'normal') {
        groups.lastIndex++;
      }
      if (item.name && config.namedGroup) {
        var name = item.name.value;
        if (groups.names[name]) {
          throw new Error("Multiple groups with the same name (".concat(name, ") are not allowed."));
        }
        var index = groups.lastIndex;
        delete item.name;
        groups.names[name] = index;
        if (groups.onNamedGroup) {
          groups.onNamedGroup.call(null, name, index);
        }
        // Resolve references that appeared before this group was reached.
        if (groups.unmatchedReferences[name]) {
          groups.unmatchedReferences[name].forEach(function (reference) {
            updateNamedReference(reference, index);
          });
          delete groups.unmatchedReferences[name];
        }
      }
    /* falls through */
    case 'alternative':
    case 'disjunction':
    case 'quantifier':
      item.body = item.body.map(function (term) {
        return processTerm(term, regenerateOptions, groups);
      });
      break;
    case 'value':
      var codePoint = item.codePoint;
      var set = regenerate(codePoint);
      // Case folding is only emulated when the output cannot rely on `u`.
      if (config.ignoreCase && config.unicode && !config.useUnicodeFlag) {
        var folded = caseFold(codePoint);
        if (folded) {
          set.add(folded);
        }
      }
      update(item, set.toString(regenerateOptions));
      break;
    case 'reference':
      if (item.name) {
        var _name = item.name.value;
        var _index = groups.names[_name];
        if (_index) {
          updateNamedReference(item, _index);
          break;
        }
        if (!groups.unmatchedReferences[_name]) {
          groups.unmatchedReferences[_name] = [];
        }
        // Keep track of references used before the corresponding group.
        groups.unmatchedReferences[_name].push(item);
      }
      break;
    case 'anchor':
    case 'empty':
      // Nothing to do here.
      break;
    // The `default` clause is only here as a safeguard; it should never be
    // reached. Code coverage tools should ignore it.
    /* istanbul ignore next */
    default:
      throw new Error("Unknown term type: ".concat(item.type));
  }
  return item;
};
// Module-wide settings, all off by default. `rewritePattern` reassigns every
// field on each call before the tree is processed.
var config = {
  ignoreCase: false,
  unicode: false,
  dotAll: false,
  useDotAllFlag: false,
  useUnicodeFlag: false,
  unicodePropertyEscape: false,
  namedGroup: false
};
/**
 * Rewrites a regular expression pattern according to its flags and the
 * requested transforms, and returns the generated pattern source.
 *
 * Every field of the module-level `config` is reassigned from `flags` and
 * `options` on each call, before the pattern is parsed and processed.
 *
 * @param {string} pattern - Pattern source to rewrite.
 * @param {string} [flags] - Flags of the original expression; `u`, `i` and
 *   `s` are inspected here, and the whole string is passed to the parser.
 * @param {Object} [options] - Reads `dotAllFlag`, `namedGroup`,
 *   `useDotAllFlag`, `useUnicodeFlag`, `unicodePropertyEscape`, `lookbehind`
 *   and `onNamedGroup`.
 * @returns {*} The result of `generate` for the processed tree.
 * @throws {Error} If both `dotAllFlag` and `useDotAllFlag` are truthy, or if
 *   a named reference has no matching group.
 */
var rewritePattern = function rewritePattern(pattern, flags, options) {
  // Both helpers hand back the falsy container itself when it is missing,
  // exactly like the inline `x && x.y` form.
  var hasFlag = function (flag) {
    return flags && flags.includes(flag);
  };
  var option = function (name) {
    return options && options[name];
  };

  config.unicode = hasFlag('u');
  var regjsparserFeatures = {
    unicodePropertyEscape: config.unicode,
    namedGroups: true,
    lookbehind: option('lookbehind')
  };
  config.ignoreCase = hasFlag('i');
  var supportDotAllFlag = option('dotAllFlag');
  config.dotAll = supportDotAllFlag && hasFlag('s');
  config.namedGroup = option('namedGroup');
  config.useDotAllFlag = option('useDotAllFlag');
  config.useUnicodeFlag = option('useUnicodeFlag');
  config.unicodePropertyEscape = option('unicodePropertyEscape');

  if (supportDotAllFlag && config.useDotAllFlag) {
    throw new Error('`useDotAllFlag` and `dotAllFlag` cannot both be true!');
  }

  var regenerateOptions = {
    hasUnicodeFlag: config.useUnicodeFlag,
    bmpOnly: !config.unicode
  };
  var groups = {
    onNamedGroup: option('onNamedGroup'),
    lastIndex: 0,
    // { [name]: index }
    names: Object.create(null),
    // { [name]: Array<reference> }
    unmatchedReferences: Object.create(null)
  };

  var tree = parse(pattern, flags, regjsparserFeatures);
  // Note: `processTerm` mutates `tree` and `groups`.
  processTerm(tree, regenerateOptions, groups);
  assertNoUnmatchedReferences(groups);
  return generate(tree);
};
module.exports = rewritePattern;