Fork me on GitHub

core/parser.js

            
            
/**
 * @private
 */

import Definitions from './definitions.js';
import Color from './color.js';
import FontMetrics from './fontMetrics.js';
import Lexer from './lexer.js';
import MathAtomModule from './mathAtom.js';

const MathAtom = MathAtomModule.MathAtom;

/**
 * A parser transforms a list of tokens into a list of MathAtom.
 *
 * @param {InputToken[]} tokens - An array of tokens generated by the lexer.
 * @param {Object} [args] - An optional list of arguments. `#n` tokens will be
 * substituted with the corresponding element in the args array. This is used
 * when parsing macros.
 * @param {Object} [macros] - Optional macro definitions.
 * @class Parser
 * @global
 * @property {InputToken[]} tokens - An array of tokens generated by the lexer.
 * @property {Object} args - Optional arguments to substitute the `#` token.
 * @property {Object} macros - A dictionary of objects, indexed by the name of
 * the macro, with the following keys:
 *    * args: an integer, the number of arguments, default 0. They can be referenced as #0,
 *       #1, #2... inside the definition of the macro
 *    * def: a string, the definition of the macro, which can reference other macros
 * @property {number} index - The current token to be parsed: index in `this.tokens`
 * @property {MathAtom[]} mathList - Accumulated result of the parsing by
 * `parseAtom()`
 * @property {string} parseMode - The parse mode indicates the syntax rules to
 * use to parse the upcoming tokens.
 *  Valid values include:
 *  - `'math'`: spaces are ignored, math functions are allowed
 *  - `'text'`: spaces are accounted for, math functions are ignored
 *  - `'string'`
 *  - `'color'`: color name, hex value: `'#fff'`, `'#a0a0a0'`
 *  - `'number'`: `+/-12.56`
 *  - `'dimen'`: `'25mu'`, `'2pt'`
 *  - `'skip'`: `'25mu plus 2em minus fiLll'`, `'2pt'`
 *  - `'colspec'`: formatting of a column in tabular environment, e.g. `'r@{.}l'`
 * @property {boolean} tabularMode - When in tabular mode, `'&'` is interpreted as
 *  a column separator and `'\\'` (or `'\cr'`) as a row separator. Used for matrices, etc...
 * @property {number} endCount - Counter to prevent deadlock. If `end()` is
 * called too many times (1,000) in a row for the same token, bail.
 * @private
 */
class Parser {
    constructor(tokens, args, macros) {
        this.tokens = tokens;
        this.index = 0;
        this.args = args;
        this.macros = macros;
        this.mathList = [];
        this.parseMode = 'math';
        this.tabularMode = false;
        this.endCount = 0;
    }
    swapMathList(newMathList) {
        const result = this.mathList;
        this.mathList = newMathList || [];
        return result;
    }
    swapParseMode(mode) {
        const result = this.parseMode;
        this.parseMode = mode;
        return result;
    }
    /**
     * True if we've reached the end of the token stream.
     * @method Parser#end
     * @private
     */
    end() {
        // To prevent a deadlock, count how many times end() is called without the
        // index advancing. If it happens more than 1,000 times in a row,
        // assume something is broken and pretend the stream is finished.
        this.endCount++;
        return this.index >= this.tokens.length || this.endCount > 1000;
    }
    get() {
        this.endCount = 0;
        return this.index < this.tokens.length ? this.tokens[this.index++] : null;
    }
    peek(offset) {
        const index = this.index + (offset ? offset : 0);
        return index < this.tokens.length ? this.tokens[index] : null;
    }
    /**
     * Return the last atom of the math list.
     * If the list is empty, or if its last atom is not of type `'mop'`, a new
     * `'msubsup'` atom (a zero-width space) is appended and returned instead.
     * @method Parser#lastMathAtom
     */
    lastMathAtom() {
        if (this.mathList.length === 0 ||
            this.mathList[this.mathList.length - 1].type !== 'mop') {
            // ZERO WIDTH SPACE
            const lastAtom = new MathAtom(this.parseMode, 'msubsup', '\u200b', 'main');
            lastAtom.attributes = {
                "aria-hidden": true
            };
            this.mathList.push(lastAtom);
        }
        return this.mathList[this.mathList.length - 1];
    }
    /**
     * @param {string} type
     * @return {boolean} True if the next token is of the specified type
     * @method Parser#hasToken
     */
    hasToken(type) {
        const index = this.index;
        return index < this.tokens.length ?
            this.tokens[index].type === type : false;
    }
    /**
     * @param {string} [value]
     * @return {boolean} True if the next token is of type `'literal'` and has the
     * specified value. If `value` is empty, return true if the token is of type
     * `'literal'`, whatever its value.
     * @method Parser#hasLiteral
     */
    hasLiteral(value) {
        const index = this.index;
        return index < this.tokens.length ?
            this.tokens[index].type === 'literal' &&
            (!value || this.tokens[index].value === value) : false;
    }
    /**
     * @param {RegExp} pattern
     * @return {boolean} True if the next token is of type `'literal'` and matches
     * the specified regular expression pattern.
     * @method Parser#hasLiteralPattern
     */
    hasLiteralPattern(pattern) {
        return this.hasToken('literal') &&
            pattern.test(this.tokens[this.index].value);
    }
    hasCommand(command) {
        console.assert(command === '\\' || command.charAt(0) !== '\\', 'hasCommand() does not require a \\');
        const index = this.index;
        return index < this.tokens.length ?
            this.tokens[index].type === 'command' &&
            this.tokens[index].value === command : false;
    }
    hasInfixCommand() {
        const index = this.index;
        if (index < this.tokens.length &&
            this.tokens[index].type === 'command') {
            const info = Definitions.getInfo('\\' + this.tokens[index].value, this.parseMode, this.macros);
            return info && info.infix;
        }
        return false;
    }
    hasColumnSeparator() {
        const index = this.index;
        return this.tabularMode && index < this.tokens.length ?
            this.tokens[index].type === 'literal' &&
            this.tokens[index].value === '&' : false;
    }
    hasRowSeparator() {
        const index = this.index;
        return this.tabularMode && index < this.tokens.length ?
            this.tokens[index].type === 'command' &&
            (this.tokens[index].value === '\\' ||
                this.tokens[index].value === 'cr') : false;
    }
    parseColumnSeparator() {
        if (this.hasColumnSeparator()) {
            this.index++;
            return true;
        }
        return false;
    }
    /**
     * Return the appropriate value for a placeholder, either a default
     * one, or if a value was provided for #? via args, that value.
     */
    placeholder() {
        if (this.args && typeof this.args['?'] === 'string') {
            // If there is a specific value defined for the placeholder,
            // use it.
            return parseTokens(Lexer.tokenize(this.args['?']), this.parseMode, null, this.macros);
        }
        // U+2753 = BLACK QUESTION MARK ORNAMENT
        return [new MathAtom(this.parseMode, 'placeholder', '?')];
    }
    hasImplicitCommand(commands) {
        if (this.index < this.tokens.length) {
            const token = this.tokens[this.index];
            if (token.type === 'command') {
                return commands.includes(token.value);
            }
        }
        return false;
    }
    parseRowSeparator() {
        if (this.hasRowSeparator()) {
            this.index++;
            return true;
        }
        return false;
    }
    /**
     * @param {string} type
     * @method Parser#parseToken
     */
    parseToken(type) {
        if (this.hasToken(type)) {
            this.index++;
            return true;
        }
        return false;
    }
    skipUntilToken(type) {
        while (!this.end() && !this.parseToken(type)) {
            this.get();
        }
    }
    parseCommand(command) {
        if (this.hasCommand(command)) {
            this.index++;
            return true;
        }
        return false;
    }
    parseLiteral(literal) {
        if (this.hasLiteral(literal)) {
            this.index++;
            return true;
        }
        return false;
    }
    parseFiller() {
        let skipped = false;
        let done = false;
        do {
            const skippedSpace = this.parseToken('space');
            const skippedRelax = this.parseCommand('relax');
            skipped = skipped || skippedSpace || skippedRelax;
            done = !skippedSpace && !skippedRelax;
        } while (!done);
        return skipped;
    }
    /**
     * Keywords are used to specify dimensions, and for various other
     * syntactic constructs. Unlike commands, they are not case sensitive.
     * There are 25 keywords:
     * at by bp cc cm dd depth em ex fil fill filll height in minus
     * mm mu pc plus pt sp spread to true width
     *
     * TeX: 8212
     * @param {string} keyword
     * @return {boolean} true if the expected keyword is present
     * @method Parser#parseKeyword
     * @private
     */
    parseKeyword(keyword) {
        const savedIndex = this.index;
        let done = this.end();
        let value = '';
        while (!done) {
            const token = this.get();
            if (token.type === 'literal') {
                value += token.value;
            }
            done = this.end() || token.type !== 'literal' ||
                value.length >= keyword.length;
        }
        const hasKeyword = keyword.toUpperCase() === value.toUpperCase();
        if (!hasKeyword) {
            this.index = savedIndex;
        }
        return hasKeyword;
    }
    /**
     * Return a sequence of characters as a string.
     * i.e. 'abcd' returns 'abcd'.
     * Terminates on the first non-character encountered
     * e.g. '{', '}' etc...
     * Will also terminate on ']'
     * @return {string}
     * @method Parser#scanString
     * @private
     */
    scanString() {
        let result = '';
        let done = this.end();
        while (!done) {
            if (this.hasLiteral(']')) {
                done = true;
            } else if (this.hasToken('literal')) {
                result += this.get().value;
                done = this.end();
            } else if (this.parseToken('space')) {
                result += ' ';
                done = this.end();
            } else if (this.hasToken('command')) {
                // TeX will give a 'Missing \endcsname inserted' error
                // if it encounters any command when expecting a string.
                // We're a bit more lax.
                const token = this.get();
                const info = Definitions.getInfo('\\' + token.value, this.parseMode, this.macros);
                // If parseMode is 'math', info.type will never be 'textord'
                // Otherwise, info.type will never be 'mord'
                if (info && (info.type === 'mord' || info.type === 'textord') && info.value) {
                    result += info.value;
                }
                done = this.end();
            } else {
                done = true;
            }
        }
        return result;
    }
    /**
     * Return a CSS color (#rrggbb)
     * @method Parser#scanColor
     * @private
     */
    scanColor() {
        return Color.stringToColor(this.scanString());
    }
    /**
     * Return as a number a group of characters representing a
     * numerical quantity.
     *
     * From TeX:8695 (scan_int):
     * An integer number can be preceded by any number of spaces and `\.+' or
     * `\.-' signs. Then comes either a decimal constant (i.e., radix 10), an
     * octal constant (i.e., radix 8, preceded by~\.\'), a hexadecimal constant
     * (radix 16, preceded by~\."), an alphabetic constant (preceded by~\.\`), or
     * an internal variable.
     * @return {number}
     * @method Parser#scanNumber
     * @private
     */
    scanNumber(isInteger) {
        const negative = this.parseLiteral('-');
        // Optional (ignorable) '+' sign
        if (!negative) this.parseLiteral('+');
        this.parseToken('space');
        isInteger = !!isInteger;
        let radix = 10;
        let digits = /[0-9]/;
        if (this.parseLiteral("'")) {
            // Apostrophe indicates an octal value
            radix = 8;
            digits = /[0-7]/;
            isInteger = true;
        } else if (this.parseLiteral('"') || this.parseLiteral('x')) {
            // Double-quote indicates a hex value
            // The 'x' prefix notation for the hexadecimal numbers is a MathJax extension.
            // For example: 'x3a'
            radix = 16;
            // Hex digits have to be upper-case
            digits = /[0-9A-F]/;
            isInteger = true;
        }
        let value = '';
        while (this.hasLiteralPattern(digits)) {
            value += this.get().value;
        }
        // Parse the fractional part, if applicable
        if (!isInteger && (this.parseLiteral('.') || this.parseLiteral(','))) {
            value += '.';
            while (this.hasLiteralPattern(digits)) {
                value += this.get().value;
            }
        }
        const result = isInteger ? parseInt(value, radix) : parseFloat(value);
        return negative ? -result : result;
    }
    /**
     * Return as a floating point number a dimension, as converted by
     * `FontMetrics.toEm()` (presumably expressed in em, not pt - TODO confirm)
     *
     * See TeX:8831
     * @todo: note that some units depend on the font (em, ex). So it might be
     * better to return a dimen struct with the value + unit and resolve
     * later when we have a font context....
     * @return {number}
     * @method Parser#scanDimen
     * @private
     */
    scanDimen() {
        const value = this.scanNumber(false);
        this.parseToken('space');
        let result;
        if (this.parseKeyword('pt')) {
            result = FontMetrics.toEm(value, 'pt');
        } else if (this.parseKeyword('mm')) {
            result = FontMetrics.toEm(value, 'mm');
        } else if (this.parseKeyword('cm')) {
            result = FontMetrics.toEm(value, 'cm');
        } else if (this.parseKeyword('ex')) {
            result = FontMetrics.toEm(value, 'ex');
        } else if (this.parseKeyword('px')) {
            result = FontMetrics.toEm(value, 'px');
        } else if (this.parseKeyword('em')) {
            result = FontMetrics.toEm(value, 'em');
        } else if (this.parseKeyword('bp')) {
            result = FontMetrics.toEm(value, 'bp');
        } else if (this.parseKeyword('dd')) {
            result = FontMetrics.toEm(value, 'dd');
        } else if (this.parseKeyword('pc')) {
            result = FontMetrics.toEm(value, 'pc');
        } else if (this.parseKeyword('in')) {
            result = FontMetrics.toEm(value, 'in');
        } else if (this.parseKeyword('mu')) {
            result = FontMetrics.toEm(value, 'mu');
        } else {
            // If the units are missing, TeX assumes 'pt'
            result = FontMetrics.toEm(value, 'pt');
        }
        return result;
    }
    scanSkip() {
        const result = this.scanDimen();
        // We parse, but ignore the optional 'plus' and 'minus'
        // arguments.
        this.parseToken('space');
        // 'plus', optionally followed by 'minus'
        // ('minus' cannot come before 'plus')
        // dimen or 'hfill'
        if (this.parseKeyword('plus')) {
            // @todo there could also be a \hFilLlL command here
            this.scanDimen();
        }
        this.parseToken('space');
        if (this.parseKeyword('minus')) {
            // @todo there could also be a \hFilLlL command here
            this.scanDimen();
        }
        return result;
    }
    scanColspec() {
        this.parseToken('space');
        const result = [];
        while (!this.end() && !(this.hasToken('}') || this.hasLiteral(']'))) {
            if (this.hasLiteral()) {
                const literal = this.get().value;
                if ('lcr'.includes(literal)) {
                    result.push({ align: literal });
                } else if (literal === '|') {
                    result.push({ rule: true });
                } else if (literal === '@') {
                    if (this.parseToken('{')) {
                        const savedParsemode = this.swapParseMode('math');
                        result.push({ gap: this.scanImplicitGroup(token => token.type === '}') });
                        this.swapParseMode(savedParsemode);
                    }
                    this.parseToken('}');
                }
            }
        }
        return result;
    }
    /**
     * Parse a `\(...\)` or `\[...\]` sequence
     * @return {MathAtom} group for the sequence or null
     * @method Parser#scanModeSet
     * @private
     */
    scanModeSet() {
        let final;
        if (this.parseCommand('(')) final = ')';
        if (!final && this.parseCommand('[')) final = ']';
        if (!final) return null;
        const savedParsemode = this.swapParseMode('math');
        const result = new MathAtom('math', 'group');
        result.mathstyle = final === ')' ? 'textstyle' : 'displaystyle';
        result.body = this.scanImplicitGroup(token => token.type === 'command' && token.value === final);
        this.parseCommand(final);
        this.swapParseMode(savedParsemode);
        if (!result.body || result.body.length === 0) return null;
        return result;
    }
    /**
     * Parse a `$...$` or `$$...$$` sequence
     * @method Parser#scanModeShift
     * @private
     */
    scanModeShift() {
        if (!this.hasToken('$') && !this.hasToken('$$')) return null;
        const final = this.get().type;
        const result = new MathAtom('math', 'group');
        result.mathstyle = final === '$' ? 'textstyle' : 'displaystyle';
        result.latexOpen = result.mathstyle === 'textstyle' ? '$' : '$$';
        result.latexClose = result.latexOpen;
        const savedParsemode = this.swapParseMode('math');
        result.body = this.scanImplicitGroup(token => token.type === final);
        this.parseToken(final);
        this.swapParseMode(savedParsemode);
        if (!result.body || result.body.length === 0) return null;
        return result;
    }
    /**
     * Parse a `\begin{env}...\end{env}` sequence
     * @method Parser#scanEnvironment
     * @private
     */
    scanEnvironment() {
        // An environment starts with a \begin command
        if (!this.parseCommand('begin')) return null;
        // The \begin command is immediately followed by the environment
        // name, as a string argument
        const envName = this.scanArg('string');
        const env = Definitions.getEnvironmentInfo(envName);
        // If the environment has some arguments, parse them
        const args = [];
        if (env && env.params) {
            for (const param of env.params) {
                // Parse an argument
                if (param.optional) {
                    // If it's not present, return the default argument value
                    const arg = this.scanOptionalArg(param.type);
                    // args.push(arg ? arg : param.defaultValue); @todo defaultvalue
                    args.push(arg);
                } else {
                    // If it's not present, scanArg returns null,
                    // but push it on the list of arguments anyway.
                    // The null value will be interpreted as unspecified
                    // optional value by the command handler.
                    args.push(this.scanArg(param.type));
                }
            }
        }
        // Some environments change the mode
        const savedMode = this.parseMode;
        const savedTabularMode = this.tabularMode;
        const savedMathList = this.swapMathList([]);
        // @todo: since calling scanImplicitGroup(), may not need to save/restore the mathlist
        this.tabularMode = env.tabular;
        const array = [];
        const rowGaps = [];
        let row = [];
        let done = false;
        do {
            done = this.end();
            if (!done && this.parseCommand('end')) {
                done = this.scanArg('string') === envName;
            }
            if (!done) {
                if (this.parseColumnSeparator()) {
                    row.push(this.swapMathList([]));
                } else if (this.parseRowSeparator()) {
                    row.push(this.swapMathList([]));
                    let gap = 0;
                    this.parseToken('space');
                    if (this.parseLiteral('[')) {
                        gap = this.scanDimen();
                        this.parseToken('space');
                        this.parseLiteral(']');
                    }
                    rowGaps.push(gap || 0);
                    array.push(row);
                    row = [];
                } else {
                    this.mathList = this.mathList.concat(this.scanImplicitGroup());
                }
            }
        } while (!done);
        row.push(this.swapMathList([]));
        if (row.length > 0) array.push(row);
        const newMathList = this.swapMathList(savedMathList);
        // If we're in tabular mode, we should end up with an empty mathlist
        console.assert(!this.tabularMode || newMathList.length === 0, 'Leftover atoms in tabular mode');
        this.parseMode = savedMode;
        this.tabularMode = savedTabularMode;
        if (!env.tabular && newMathList.length === 0) return null;
        if (env.tabular && array.length === 0) return null;
        const result = new MathAtom(this.parseMode, 'array', null, null, env.parser ? env.parser(envName, args, array) : {});
        result.array = array;
        result.body = newMathList;
        result.rowGaps = rowGaps;
        result.env = env;
        result.env.name = envName;
        return result;
    }
    /**
     * Parse a sequence terminated with a group end marker, such as
     * `}`, `\end`, `&`, etc...
     * Returns an array of atoms or an empty array if the sequence
     * terminates right away.
     * @param {function(Token):boolean} [done] A predicate indicating if a token signals the
     * end of an implicit group
     * @return {MathAtom[]}
     * @method Parser#scanImplicitGroup
     * @private
     */
    scanImplicitGroup(done) {
        // {black\color{red}red\color{green}green}black
        // An implicit group is a sequence of atoms that terminates with
        // a `'}'`, `'&'`, `'\'`, `'\cr'` or `'\end'` or the end of the stream
        if (!done) {
            done = token => token.type === '}' ||
                (token.type === 'literal' && token.value === '&') ||
                (token.type === 'command' && (token.value === 'end' ||
                    token.value === 'cr' ||
                    token.value === '\\'));
        }
        // To handle infix operators, we'll keep track of their prefix
        // (tokens coming before them)
        let infix = null; // A token
        let prefix = null; // A mathlist
        const savedMathlist = this.swapMathList([]);
        // if (this.index >= this.tokens.length) return true;
        // const token = this.tokens[this.index];
        while (!this.end() && !done(this.peek())) {
            if (this.hasImplicitCommand(SIZING_COMMANDS)) {
                // Implicit sizing command such as \Large, \small
                // affect the tokens following them
                // Note these commands are only appropriate in 'text' mode.
                const atom = new MathAtom(this.parseMode, 'sizing');
                atom.size = {
                    'tiny': 'size1',
                    'scriptsize': 'size2',
                    'footnotesize': 'size3',
                    'small': 'size4',
                    'normalsize': 'size5',
                    'large': 'size6',
                    'Large': 'size7',
                    'LARGE': 'size8',
                    'huge': 'size9',
                    'Huge': 'size10'
                }[this.get().value];
                this.mathList.push(atom);
            } else if (this.hasImplicitCommand(MATHSTYLE_COMMANDS)) {
                // Implicit math style commands such as \displaystyle, \textstyle...
                // Note these commands switch to math mode and a specific size
                // \textsize is the mathstyle used for inlinemath, not for text
                this.parseMode = 'math';
                const atom = new MathAtom('math', 'mathstyle');
                atom.mathstyle = this.get().value;
                this.mathList.push(atom);
            } else if (this.hasInfixCommand() && !infix) {
                // The next token is an infix and we have not seen one yet
                // (there can be only one infix command per implicit group).
                infix = this.get();
                // Save the math list so far and start a new one
                prefix = this.swapMathList([]);
            } else {
                this.parseAtom();
            }
        }
        let result;
        if (infix) {
            const suffix = this.swapMathList(savedMathlist);
            // The current parseMode, this.parseMode, may no longer have the value
            // it had when we encountered the infix. However, since all infix are
            // only defined in 'math' mode, we can use the 'math' constant
            // for the parseMode
            const info = Definitions.getInfo('\\' + infix.value, 'math', this.macros);
            if (info) {
                result = [new MathAtom(this.parseMode, info.type || 'mop', info.value || infix.value, info.fontFamily, info.handler ?
                    info.handler('\\' + infix.value, [prefix, suffix]) :
                    null)];
            } else {
                result = [new MathAtom(this.parseMode, 'mop', infix.value, '', null)];
            }
        } else {
            result = this.swapMathList(savedMathlist);
        }
        return result;
    }
    /**
     * Parse a group enclosed in a pair of braces: `{...}`.
     *
     * Return either a group MathAtom or null if not a group.
     *
     * Return a group MathAtom with an empty body if an empty
     * group (i.e. `{}`).
     * @return {MathAtom}
     * @method Parser#scanGroup
     * @private
     */
    scanGroup() {
        if (!this.parseToken('{')) return null;
        const result = new MathAtom(this.parseMode, 'group');
        result.body = this.scanImplicitGroup(token => token.type === '}');
        this.parseToken('}');
        return result;
    }
    scanSmartFence() {
        this.parseToken('space');
        if (!this.parseLiteral('(')) return null;
        // We've found an open paren... Convert to a `\mleft...\mright`
        const result = new MathAtom(this.parseMode, 'leftright');
        result.leftDelim = '(';
        result.inner = false; // It's a `\mleft`, not a `\left`
        const savedMathList = this.swapMathList([]);
        let nestLevel = 1;
        while (!this.end() && nestLevel !== 0) {
            if (this.hasLiteral('(')) nestLevel += 1;
            if (this.hasLiteral(')')) nestLevel -= 1;
            if (nestLevel !== 0) this.parseAtom();
        }
        if (nestLevel === 0) this.parseLiteral(')');
        result.rightDelim = nestLevel === 0 ? ')' : '?';
        result.body = this.swapMathList(savedMathList);
        return result;
    }
    /**
     * Scan a delimiter, e.g. '(', '|', '\vert', '\ulcorner'
     *
     * @return {string} The delimiter (as a character or command) or null
     * @memberof Parser
     * @method Parser#scanDelim
     * @private
     */
    scanDelim() {
        this.parseToken('space');
        const token = this.get();
        if (!token) return null;
        let delim = '.';
        if (token.type === 'command') {
            delim = '\\' + token.value;
        } else if (token.type === 'literal') {
            delim = token.value;
        }
        const info = Definitions.getInfo(delim, 'math', this.macros);
        if (!info) return null;
        if (info.type === 'mopen' || info.type === 'mclose') {
            return delim;
        }
        // Some symbols are not of type mopen/mclose, but are still
        // valid delimiters...
        // '?' is a special delimiter used as a 'placeholder'
        // (when the closing delimiter is displayed greyed out)
        if (['?', '|', '<', '>', '\\vert', '\\Vert', '\\|', '\\surd',
            '\\uparrow', '\\downarrow', '\\Uparrow', '\\Downarrow',
            '\\updownarrow', '\\Updownarrow',
            '\\mid', '\\mvert', '\\mVert'].includes(delim)) {
            return delim;
        }
        return null;
    }
    /**
     * Parse a `\left...\right` (or `\mleft...\mright`) sequence.
     *
     * Note: the `\middle` command can occur multiple times inside a
     * `\left...\right` sequence, and is handled separately.
     *
     * Return either an atom of type `'leftright'` or null
     * @return {MathAtom}
     * @method Parser#scanLeftRight
     * @private
     */
    scanLeftRight() {
        if (this.parseCommand('right') || this.parseCommand('mright')) {
            // We have an unbalanced left/right (there's a \right, but no \left)
            const result = new MathAtom(this.parseMode, 'leftright');
            result.rightDelim = this.scanDelim() || '.';
            return result;
        }
        let close = 'right';
        if (!this.parseCommand('left')) {
            if (!this.parseCommand('mleft')) return null;
            close = 'mright';
        }
        const leftDelim = this.scanDelim() || '.';
        const savedMathList = this.swapMathList([]);
        while (!this.end() && !this.parseCommand(close)) {
            this.parseAtom();
        }
        // If we've reached the end and there was no `\right` or
        // there isn't a valid delimiter after `\right`, we'll
        // consider the `\right` missing and set the `rightDelim` to undefined
        const rightDelim = this.scanDelim();
        const result = new MathAtom(this.parseMode, 'leftright');
        result.leftDelim = leftDelim;
        result.rightDelim = rightDelim;
        result.inner = close === 'right';
        result.body = this.swapMathList(savedMathList);
        return result;
    }
    /**
     * Parse a subscript/superscript: `^` and `_`.
     *
     * Modify the last atom accordingly.
     *
     * @return {boolean} True if at least one superscript, subscript or prime
     * was attached to an atom.
     * @method Parser#parseSupSub
     * @private
     */
    parseSupSub() {
        // No sup/sub in text or command mode.
        if (this.parseMode !== 'math') return false;
        // Apply the subscript/superscript to the last render atom.
        // If none is present (beginning of the mathlist, i.e. `{^2}`,
        // an empty atom will be created, equivalent to `{{}^2}`
        let result = false;
        while (this.hasToken('^') || this.hasToken('_') || this.hasLiteral("'")) {
            let supsub;
            if (this.hasToken('^')) {
                supsub = 'superscript';
            } else if (this.hasToken('_')) {
                supsub = 'subscript';
            }
            if (this.parseToken('^') || this.parseToken('_')) {
                const arg = this.scanArg();
                if (arg) {
                    const atom = this.lastMathAtom();
                    atom[supsub] = atom[supsub] || [];
                    atom[supsub] = atom[supsub].concat(arg);
                    result = true;
                }
            } else if (this.parseLiteral("'")) {
                // A single quote (prime) is actually equivalent to a
                // '^{\prime}'
                const atom = this.lastMathAtom();
                atom.superscript = atom.superscript || [];
                atom.superscript.push(new MathAtom(atom.parseMode, 'mord', '\u2032', 'main'));
                result = true;
            }
        }
        return result;
    }
    /**
     * Parse a `\limits` or `\nolimits` command.
     *
     * This will change the placement of limits to be either above or below
     * (if `\limits`) or in the superscript/subscript position (if `\nolimits`).
     *
     * This overrides the calculation made for the placement, which is usually
     * dependent on the displaystyle (`inlinemath` prefers `\nolimits`, while
     * `displaymath` prefers `\limits`).
     * @method Parser#parseLimits
     * @private
     */
    parseLimits() {
        // Note: technically, \limits and \nolimits are only applicable
        // after an operator. However, we apply them in all cases. They
        // will simply be ignored when not applicable (i.e. on a literal)
        // which is actually consistent with TeX.
        if (this.parseCommand('limits')) {
            const lastAtom = this.lastMathAtom();
            lastAtom.limits = 'limits';
            // Record that the limits was set through an explicit command
            // so we can generate the appropriate LaTeX later
            lastAtom.explicitLimits = true;
            return true;
        }
        if (this.parseCommand('nolimits')) {
            const lastAtom = this.lastMathAtom();
            lastAtom.limits = 'nolimits';
            // Record that the limits was set through an explicit command
            // so we can generate the appropriate LaTeX later
            lastAtom.explicitLimits = true;
            return true;
        }
        return false;
    }
    scanOptionalArg(parseMode) {
        parseMode = (!parseMode || parseMode === 'auto') ? this.parseMode : parseMode;
        this.parseToken('space');
        if (!this.parseLiteral('[')) return null;
        const savedParseMode = this.parseMode;
        this.parseMode = parseMode;
        const savedMathlist = this.swapMathList();
        let result;
        while (!this.end() && !this.parseLiteral(']')) {
            if (parseMode === 'string') {
                result = this.scanString();
            } else if (parseMode === 'number') {
                result = this.scanNumber();
            } else if (parseMode === 'dimen') {
                result = this.scanDimen();
            } else if (parseMode === 'skip') {
                result = this.scanSkip();
            } else if (parseMode === 'colspec') {
                result = this.scanColspec();
            } else if (parseMode === 'color') {
                result = this.scanColor() || '#ffffff';
            } else if (parseMode === 'bbox') {
                // The \bbox command takes a very particular argument:
                // a comma delimited list of up to three arguments:
                // a color, a dimension and a string.
                // Split the string by comma delimited sub-strings, ignoring commas
                // that may be inside (). For example"x, rgb(a, b, c)" would return
                // ['x', 'rgb(a, b, c)']
                const list = this.scanString().toLowerCase().trim().split(/,(?![^(]*\)(?:(?:[^(]*\)){2})*[^"]*$)/);
                for (const elem of list) {
                    const color = Color.stringToColor(elem);
                    if (color) {
                        result = result || {};
                        result.backgroundcolor = color;
                    } else {
                        const m = elem.match(/^\s*([0-9.]+)\s*([a-z][a-z])/);
                        if (m) {
                            result = result || {};
                            result.padding = FontMetrics.toEm(m[1], m[2]);
                        } else {
                            const m = elem.match(/^\s*border\s*:\s*(.*)/);
                            if (m) {
                                result = result || {};
                                result.border = m[1];
                            }
                        }
                    }
                }
            } else {
                console.assert(parseMode === 'math', 'Unexpected parse mode: "' + parseMode + '"');
                this.mathList = this.mathList.concat(this.scanImplicitGroup(token => token.type === 'literal' && token.value === ']'));
            }
        }
        this.parseMode = savedParseMode;
        const mathList = this.swapMathList(savedMathlist);
        return result ? result : mathList;
    }
    /**
     * Parse a math field, an argument to a function.
     *
     * An argument can either be a single atom or
     * a sequence of atoms enclosed in braces.
     *
     * @param {string} [parseMode] Temporarily overrides the parse mode of the
     * parser, for example `'dimen'`, `'color'`, `'text'`, etc. When omitted or
     * `'auto'`, the current parse mode is used.
     * @method Parser#scanArg
     * @private
     */
    scanArg(parseMode) {
        parseMode = (!parseMode || parseMode === 'auto') ? this.parseMode : parseMode;
        this.parseFiller();
        let result;
        // An argument (which is called a 'math field' in TeX)
        // could be a single character or symbol, as in `\frac12`
        // Note that ``\frac\sqrt{-1}\alpha\beta`` is equivalent to
        // ``\frac{\sqrt}{-1}{\beta}``
        if (!this.parseToken('{')) {
            if (parseMode === 'delim') {
                return this.scanDelim() || '.';
            } else if (parseMode === 'math') {
                // Parse a single atom.
                const savedParseMode = this.parseMode;
                this.parseMode = 'math';
                const atom = this.scanToken();
                this.parseMode = savedParseMode;
                if (Array.isArray(atom)) return atom;
                return atom ? [atom] : null;
            }
        }
        // If this is a param token, substitute it with the
        // (optional) argument passed to the parser
        if (this.hasToken('#')) {
            const paramToken = this.get();
            this.skipUntilToken('}');
            if (paramToken.value === '?') {
                return this.placeholder();
            }
            if (this.args) {
                if (this.args[paramToken.value] === undefined &&
                    this.args['?'] !== undefined) {
                    return this.placeholder();
                }
                return this.args[paramToken.value] || null;
            }
            return null;
        }
        const savedParseMode = this.parseMode;
        this.parseMode = parseMode;
        const savedMathList = this.swapMathList([]);
        if (parseMode === 'string') {
            result = this.scanString();
            this.skipUntilToken('}');
        } else if (parseMode === 'number') {
            result = this.scanNumber();
            this.skipUntilToken('}');
        } else if (parseMode === 'dimen') {
            result = this.scanDimen();
            this.skipUntilToken('}');
        } else if (parseMode === 'skip') {
            result = this.scanSkip();
            this.skipUntilToken('}');
        } else if (parseMode === 'colspec') {
            result = this.scanColspec();
            this.skipUntilToken('}');
        } else if (parseMode === 'color') {
            result = this.scanColor() || '#ffffff';
            this.skipUntilToken('}');
        } else if (parseMode === 'delim') {
            result = this.scanDelim() || '.';
            this.skipUntilToken('}');
        } else {
            console.assert(parseMode === 'math' || parseMode === 'text', 'Unexpected parse mode: "' + parseMode + '"');
            do {
                this.mathList = this.mathList.concat(this.scanImplicitGroup());
            } while (!this.parseToken('}') && !this.end());
        }
        this.parseMode = savedParseMode;
        const mathList = this.swapMathList(savedMathList);
        return result ? result : mathList;
    }
    /**
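     * @return {MathAtom[]|MathAtom|null} The atom (or atoms) for the next
     * token, or `null` if there is no token left or if the token does not
     * produce an atom.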
     * @method Parser#scanToken
     * @private
     */
    scanToken() {
        const token = this.get();
        if (!token) return null;
        let result = null;
        if (token.type === 'space') {
            if (this.parseMode === 'text') {
                result = new MathAtom('text', 'textord', ' ');
            }
        } else if (token.type === 'esc') {
            // RENDER ESCAPE SEQUENCE INDICATOR
            result = new MathAtom(this.parseMode, 'esc', 'ESC');
        } else if (token.type === 'backslash') {
            // RENDER BACKSLASH INDICATOR
            result = new MathAtom(this.parseMode, 'command', '\\');
        } else if (token.type === 'commandliteral' || token.type === 'backslash') {
            // RENDER ESCAPE SEQUENCE
            let body = token.value;
            while (this.hasToken('commandliteral') ||
                this.hasToken('backslash')) {
                body += this.get().value;
            }
            result = new MathAtom(this.parseMode, 'command', body);
        } else if (token.type === 'placeholder') {
            // RENDER PLACEHOLDER
            result = new MathAtom(this.parseMode, 'placeholder', token.value);
        } else if (token.type === 'command') {
            // RENDER COMMAND
            if (token.value === 'placeholder') {
                result = new MathAtom(this.parseMode, 'placeholder', this.scanArg('string'));
            } else if (token.value === 'char') {
                // \char has a special syntax and requires a non-braced integer
                // argument
                let codepoint = Math.floor(this.scanNumber(true));
                if (!isFinite(codepoint) || codepoint < 0 || codepoint > 0x10FFFF) {
                    codepoint = 0x2753; // BLACK QUESTION MARK
                }
                result = new MathAtom(this.parseMode, this.parseMode === 'math' ? 'mord' : 'textord', String.fromCodePoint(codepoint), 'main');
                result.latex = '{\\char"' +
                    ('000000' + codepoint.toString(16)).toUpperCase().substr(-6) + '}';
            } else if (token.value === 'hskip' || token.value === 'kern') {
                // \hskip and \kern have a special syntax and requires a non-braced
                // 'skip' argument
                const width = this.scanSkip();
                if (!isNaN(width)) {
                    result = new MathAtom(this.parseMode, 'spacing');
                    result.width = width;
                }
            } else {
                result = this.scanMacro(token.value);
                if (!result) {
                    const info = Definitions.getInfo('\\' + token.value, this.parseMode, this.macros);
                    const args = [];
                    // Parse the arguments
                    if (info && info.params) {
                        for (const param of info.params) {
                            // Parse an argument
                            if (param.optional) {
                                // If it's not present, return the default argument value
                                const arg = this.scanOptionalArg(param.type);
                                // args.push(arg ? arg : param.defaultValue); @todo defaultvalue
                                args.push(arg);
                            } else {
                                // If it's not present, scanArg returns null.
                                // Add a placeholder instead.
                                const arg = this.scanArg(param.type);
                                if (arg && arg.length === 1 &&
                                    arg[0].type === 'placeholder' && param.placeholder) {
                                    arg[0].value = param.placeholder;
                                }
                                if (arg) {
                                    args.push(arg);
                                } else if (param.placeholder) {
                                    args.push([new MathAtom(this.parseMode, 'placeholder', param.placeholder)]);
                                } else {
                                    args.push(this.placeholder());
                                }
                            }
                        }
                    }
                    if (info && !info.infix) {
                        // Infix commands should be handled in scanImplicitGroup
                        // If we find an infix command here, it's a syntax error
                        // (second infix command in an implicit group) and should be ignored.
                        // Create the MathAtom
                        // If a handler is present, invoke it with the arguments,
                        // and pass the result to be appended by the constructor.
                        if (info.handler) {
                            result = new MathAtom(this.parseMode, info.type, null, info.fontFamily, info.handler('\\' + token.value, args));
                        } else {
                            result = new MathAtom(this.parseMode, info.type || 'mop', info.value || token.value, info.fontFamily);
                        }
                        result.latex = '\\' + token.value + ' ';
                        if (result.isFunction && this.smartFence) {
                            // The atom was a function that may be followed by
                            // an argument, like `\sin(`
                            const smartFence = this.scanSmartFence();
                            if (smartFence) {
                                result = [result, smartFence];
                            }
                        }
                    }
                }
            }
        } else if (token.type === 'literal') {
            const info = Definitions.getInfo(token.value, this.parseMode, this.macros);
            if (info) {
                result = new MathAtom(this.parseMode, info.type, info.value || token.value, info.fontFamily);
                if (info.isFunction) {
                    result.isFunction = true;
                }
            } else {
                // console.warn('Unknown literal "' + token.value +
                //     '" (U+' + ('000000' + token.value.charCodeAt(0).toString(16)).substr(-6) + ')');
                result = new MathAtom(this.parseMode, this.parseMode === 'math' ? 'mord' : 'textord', token.value, 'main');
            }
            result.latex = Definitions.matchCodepoint(token.value);
            if (info && info.isFunction && this.smartFence) {
                // The atom was a function that may be followed by
                // an argument, like `f(`.
                const smartFence = this.scanSmartFence();
                if (smartFence) {
                    result = [result, smartFence];
                }
            }
        } else if (token.type === '#') {
            // Parameter token in an implicit group (not as a parameter)
            if (token.value === '?') {
                // '#?' indicates that a placeholder should be used
                result = this.placeholder();
            } else if (this.args) {
                result = this.args[token.value] || null;
                if (Array.isArray(result) && result.length === 1) {
                    result = result[0];
                } else if (Array.isArray(result)) {
                    const group = new MathAtom(this.parseMode, 'group');
                    group.body = result;
                    result = group;
                } else {
                    // If there is no argument value specified, use a placeholder
                    result = this.placeholder();
                }
            }
        } else {
            console.warn('Unexpected token type "' + token.type + '"');
        }
        return result;
    }
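    /**
     * Attempt to expand `macro` as one of the macros defined in `this.macros`.
     *
     * @param {string} macro - The name of the command, without the leading
     * backslash.
     * @return {MathAtom} A `'group'` atom wrapping the expansion of the macro
     * if successful, or `null` if `macro` is not the name of a macro.
     * @method Parser#scanMacro
     * @private
     */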
    scanMacro(macro) {
        if (!this.macros || !this.macros[macro]) return null;
        const args = {};
        let def;
        let argCount = 0;
        if (typeof this.macros[macro] === 'string') {
            def = this.macros[macro];
            // Let's see if there are arguments in the definition.
            if (/(^|[^\\])#1/.test(def)) argCount = 1;
            if (/(^|[^\\])#2/.test(def)) argCount = 2;
            if (/(^|[^\\])#3/.test(def)) argCount = 3;
            if (/(^|[^\\])#4/.test(def)) argCount = 4;
            if (/(^|[^\\])#5/.test(def)) argCount = 5;
            if (/(^|[^\\])#6/.test(def)) argCount = 6;
            if (/(^|[^\\])#7/.test(def)) argCount = 7;
            if (/(^|[^\\])#8/.test(def)) argCount = 8;
            if (/(^|[^\\])#9/.test(def)) argCount = 9;
        } else {
            def = this.macros[macro].def;
            argCount = (this.macros[macro].args || 0);
        }
        for (let i = 1; i <= argCount; i++) {
            args[i] = this.scanArg();
        }
        // Carry forward the placeholder argument, if any.
        if (this.args && typeof this.args['?'] === 'string') {
            args['?'] = this.args['?'];
        }
        // Group the result of the macro expansion, and set the
        // captureSelection attribute so that it is handled as an unbreakable
        // unit
        const atom = new MathAtom(this.parseMode, 'group', parseTokens(Lexer.tokenize(def), this.parseMode, args, this.macros));
        atom.captureSelection = true;
        atom.latex = '\\' + macro;
        let argString = '';
        for (let i = 1; i <= argCount; i++) {
            argString += '{';
            if (Array.isArray(args[i])) {
                for (let j = 0; j < args[i].length; j++) {
                    argString += args[i][j].latex;
                }
            }
            argString += '}';
        }
        atom.latex += argString ? argString : ' ';
        return atom;
    }
    /**
     * Make a MathAtom for the current token or token group and
     * add it to the parser's current mathList
     * @method Parser#parseAtom
     * @private
     */
    parseAtom() {
        let result = this.scanEnvironment() ||
            this.scanModeShift() ||
            this.scanModeSet() ||
            this.scanGroup() ||
            this.scanLeftRight();
        if (!result && (this.parseSupSub() || this.parseLimits())) return true;
        if (!result) result = this.scanToken();
        // If we have an atom to add, push it at the end of the current math list
        // We could have no atom for tokens that were skipped, a ' ' in mathmode
        // for example
        if (Array.isArray(result)) {
            this.mathList = this.mathList.concat(result);
        } else if (result) {
            this.mathList.push(result);
        }
        return result !== null;
    }
}





















// Names (without the leading backslash) of the font sizing commands.
const SIZING_COMMANDS = [
    'tiny',
    'scriptsize',
    'footnotesize',
    'small',
    'normalsize',
    'large',
    'Large',
    'LARGE',
    'huge',
    'Huge',
];

// Names (without the leading backslash) of the math style commands.
const MATHSTYLE_COMMANDS = [
    'displaystyle',
    'textstyle',
    'scriptstyle',
    'scriptscriptstyle',
];













































/**
 * Convert an array of tokens produced by the lexer to the corresponding
 * math list (an array of atoms).
 *
 * @param {Array.<Token>} tokens
 * @param {string} [parseMode='math'] The initial parse mode of the parser.
 * Other possible values include `'text'`, `'color'`, etc...
 * @param {Object} [args] - The values substituted for the parameter tokens
 * (e.g. `#1`, `#2`, etc...), if any. Passed as is to the parser.
 * @param {Object} [macros] - A dictionary of macro definitions. Passed as is
 * to the parser.
 * @param {boolean} [smartFence=false] If true, promote plain fences, e.g. `(`,
 * as `\left...\right` or `\mleft...\mright`
 * @return {Array.<MathAtom>}
 * @private
 */
function parseTokens(tokens, parseMode, args, macros, smartFence) {
    const parser = new Parser(tokens, args, macros);
    parser.parseMode = parseMode || 'math';
    // The property is only set on the parser when smart fences are requested
    if (smartFence) parser.smartFence = true;

    // Scan one implicit group after the other until the tokens are exhausted
    let atoms = [];
    while (!parser.end()) {
        const group = parser.scanImplicitGroup();
        atoms = atoms.concat(group);
    }
    return atoms;
}

// Public interface of this module: the `Parser` class and the `parseTokens()`
// function.
export default {
    Parser,
    parseTokens,
};