diff --git a/packages/compiler/src/ml_parser/lexer.ts b/packages/compiler/src/ml_parser/lexer.ts index d62a54f576..d7306a2389 100644 --- a/packages/compiler/src/ml_parser/lexer.ts +++ b/packages/compiler/src/ml_parser/lexer.ts @@ -29,7 +29,8 @@ export enum TokenType { CDATA_END, ATTR_NAME, ATTR_QUOTE, - ATTR_VALUE, + ATTR_VALUE_TEXT, + ATTR_VALUE_INTERPOLATION, DOC_TYPE, EXPANSION_FORM_START, EXPANSION_CASE_VALUE, @@ -228,7 +229,8 @@ class _Tokenizer { this._consumeTagOpen(start); } } else if (!(this._tokenizeIcu && this._tokenizeExpansionForm())) { - this._consumeText(); + this._consumeWithInterpolation( + TokenType.TEXT, TokenType.INTERPOLATION, () => this._isTextEnd()); } } catch (e) { this.handleError(e); @@ -595,29 +597,25 @@ class _Tokenizer { private _consumeAttributeValue() { let value: string; if (this._cursor.peek() === chars.$SQ || this._cursor.peek() === chars.$DQ) { - this._beginToken(TokenType.ATTR_QUOTE); const quoteChar = this._cursor.peek(); - this._cursor.advance(); - this._endToken([String.fromCodePoint(quoteChar)]); - this._beginToken(TokenType.ATTR_VALUE); - const parts: string[] = []; - while (this._cursor.peek() !== quoteChar) { - parts.push(this._readChar(true)); - } - value = parts.join(''); - this._endToken([this._processCarriageReturns(value)]); - this._beginToken(TokenType.ATTR_QUOTE); - this._cursor.advance(); - this._endToken([String.fromCodePoint(quoteChar)]); + this._consumeQuote(quoteChar); + this._consumeWithInterpolation( + TokenType.ATTR_VALUE_TEXT, TokenType.ATTR_VALUE_INTERPOLATION, + () => this._cursor.peek() === quoteChar); + this._consumeQuote(quoteChar); } else { - this._beginToken(TokenType.ATTR_VALUE); - const valueStart = this._cursor.clone(); - this._requireCharCodeUntilFn(isNameEnd, 1); - value = this._cursor.getChars(valueStart); - this._endToken([this._processCarriageReturns(value)]); + const endPredicate = () => isNameEnd(this._cursor.peek()); + this._consumeWithInterpolation( + TokenType.ATTR_VALUE_TEXT, 
TokenType.ATTR_VALUE_INTERPOLATION, endPredicate); } } + private _consumeQuote(quoteChar: number) { + this._beginToken(TokenType.ATTR_QUOTE); + this._requireCharCode(quoteChar); + this._endToken([String.fromCodePoint(quoteChar)]); + } + private _consumeTagOpenEnd() { const tokenType = this._attemptCharCode(chars.$SLASH) ? TokenType.TAG_OPEN_END_VOID : TokenType.TAG_OPEN_END; @@ -696,21 +694,31 @@ class _Tokenizer { this._expansionCaseStack.pop(); } - private _consumeText() { - this._beginToken(TokenType.TEXT); + /** + * Consume a string that may contain interpolation expressions. + * The first token consumed will be of `textTokenType` and then there will be alternating + * `interpolationTokenType` and `textTokenType` tokens until the `endPredicate()` returns true. + * + * @param textTokenType the kind of tokens to interleave around interpolation tokens. + * @param interpolationTokenType the kind of tokens that contain interpolation. + * @param endPredicate a function that should return true when we should stop consuming. + */ + private _consumeWithInterpolation( + textTokenType: TokenType, interpolationTokenType: TokenType, endPredicate: () => boolean) { + this._beginToken(textTokenType); const parts: string[] = []; - do { + while (!endPredicate()) { const current = this._cursor.clone(); if (this._interpolationConfig && this._attemptStr(this._interpolationConfig.start)) { this._endToken([this._processCarriageReturns(parts.join(''))], current); - this._consumeInterpolation(current); + this._consumeInterpolation(interpolationTokenType, current); parts.length = 0; - this._beginToken(TokenType.TEXT); + this._beginToken(textTokenType); } else { parts.push(this._readChar(true)); } - } while (!this._isTextEnd()); + } // It is possible that an interpolation was started but not ended inside this text token. // Make sure that we reset the state of the lexer correctly. 
@@ -719,14 +727,15 @@ class _Tokenizer { this._endToken([this._processCarriageReturns(parts.join(''))]); } - private _consumeInterpolation(interpolationStart: CharacterCursor) { + private _consumeInterpolation( + interpolationTokenType: TokenType, interpolationStart: CharacterCursor) { const parts: string[] = []; - this._beginToken(TokenType.INTERPOLATION, interpolationStart); + this._beginToken(interpolationTokenType, interpolationStart); parts.push(this._interpolationConfig.start); // Find the end of the interpolation, ignoring content inside quotes. const expressionStart = this._cursor.clone(); - let inQuote: string|null = null; + let inQuote: number|null = null; let inComment = false; while (this._cursor.peek() !== chars.$EOF) { const current = this._cursor.clone(); @@ -752,14 +761,15 @@ class _Tokenizer { } } - const char = this._readChar(true); - if (char === '\\') { + const char = this._cursor.peek(); + this._cursor.advance(); + if (char === chars.$BACKSLASH) { // Skip the next character because it was escaped. 
- this._readChar(true); + this._cursor.advance(); } else if (char === inQuote) { // Exiting the current quoted string inQuote = null; - } else if (!inComment && /['"`]/.test(char)) { + } else if (!inComment && chars.isQuote(char)) { // Entering a new quoted string inQuote = char; } diff --git a/packages/compiler/src/ml_parser/parser.ts b/packages/compiler/src/ml_parser/parser.ts index fd01357d43..9ac0b944e4 100644 --- a/packages/compiler/src/ml_parser/parser.ts +++ b/packages/compiler/src/ml_parser/parser.ts @@ -6,7 +6,7 @@ * found in the LICENSE file at https://angular.io/license */ -import {ParseError, ParseSourceSpan} from '../parse_util'; +import {ParseError, ParseLocation, ParseSourceSpan} from '../parse_util'; import * as html from './ast'; import {NAMED_ENTITIES} from './entities'; @@ -362,27 +362,49 @@ class _TreeBuilder { private _consumeAttr(attrName: lex.Token): html.Attribute { const fullName = mergeNsAndName(attrName.parts[0], attrName.parts[1]); - let end = attrName.sourceSpan.end; - let value = ''; - let valueSpan: ParseSourceSpan = undefined!; + let attrEnd = attrName.sourceSpan.end; + + // Consume any quote if (this._peek.type === lex.TokenType.ATTR_QUOTE) { this._advance(); } - if (this._peek.type === lex.TokenType.ATTR_VALUE) { - const valueToken = this._advance(); - value = valueToken.parts[0]; - end = valueToken.sourceSpan.end; - valueSpan = valueToken.sourceSpan; + + // Consume the value + let value = ''; + let valueStartSpan: ParseSourceSpan|undefined = undefined; + let valueEnd: ParseLocation|undefined = undefined; + if (this._peek.type === lex.TokenType.ATTR_VALUE_TEXT) { + valueStartSpan = this._peek.sourceSpan; + valueEnd = this._peek.sourceSpan.end; + // For now we are recombining text and interpolation tokens + while (this._peek.type === lex.TokenType.ATTR_VALUE_TEXT || + this._peek.type === lex.TokenType.ATTR_VALUE_INTERPOLATION) { + let valueToken = this._advance(); + if (valueToken.type === lex.TokenType.ATTR_VALUE_INTERPOLATION) { + 
// For backward compatibility we decode HTML entities that appear in interpolation + // expressions. This is arguably a bug, but it could be a considerable breaking change to + // fix it. It should be addressed in a larger project to refactor the entire parser/lexer + // chain after View Engine has been removed. + value += valueToken.parts.join('').replace(/&([^;]+);/g, decodeEntity); + } else { + value += valueToken.parts.join(''); + } + valueEnd = attrEnd = valueToken.sourceSpan.end; + } } + + // Consume any quote if (this._peek.type === lex.TokenType.ATTR_QUOTE) { const quoteToken = this._advance(); - end = quoteToken.sourceSpan.end; + attrEnd = quoteToken.sourceSpan.end; } - const keySpan = new ParseSourceSpan(attrName.sourceSpan.start, attrName.sourceSpan.end); + + const valueSpan = valueStartSpan && valueEnd && + new ParseSourceSpan(valueStartSpan.start, valueEnd, valueStartSpan.fullStart); return new html.Attribute( fullName, value, - new ParseSourceSpan(attrName.sourceSpan.start, end, attrName.sourceSpan.fullStart), keySpan, - valueSpan); + new ParseSourceSpan(attrName.sourceSpan.start, attrEnd, attrName.sourceSpan.fullStart), + attrName.sourceSpan, valueSpan); } private _getParentElement(): html.Element|null { diff --git a/packages/compiler/test/ml_parser/html_parser_spec.ts b/packages/compiler/test/ml_parser/html_parser_spec.ts index 279bca60d3..4fbfa75692 100644 --- a/packages/compiler/test/ml_parser/html_parser_spec.ts +++ b/packages/compiler/test/ml_parser/html_parser_spec.ts @@ -250,6 +250,19 @@ import {humanizeDom, humanizeDomSourceSpans, humanizeLineColumn, humanizeNodes} ]); }); + it('should decode HTML entities in interpolated attributes', () => { + // Note that the detail of decoding corner-cases is tested in the + // "should decode HTML entities in interpolations" spec. + expect(humanizeDomSourceSpans(parser.parse('
', 'TestComp'))) + .toEqual([ + [ + html.Element, 'div', 0, '', '