@@ -294,6 +294,39 @@ void Lexer::lexCommentText(Token &T) {
294294 assert (CommentState == LCS_InsideBCPLComment ||
295295 CommentState == LCS_InsideCComment);
296296
297+ // Lexes one token that is not a command, starting at BufferPtr: a newline token when the next character is a line feed or carriage return, otherwise a text token that runs up to the first character found in TokStartSymbols (or to CommentEnd when none is found).
298+ auto HandleNonCommandToken = [&]() -> void {
299+ assert (State == LS_Normal);
300+
301+ const char *TokenPtr = BufferPtr;
302+ assert (TokenPtr < CommentEnd);
303+ switch (*TokenPtr) {
304+ case ' \n ' :
305+ case ' \r ' :
306+ TokenPtr = skipNewline (TokenPtr, CommentEnd);
307+ formTokenWithChars (T, TokenPtr, tok::newline);
308+
309+ if (CommentState == LCS_InsideCComment) // this extra step is taken for C comments only, not BCPL comments
310+ skipLineStartingDecorations ();
311+ return ;
312+
313+ default : {
314+ StringRef TokStartSymbols = ParseCommands ? " \n\r\\ @&<" : " \n\r " ; // with command parsing off, the shorter terminator set is used
315+ size_t End = StringRef (TokenPtr, CommentEnd - TokenPtr)
316+ .find_first_of (TokStartSymbols);
317+ if (End != StringRef::npos)
318+ TokenPtr += End; // TokenPtr now points at the first matching character
319+ else
320+ TokenPtr = CommentEnd; // no match: the text token extends to CommentEnd
321+ formTextToken (T, TokenPtr);
322+ return ;
323+ }
324+ }
325+ };
326+
327+ if (!ParseCommands)
328+ return HandleNonCommandToken ();
329+
297330 switch (State) {
298331 case LS_Normal:
299332 break ;
@@ -315,136 +348,116 @@ void Lexer::lexCommentText(Token &T) {
315348 }
316349
317350 assert (State == LS_Normal);
318-
319351 const char *TokenPtr = BufferPtr;
320352 assert (TokenPtr < CommentEnd);
321- while (TokenPtr != CommentEnd) {
322- switch (*TokenPtr) {
323- case ' \\ ' :
324- case ' @' : {
325- // Commands that start with a backslash and commands that start with
326- // 'at' have equivalent semantics. But we keep information about the
327- // exact syntax in AST for comments.
328- tok::TokenKind CommandKind =
329- (*TokenPtr == ' @' ) ? tok::at_command : tok::backslash_command;
353+ switch (*TokenPtr) {
354+ case ' \\ ' :
355+ case ' @' : {
356+ // Commands that start with a backslash and commands that start with
357+ // 'at' have equivalent semantics. But we keep information about the
358+ // exact syntax in AST for comments.
359+ tok::TokenKind CommandKind =
360+ (*TokenPtr == ' @' ) ? tok::at_command : tok::backslash_command;
361+ TokenPtr++;
362+ if (TokenPtr == CommentEnd) {
363+ formTextToken (T, TokenPtr);
364+ return ;
365+ }
366+ char C = *TokenPtr;
367+ switch (C) {
368+ default :
369+ break ;
370+
371+ case ' \\ ' : case ' @' : case ' &' : case ' $' :
372+ case ' #' : case ' <' : case ' >' : case ' %' :
373+ case ' \" ' : case ' .' : case ' :' :
374+ // This is one of \\ \@ \& \$ etc escape sequences.
330375 TokenPtr++;
331- if (TokenPtr == CommentEnd) {
332- formTextToken (T, TokenPtr);
333- return ;
334- }
335- char C = *TokenPtr;
336- switch (C) {
337- default :
338- break ;
339-
340- case ' \\ ' : case ' @' : case ' &' : case ' $' :
341- case ' #' : case ' <' : case ' >' : case ' %' :
342- case ' \" ' : case ' .' : case ' :' :
343- // This is one of \\ \@ \& \$ etc escape sequences.
376+ if (C == ' :' && TokenPtr != CommentEnd && *TokenPtr == ' :' ) {
377+ // This is the \:: escape sequence.
344378 TokenPtr++;
345- if (C == ' :' && TokenPtr != CommentEnd && *TokenPtr == ' :' ) {
346- // This is the \:: escape sequence.
347- TokenPtr++;
348- }
349- StringRef UnescapedText (BufferPtr + 1 , TokenPtr - (BufferPtr + 1 ));
350- formTokenWithChars (T, TokenPtr, tok::text);
351- T.setText (UnescapedText);
352- return ;
353379 }
380+ StringRef UnescapedText (BufferPtr + 1 , TokenPtr - (BufferPtr + 1 ));
381+ formTokenWithChars (T, TokenPtr, tok::text);
382+ T.setText (UnescapedText);
383+ return ;
384+ }
354385
355- // Don't make zero-length commands.
356- if (!isCommandNameStartCharacter (*TokenPtr)) {
357- formTextToken (T, TokenPtr);
358- return ;
359- }
386+ // Don't make zero-length commands.
387+ if (!isCommandNameStartCharacter (*TokenPtr)) {
388+ formTextToken (T, TokenPtr);
389+ return ;
390+ }
360391
361- TokenPtr = skipCommandName (TokenPtr, CommentEnd);
362- unsigned Length = TokenPtr - (BufferPtr + 1 );
363-
364- // Hardcoded support for lexing LaTeX formula commands
365- // \f$ \f[ \f] \f{ \f} as a single command.
366- if (Length == 1 && TokenPtr[-1 ] == ' f' && TokenPtr != CommentEnd) {
367- C = *TokenPtr;
368- if (C == ' $' || C == ' [' || C == ' ]' || C == ' {' || C == ' }' ) {
369- TokenPtr++;
370- Length++;
371- }
372- }
392+ TokenPtr = skipCommandName (TokenPtr, CommentEnd);
393+ unsigned Length = TokenPtr - (BufferPtr + 1 );
373394
374- StringRef CommandName (BufferPtr + 1 , Length);
375-
376- const CommandInfo *Info = Traits.getCommandInfoOrNULL (CommandName);
377- if (!Info) {
378- if ((Info = Traits.getTypoCorrectCommandInfo (CommandName))) {
379- StringRef CorrectedName = Info->Name ;
380- SourceLocation Loc = getSourceLocation (BufferPtr);
381- SourceLocation EndLoc = getSourceLocation (TokenPtr);
382- SourceRange FullRange = SourceRange (Loc, EndLoc);
383- SourceRange CommandRange (Loc.getLocWithOffset (1 ), EndLoc);
384- Diag (Loc, diag::warn_correct_comment_command_name)
385- << FullRange << CommandName << CorrectedName
386- << FixItHint::CreateReplacement (CommandRange, CorrectedName);
387- } else {
388- formTokenWithChars (T, TokenPtr, tok::unknown_command);
389- T.setUnknownCommandName (CommandName);
390- Diag (T.getLocation (), diag::warn_unknown_comment_command_name)
391- << SourceRange (T.getLocation (), T.getEndLocation ());
392- return ;
393- }
394- }
395- if (Info->IsVerbatimBlockCommand ) {
396- setupAndLexVerbatimBlock (T, TokenPtr, *BufferPtr, Info);
397- return ;
398- }
399- if (Info->IsVerbatimLineCommand ) {
400- setupAndLexVerbatimLine (T, TokenPtr, Info);
401- return ;
395+ // Hardcoded support for lexing LaTeX formula commands
396+ // \f$ \f[ \f] \f{ \f} as a single command.
397+ if (Length == 1 && TokenPtr[-1 ] == ' f' && TokenPtr != CommentEnd) {
398+ C = *TokenPtr;
399+ if (C == ' $' || C == ' [' || C == ' ]' || C == ' {' || C == ' }' ) {
400+ TokenPtr++;
401+ Length++;
402402 }
403- formTokenWithChars (T, TokenPtr, CommandKind);
404- T.setCommandID (Info->getID ());
405- return ;
406403 }
407404
408- case ' &' :
409- lexHTMLCharacterReference (T);
410- return ;
411-
412- case ' <' : {
413- TokenPtr++;
414- if (TokenPtr == CommentEnd) {
415- formTextToken (T, TokenPtr);
405+ StringRef CommandName (BufferPtr + 1 , Length);
406+
407+ const CommandInfo *Info = Traits.getCommandInfoOrNULL (CommandName);
408+ if (!Info) {
409+ if ((Info = Traits.getTypoCorrectCommandInfo (CommandName))) {
410+ StringRef CorrectedName = Info->Name ;
411+ SourceLocation Loc = getSourceLocation (BufferPtr);
412+ SourceLocation EndLoc = getSourceLocation (TokenPtr);
413+ SourceRange FullRange = SourceRange (Loc, EndLoc);
414+ SourceRange CommandRange (Loc.getLocWithOffset (1 ), EndLoc);
415+ Diag (Loc, diag::warn_correct_comment_command_name)
416+ << FullRange << CommandName << CorrectedName
417+ << FixItHint::CreateReplacement (CommandRange, CorrectedName);
418+ } else {
419+ formTokenWithChars (T, TokenPtr, tok::unknown_command);
420+ T.setUnknownCommandName (CommandName);
421+ Diag (T.getLocation (), diag::warn_unknown_comment_command_name)
422+ << SourceRange (T.getLocation (), T.getEndLocation ());
416423 return ;
417424 }
418- const char C = *TokenPtr;
419- if (isHTMLIdentifierStartingCharacter (C))
420- setupAndLexHTMLStartTag (T);
421- else if (C == ' /' )
422- setupAndLexHTMLEndTag (T);
423- else
424- formTextToken (T, TokenPtr);
425+ }
426+ if (Info->IsVerbatimBlockCommand ) {
427+ setupAndLexVerbatimBlock (T, TokenPtr, *BufferPtr, Info);
425428 return ;
426429 }
427-
428- case ' \n ' :
429- case ' \r ' :
430- TokenPtr = skipNewline (TokenPtr, CommentEnd);
431- formTokenWithChars (T, TokenPtr, tok::newline);
432-
433- if (CommentState == LCS_InsideCComment)
434- skipLineStartingDecorations ();
430+ if (Info->IsVerbatimLineCommand ) {
431+ setupAndLexVerbatimLine (T, TokenPtr, Info);
435432 return ;
433+ }
434+ formTokenWithChars (T, TokenPtr, CommandKind);
435+ T.setCommandID (Info->getID ());
436+ return ;
437+ }
436438
437- default : {
438- size_t End = StringRef (TokenPtr, CommentEnd - TokenPtr).
439- find_first_of ( " \n\r\\ @&< " ) ;
440- if (End != StringRef::npos)
441- TokenPtr += End;
442- else
443- TokenPtr = CommentEnd;
439+ case ' & ' :
440+ lexHTMLCharacterReference (T);
441+ return ;
442+
443+ case ' < ' : {
444+ TokenPtr++;
445+ if ( TokenPtr == CommentEnd) {
444446 formTextToken (T, TokenPtr);
445447 return ;
446448 }
449+ const char C = *TokenPtr;
450+ if (isHTMLIdentifierStartingCharacter (C))
451+ setupAndLexHTMLStartTag (T);
452+ else if (C == ' /' )
453+ setupAndLexHTMLEndTag (T);
454+ else
455+ formTextToken (T, TokenPtr);
456+ return ;
447457 }
458+
459+ default :
460+ return HandleNonCommandToken ();
448461 }
449462}
450463
@@ -727,14 +740,13 @@ void Lexer::lexHTMLEndTag(Token &T) {
727740}
728741
729742Lexer::Lexer (llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags,
730- const CommandTraits &Traits,
731- SourceLocation FileLoc,
732- const char *BufferStart, const char *BufferEnd):
733- Allocator (Allocator), Diags(Diags), Traits(Traits),
734- BufferStart (BufferStart), BufferEnd(BufferEnd),
735- FileLoc (FileLoc), BufferPtr(BufferStart),
736- CommentState (LCS_BeforeComment), State(LS_Normal) {
737- }
743+ const CommandTraits &Traits, SourceLocation FileLoc,
744+ const char *BufferStart, const char *BufferEnd,
745+ bool ParseCommands) // when false, lexCommentText returns through its non-command path before looking for commands
746+ : Allocator(Allocator), Diags(Diags), Traits(Traits),
747+ BufferStart (BufferStart), BufferEnd(BufferEnd), FileLoc(FileLoc),
748+ BufferPtr(BufferStart), CommentState(LCS_BeforeComment), State(LS_Normal), // the cursor starts at BufferStart with CommentState LCS_BeforeComment and State LS_Normal
749+ ParseCommands(ParseCommands) {} // empty body: all members are set in the initializer list
738750
739751void Lexer::lex (Token &T) {
740752again:
0 commit comments