@@ -371,141 +371,101 @@ static const char *jsonKindToString(json::Value::Kind K) {
371371 llvm_unreachable (" Unknown json::Value::Kind" );
372372}
373373
374- static Tag findNextTag (StringRef Template, size_t StartPos, StringRef Open,
375- StringRef Close) {
376- const StringLiteral TripleOpen (" {{{" );
377- const StringLiteral TripleClose (" }}}" );
378-
379- size_t NormalOpenPos = Template.find (Open, StartPos);
380- size_t TripleOpenPos = Template.find (TripleOpen, StartPos);
381-
382- Tag Result;
383-
384- // Determine which tag comes first.
385- if (TripleOpenPos != StringRef::npos &&
386- (NormalOpenPos == StringRef::npos || TripleOpenPos <= NormalOpenPos)) {
387- // Found a triple mustache tag.
388- size_t EndPos =
389- Template.find (TripleClose, TripleOpenPos + TripleOpen.size ());
390- if (EndPos == StringRef::npos)
391- return Result; // No closing tag found.
392-
393- Result.TagKind = Tag::Kind::Triple;
394- Result.StartPosition = TripleOpenPos;
395- size_t ContentStart = TripleOpenPos + TripleOpen.size ();
396- Result.Content = Template.substr (ContentStart, EndPos - ContentStart);
397- Result.FullMatch = Template.substr (
398- TripleOpenPos, (EndPos + TripleClose.size ()) - TripleOpenPos);
399- } else if (NormalOpenPos != StringRef::npos) {
400- // Found a normal mustache tag.
401- size_t EndPos = Template.find (Close, NormalOpenPos + Open.size ());
402- if (EndPos == StringRef::npos)
403- return Result; // No closing tag found.
404-
405- Result.TagKind = Tag::Kind::Normal;
406- Result.StartPosition = NormalOpenPos;
407- size_t ContentStart = NormalOpenPos + Open.size ();
408- Result.Content = Template.substr (ContentStart, EndPos - ContentStart);
409- Result.FullMatch =
410- Template.substr (NormalOpenPos, (EndPos + Close.size ()) - NormalOpenPos);
411- }
412-
413- return Result;
414- }
415-
416- static std::optional<std::pair<StringRef, StringRef>>
417- processTag (const Tag &T, SmallVectorImpl<Token> &Tokens, MustacheContext &Ctx) {
418- LLVM_DEBUG (dbgs () << " [Tag] " << T.FullMatch << " , Content: " << T.Content
419- << " , Kind: " << tagKindToString (T.TagKind ) << " \n " );
420- if (T.TagKind == Tag::Kind::Triple) {
421- Tokens.emplace_back (T.FullMatch , Ctx.Saver .save (" &" + T.Content ), ' &' , Ctx);
422- return std::nullopt ;
423- }
424- StringRef Interpolated = T.Content ;
425- if (!Interpolated.trim ().starts_with (" =" )) {
426- char Front = Interpolated.empty () ? ' ' : Interpolated.trim ().front ();
427- Tokens.emplace_back (T.FullMatch , Interpolated, Front, Ctx);
428- return std::nullopt ;
429- }
430- Tokens.emplace_back (T.FullMatch , Interpolated, ' =' , Ctx);
431- StringRef DelimSpec = Interpolated.trim ();
432- DelimSpec = DelimSpec.drop_front (1 );
433- DelimSpec = DelimSpec.take_until ([](char C) { return C == ' =' ; });
434- DelimSpec = DelimSpec.trim ();
435-
436- auto [NewOpen, NewClose] = DelimSpec.split (' ' );
437- LLVM_DEBUG (dbgs () << " [Set Delimiter] NewOpen: " << NewOpen
438- << " , NewClose: " << NewClose << " \n " );
439- return std::make_pair (NewOpen, NewClose);
440- }
441-
442374// Simple tokenizer that splits the template into tokens.
443- // The mustache spec allows {{{ }}} to unescape variables,
444- // but we don't support that here. An unescape variable
445- // is represented only by {{& variable}}.
446375static SmallVector<Token> tokenize (StringRef Template, MustacheContext &Ctx) {
447376 LLVM_DEBUG (dbgs () << " [Tokenize Template] \" " << Template << " \"\n " );
448377 SmallVector<Token> Tokens;
449378 SmallString<8 > Open (" {{" );
450379 SmallString<8 > Close (" }}" );
451- size_t Start = 0 ;
380+ size_t Cursor = 0 ;
381+ size_t TextStart = 0 ;
382+
383+ const StringLiteral TripleOpen (" {{{" );
384+ const StringLiteral TripleClose (" }}}" );
452385
453- while (Start < Template.size ()) {
454- LLVM_DEBUG (dbgs () << " [Tokenize Loop] Start=" << Start << " , Open='" << Open
455- << " ', Close='" << Close << " '\n " );
456- Tag T = findNextTag (Template, Start, Open, Close);
386+ while (Cursor < Template.size ()) {
387+ StringRef TemplateSuffix = Template.substr (Cursor);
388+ StringRef TagOpen, TagClose;
389+ Tag::Kind Kind;
390+
391+ // Determine which tag we've encountered.
392+ if (TemplateSuffix.starts_with (TripleOpen)) {
393+ Kind = Tag::Kind::Triple;
394+ TagOpen = TripleOpen;
395+ TagClose = TripleClose;
396+ } else if (TemplateSuffix.starts_with (Open)) {
397+ Kind = Tag::Kind::Normal;
398+ TagOpen = Open;
399+ TagClose = Close;
400+ } else {
401+ // Not at a tag, continue scanning.
402+ ++Cursor;
403+ continue ;
404+ }
457405
458- if (T.TagKind == Tag::Kind::None) {
459- // No more tags, the rest is text.
460- Tokens.emplace_back (Template.substr (Start));
461- break ;
406+ // Found a tag, first add the preceding text.
407+ if (Cursor > TextStart) {
408+ Tokens.emplace_back (Template.slice (TextStart, Cursor));
462409 }
463410
464- // Add the text before the tag.
465- if (T.StartPosition > Start) {
466- StringRef Text = Template.substr (Start, T.StartPosition - Start);
467- Tokens.emplace_back (Text);
411+ // Find the closing tag.
412+ size_t EndPos = Template.find (TagClose, Cursor + TagOpen.size ());
413+ if (EndPos == StringRef::npos) {
414+ // No closing tag, the rest is text.
415+ Tokens.emplace_back (Template.substr (Cursor));
416+ TextStart = Cursor = Template.size ();
417+ break ;
468418 }
469419
470- if (auto NewDelims = processTag (T, Tokens, Ctx)) {
471- std::tie (Open, Close) = *NewDelims;
420+ // Extract tag content and full match.
421+ size_t ContentStart = Cursor + TagOpen.size ();
422+ StringRef Content = Template.substr (ContentStart, EndPos - ContentStart);
423+ StringRef FullMatch =
424+ Template.substr (Cursor, (EndPos + TagClose.size ()) - Cursor);
425+
426+ // Process the tag (inlined logic from processTag).
427+ LLVM_DEBUG (dbgs () << " [Tag] " << FullMatch << " , Content: " << Content
428+ << " , Kind: " << tagKindToString (Kind) << " \n " );
429+ if (Kind == Tag::Kind::Triple) {
430+ Tokens.emplace_back (FullMatch, Ctx.Saver .save (" &" + Content), ' &' , Ctx);
431+ } else { // Normal Tag
432+ StringRef Interpolated = Content;
433+ if (!Interpolated.trim ().starts_with (" =" )) {
434+ char Front = Interpolated.empty () ? ' ' : Interpolated.trim ().front ();
435+ Tokens.emplace_back (FullMatch, Interpolated, Front, Ctx);
436+ } else { // Set Delimiter
437+ Tokens.emplace_back (FullMatch, Interpolated, ' =' , Ctx);
438+ StringRef DelimSpec = Interpolated.trim ();
439+ DelimSpec = DelimSpec.drop_front (1 );
440+ DelimSpec = DelimSpec.take_until ([](char C) { return C == ' =' ; });
441+ DelimSpec = DelimSpec.trim ();
442+
443+ auto [NewOpen, NewClose] = DelimSpec.split (' ' );
444+ LLVM_DEBUG (dbgs () << " [Set Delimiter] NewOpen: " << NewOpen
445+ << " , NewClose: " << NewClose << " \n " );
446+ Open = NewOpen;
447+ Close = NewClose;
448+ }
472449 }
473450
474- // Move past the tag.
475- Start = T.StartPosition + T.FullMatch .size ();
451+ // Move past the tag for the next iteration.
452+ Cursor += FullMatch.size ();
453+ TextStart = Cursor;
476454 }
477455
478- // Fix up white spaces for:
479- // - open sections
480- // - inverted sections
481- // - close sections
482- // - comments
483- //
484- // This loop attempts to find standalone tokens and tries to trim out
485- // the surrounding whitespace.
486- // For example:
487- // if you have the template string
488- // {{#section}} \n Example \n{{/section}}
489- // The output should would be
490- // For example:
491- // \n Example \n
456+ // Add any remaining text after the last tag.
457+ if (TextStart < Template.size ()) {
458+ Tokens.emplace_back (Template.substr (TextStart));
459+ }
460+
461+ // Fix up white spaces for standalone tags.
492462 size_t LastIdx = Tokens.size () - 1 ;
493463 for (size_t Idx = 0 , End = Tokens.size (); Idx < End; ++Idx) {
494464 Token &CurrentToken = Tokens[Idx];
495465 Token::Type CurrentType = CurrentToken.getType ();
496- // Check if token type requires cleanup.
497- bool RequiresCleanUp = requiresCleanUp (CurrentType);
498-
499- if (!RequiresCleanUp)
466+ if (!requiresCleanUp (CurrentType))
500467 continue ;
501468
502- // We adjust the token body if there's no text behind or ahead.
503- // A token is considered to have no text ahead if the right of the previous
504- // token is a newline followed by spaces.
505- // A token is considered to have no text behind if the left of the next
506- // token is spaces followed by a newline.
507- // eg.
508- // "Line 1\n {{#section}} \n Line 2 \n {{/section}} \n Line 3"
509469 bool HasTextBehind = hasTextBehind (Idx, Tokens);
510470 bool HasTextAhead = hasTextAhead (Idx, Tokens);
511471
0 commit comments