1#include "tokenizeString.h"
// Forward declaration. The definition further down builds an int mask from
// FindOption values based on the two anchor flags allowA / allowG.
12static int getFindOptions(
bool allowA,
bool allowG);
// Forward declaration of prepareRuleSearch (defined further down).
// NOTE(review): only part of the parameter list is visible in this listing;
// ruleScanner is a reference-to-pointer, i.e. an out-parameter.
13static void prepareRuleSearch(
16 const std::string* endRegexSource,
19 CompiledRule*& ruleScanner,
// Forward declaration of prepareRuleWhileSearch (defined further down).
// NOTE(review): only part of the parameter list is visible in this listing;
// ruleScanner is a reference-to-pointer, i.e. an out-parameter.
22static void prepareRuleWhileSearch(
25 const std::string* endRegexSource,
28 CompiledRule*& ruleScanner,
// Builds an int option mask: starts from FindOption::None and ORs in
// FindOption::NotBeginString and/or FindOption::NotBeginPosition.
// NOTE(review): the conditions guarding each OR (and the return statement) are
// on lines missing from this listing; presumably they test allowA and allowG
// respectively — confirm against the full file.
33static int getFindOptions(
bool allowA,
bool allowG) {
34 int options = FindOption::None;
36 options |= FindOption::NotBeginString;
39 options |= FindOption::NotBeginPosition;
// Compiles `rule` into a scanner via compileAG() and hands it back through the
// ruleScanner out-parameter; findOptions is set to FindOption::None.
// NOTE(review): part of the parameter list is missing from this listing.
45static void prepareRuleSearch(
48 const std::string* endRegexSource,
51 CompiledRule*& ruleScanner,
// NOTE(review): `grammar` is passed for both of the first two arguments;
// presumably it fills two different roles for compileAG — confirm.
56 ruleScanner = rule->compileAG(grammar, grammar, endRegexSource, allowA, allowG);
// The options are always None here; getFindOptions() is not consulted on the visible lines.
57 findOptions = FindOption::None;
// Compiles the `while` pattern of `rule` into a scanner via compileWhileAG()
// and hands it back through the ruleScanner out-parameter; findOptions is set
// to FindOption::None.
// NOTE(review): part of the parameter list is missing from this listing.
61static void prepareRuleWhileSearch(
64 const std::string* endRegexSource,
67 CompiledRule*& ruleScanner,
70 ruleScanner = rule->compileWhileAG(grammar, endRegexSource, allowA, allowG);
// The options are always None here; getFindOptions() is not consulted on the visible lines.
71 findOptions = FindOption::None;
// Checks the `while` patterns of the BeginWhileRule frames found on `stack`
// against the current line and returns the resulting stack, line position,
// first-line flag and anchor position in an IWhileCheckResult.
// NOTE(review): this listing omits many lines (the embedded line numbers
// skip), so branch and loop boundaries are partly inferred — confirm against
// the full file.
75IWhileCheckResult _checkWhileConditions(
80 StateStackImpl* stack,
81 LineTokens* lineTokens
// Start from the caller's position; these fields are only overwritten further down.
83 IWhileCheckResult result;
85 result.linePos = linePos;
86 result.isFirstLine = isFirstLine;
// Anchor is 0 when stack->beginRuleCapturedEOL is set, otherwise -1
// (presumably meaning "no anchor" — confirm).
87 result.anchorPosition = (stack->beginRuleCapturedEOL ? 0 : -1);
// The next declaration presumably belongs to a local helper struct (WhileStack,
// used right after) pairing a stack frame with its rule; the struct header is
// not visible here.
91 StateStackImpl* stack;
94 std::vector<WhileStack> whileRules;
// Walk from the current frame up the parent chain and look for frames whose
// rule is a BeginWhileRule (the guard on the cast result is not visible).
96 for (StateStackImpl* node = stack; node !=
nullptr; node = node->parent) {
97 Rule* nodeRule = node->getRule(grammar);
98 BeginWhileRule* whileRule =
dynamic_cast<BeginWhileRule*
>(nodeRule);
103 whileRules.push_back(ws);
// Entries were appended starting at the current frame; reversing makes the
// frame furthest up the parent chain the first one to be checked.
108 std::reverse(whileRules.begin(), whileRules.end());
110 for (
const auto& whileRuleEntry : whileRules) {
111 CompiledRule* ruleScanner =
nullptr;
// Compile the while-pattern for this frame; one anchor flag (presumably
// allowG) is true only while linePos sits exactly on the anchor position.
113 prepareRuleWhileSearch(
116 whileRuleEntry.stack->endRule,
118 linePos == result.anchorPosition,
123 IOnigMatch* r = ruleScanner->scanner->findNextMatchSync(
// NOTE(review): `r` is dereferenced below; the "no match" null check and the
// release of `r` are not visible in this listing — confirm both exist.
130 RuleId matchedRuleId = ruleScanner->rules[r->index];
// Continue from the parent of the while frame (presumably the path taken when
// the matched rule is not the expected while rule — confirm).
133 result.stack = whileRuleEntry.stack->parent;
137 if (!r->captureIndices.empty()) {
// Emit the token that precedes the match, process the rule's whileCaptures,
// then emit the token covering the match itself.
138 lineTokens->produce(whileRuleEntry.stack, r->captureIndices[0].start);
143 whileRuleEntry.stack,
145 whileRuleEntry.rule->whileCaptures,
148 lineTokens->produce(whileRuleEntry.stack, r->captureIndices[0].end);
// The end of the while-match becomes the new anchor; linePos only moves
// forward when the match actually consumed text.
149 result.anchorPosition = r->captureIndices[0].end;
150 if (r->captureIndices[0].end > linePos) {
151 result.linePos = r->captureIndices[0].end;
152 result.isFirstLine =
false;
// Second place where the while frame is dropped (presumably the "pattern did
// not match" path — confirm).
158 result.stack = whileRuleEntry.stack->parent;
// Runs the regular rule match (matchRule) and, if the grammar has injections,
// the injection match (matchInjections), then decides which of the two wins.
// NOTE(review): results are raw heap pointers (`new IMatchResult()` below).
// This listing skips lines, so it cannot be confirmed here that `matchResult`
// is freed on the paths where the injection result is returned instead —
// verify to rule out a leak.
167IMatchResult* matchRuleOrInjections(
169 OnigString* lineText,
172 StateStackImpl* stack,
176 IMatchResult* matchResult = matchRule(
// NOTE(review): `injections` is a by-value vector; if getInjections() returns
// a reference this copies the whole list on every call — confirm.
186 std::vector<Injection> injections = grammar->getInjections();
// No injections registered (presumably the regular match is returned as is;
// the return line is not visible).
187 if (injections.empty()) {
192 IMatchInjectionsResult* injectionResult = matchInjections(
// No injection matched.
202 if (!injectionResult) {
// Copy the injection result into a fresh IMatchResult and release the
// intermediate object (presumably the path for "no regular match" — the
// guarding condition is not visible).
209 IMatchResult* result =
new IMatchResult();
210 result->captureIndices = injectionResult->captureIndices;
211 result->matchedRuleId = injectionResult->matchedRuleId;
212 delete injectionResult;
// Both candidates exist: score each by the start offset of capture 0.
217 int matchResultScore = matchResult->captureIndices[0].start;
218 int injectionResultScore = injectionResult->captureIndices[0].start;
// The injection wins if it starts earlier, or if it starts at the same offset
// and is flagged as a priority match.
220 if (injectionResultScore < matchResultScore ||
221 (injectionResult->priorityMatch && injectionResultScore == matchResultScore)) {
223 IMatchResult* result =
new IMatchResult();
224 result->captureIndices = injectionResult->captureIndices;
225 result->matchedRuleId = injectionResult->matchedRuleId;
227 delete injectionResult;
// Injection did not win: discard it.
231 delete injectionResult;
// Matches the rule belonging to the top frame of `stack` against the line and
// wraps the outcome in a heap-allocated IMatchResult.
// NOTE(review): the caller presumably owns (and must delete) the returned
// pointer — confirm. Several lines of this function are missing from the listing.
236IMatchResult* matchRule(
238 OnigString* lineText,
241 StateStackImpl* stack,
244 Rule* rule = stack->getRule(grammar);
249 CompiledRule* ruleScanner =
nullptr;
// One anchor flag of the scanner preparation is true only when linePos is
// exactly at the anchor position.
256 linePos == anchorPosition,
// NOTE(review): `r` is dereferenced below; the "no match" null check and the
// release of `r` are not visible in this listing — confirm both exist.
261 IOnigMatch* r = ruleScanner->scanner->findNextMatchSync(
268 IMatchResult* result =
new IMatchResult();
269 result->captureIndices = r->captureIndices;
// r->index selects which of the scanner's rules produced the match.
270 result->matchedRuleId = ruleScanner->rules[r->index];
// Tries the injections whose matcher accepts the current scope names and
// returns the match with the smallest start offset in a heap-allocated
// IMatchInjectionsResult.
// NOTE(review): presumably returns nullptr when nothing matched (see
// `foundMatch`); the return lines and the declaration of bestMatchRuleId are
// not visible in this listing.
279IMatchInjectionsResult* matchInjections(
280 const std::vector<Injection>& injections,
282 OnigString* lineText,
285 StateStackImpl* stack,
// Best candidate so far; the rating is the start offset, so lower is better.
289 int bestMatchRating = std::numeric_limits<int>::max();
290 std::vector<IOnigCaptureIndex> bestMatchCaptureIndices;
292 int bestMatchResultPriority = 0;
293 bool foundMatch =
false;
// Scope names of the current frame, evaluated by each injection's matcher.
295 std::vector<std::string> scopes = stack->contentNameScopesList->getScopeNames();
297 for (
size_t i = 0; i < injections.size(); i++) {
298 const Injection& injection = injections[i];
// Injection selector does not apply to the current scopes.
299 if (!injection.matcher(scopes)) {
304 Rule* rule = grammar->getRule(injection.ruleId);
309 CompiledRule* ruleScanner =
nullptr;
316 linePos == anchorPosition,
321 IOnigMatch* matchResult = ruleScanner->scanner->findNextMatchSync(
// `>=` means a later injection never replaces an earlier one that starts at
// the same offset.
331 int matchRating = matchResult->captureIndices[0].start;
332 if (matchRating >= bestMatchRating) {
338 bestMatchRating = matchRating;
339 bestMatchCaptureIndices = matchResult->captureIndices;
340 bestMatchRuleId = ruleScanner->rules[matchResult->index];
341 bestMatchResultPriority = injection.priority;
// A match starting exactly at linePos (presumably ends the search early,
// since no later candidate can start sooner — confirm).
346 if (bestMatchRating == linePos) {
353 IMatchInjectionsResult* result =
new IMatchInjectionsResult();
// An injection priority of -1 marks the result as a priority match.
354 result->priorityMatch = (bestMatchResultPriority == -1);
355 result->captureIndices = bestMatchCaptureIndices;
356 result->matchedRuleId = bestMatchRuleId;
// Capture-handling routine: emits tokens for the capture groups of a match,
// given parallel lists of capture rules and capture index ranges.
// NOTE(review): the function header line is missing from this listing; the
// name is presumably handleCaptures — confirm. Other lines are missing too, so
// branch boundaries below are partly inferred.
366 OnigString* lineText,
368 StateStackImpl* stack,
369 LineTokens* lineTokens,
370 const std::vector<CaptureRule*>& captures,
371 const std::vector<IOnigCaptureIndex>& captureIndices
// Nothing to do without capture rules.
373 if (captures.empty()) {
377 const std::string& lineTextContent = lineText->content();
// Only indices present in both lists are processed.
379 size_t len = std::min(captures.size(), captureIndices.size());
// Capture scopes that are still open, each with the offset at which it ends.
380 std::vector<LocalStackElement> localStack;
// NOTE(review): captureIndices[0] is read without a visible emptiness check
// on captureIndices — confirm callers always pass at least one entry.
381 int maxEnd = captureIndices[0].end;
383 for (
size_t i = 0; i < len; i++) {
// Capture groups without a rule are not processed.
384 const CaptureRule* captureRule = captures[i];
385 if (captureRule ==
nullptr) {
390 const IOnigCaptureIndex& captureIndex = captureIndices[i];
// Zero-length captures are not processed.
392 if (captureIndex.length == 0) {
// Capture starts beyond the end of capture 0 (presumably stops processing — confirm).
397 if (captureIndex.start > maxEnd) {
// Close every open capture scope that ends at or before this capture's start,
// emitting its token up to its end offset.
403 while (!localStack.empty() &&
404 localStack.back().endPos <= captureIndex.start) {
406 lineTokens->produceFromScopes(
407 localStack.back().scopes,
408 localStack.back().endPos
410 localStack.pop_back();
// Emit the text before this capture: with the innermost open capture scope if
// there is one, otherwise with the scopes of `stack`.
413 if (!localStack.empty()) {
414 lineTokens->produceFromScopes(
415 localStack.back().scopes,
419 lineTokens->produce(stack, captureIndex.start);
// A non-zero retokenizeCapturedWithRuleId means the captured text is tokenized
// again by a nested tokenizeString() call with that rule pushed onto the stack.
422 if (
ruleIdToNumber(captureRule->retokenizeCapturedWithRuleId) != 0) {
// NOTE(review): getName()/getContentName() return std::string*; line 490 below
// deletes such a pointer, so they are presumably caller-owned. The release of
// scopeName/contentName in this branch is not visible — confirm.
424 std::string* scopeName = captureRule->getName(
428 AttributedScopeStack* nameScopesList =
429 stack->contentNameScopesList->pushAttributed(
430 scopeName ? *scopeName :
"",
434 std::string* contentName = captureRule->getContentName(
438 AttributedScopeStack* contentNameScopesList =
439 nameScopesList->pushAttributed(
440 contentName ? *contentName :
"",
444 StateStackImpl* stackClone = stack->push(
445 captureRule->retokenizeCapturedWithRuleId,
451 contentNameScopesList
// The nested run sees the line text truncated at the end of the capture.
// NOTE(review): the release of onigSubStr is not visible in this listing — confirm.
454 std::string substring = lineTextContent.substr(0, captureIndex.end);
455 OnigString* onigSubStr = grammar->createOnigString(substring);
456 StackElement subResult = tokenizeString(
459 (isFirstLine && captureIndex.start == 0),
// Plain capture: if it has a non-empty scope name, push that scope on top of
// the innermost open capture scope (or the stack's scopes) and remember where
// it ends.
473 std::string* captureRuleScopeName = captureRule->getName(
478 if (captureRuleScopeName !=
nullptr && !captureRuleScopeName->empty()) {
480 AttributedScopeStack* base = !localStack.empty()
481 ? localStack.back().scopes
482 : stack->contentNameScopesList;
483 AttributedScopeStack* captureRuleScopesList =
484 base->pushAttributed(*captureRuleScopeName, grammar);
485 localStack.push_back(
486 LocalStackElement(captureRuleScopesList, captureIndex.end)
490 delete captureRuleScopeName;
// Close whatever capture scopes are still open, innermost first.
493 while (!localStack.empty()) {
495 lineTokens->produceFromScopes(
496 localStack.back().scopes,
497 localStack.back().endPos
499 localStack.pop_back();
// Tokenizes one line: repeatedly finds the next rule/injection match, emits
// tokens through lineTokens, and pushes/pops frames on `stack` depending on the
// kind of rule that matched. Returns the final stack, line position, anchor
// position and whether tokenization stopped early because of the time limit.
// NOTE(review): this listing omits many lines (the embedded line numbers
// skip), so the loop and branch boundaries below are partly inferred — confirm
// against the full file.
504StackElement tokenizeString(
506 OnigString* lineText,
509 StateStackImpl* stack,
510 LineTokens* lineTokens,
511 bool checkWhileConditions,
// NOTE(review): length() returns size_t; storing it in int narrows.
515 int lineLength = lineText->content().length();
517 int anchorPosition = -1;
// Optionally re-validate the open begin/while frames first and adopt the
// stack, position, first-line flag and anchor that check returns.
519 if (checkWhileConditions) {
520 IWhileCheckResult whileCheckResult = _checkWhileConditions(
528 stack = whileCheckResult.stack;
529 linePos = whileCheckResult.linePos;
530 isFirstLine = whileCheckResult.isFirstLine;
531 anchorPosition = whileCheckResult.anchorPosition;
// Reference point for the time limit.
534 auto startTime = std::chrono::steady_clock::now();
// A timeLimit of 0 disables the check; otherwise stop with stoppedEarly = true
// once the elapsed time (converted to milliseconds) exceeds the limit.
540 if (timeLimit != 0) {
541 auto currentTime = std::chrono::steady_clock::now();
542 auto elapsedTime = std::chrono::duration_cast<std::chrono::milliseconds>(
543 currentTime - startTime
545 if (elapsedTime > timeLimit) {
546 result.stack = stack;
547 result.stoppedEarly =
true;
// NOTE(review): `r` comes from a function that allocates with `new`; its
// release is not visible in this listing — confirm it is deleted on every path.
553 IMatchResult* r = matchRuleOrInjections(
// Emit a token up to the end of the line (presumably the "no match" path — confirm).
564 lineTokens->produce(stack, lineLength);
569 const std::vector<IOnigCaptureIndex>& captureIndices = r->captureIndices;
570 RuleId matchedRuleId = r->matchedRuleId;
// True when the match ends past the current position, i.e. it consumed text.
572 bool hasAdvanced = !captureIndices.empty() &&
573 captureIndices[0].end > linePos;
// --- Current frame is being closed (presumably matchedRuleId is the end-rule
// id; the condition is not visible). ---
// NOTE(review): poppedRule comes from dynamic_cast and is dereferenced below
// with no visible null check.
577 BeginEndRule* poppedRule =
dynamic_cast<BeginEndRule*
>(
578 stack->getRule(grammar)
// Emit the content before the end match, switch the frame's content scopes
// back to its name scopes, process endCaptures, then emit the end match itself.
581 lineTokens->produce(stack, captureIndices[0].start);
582 stack = stack->withContentNameScopesList(stack->nameScopesList);
589 poppedRule->endCaptures,
592 lineTokens->produce(stack, captureIndices[0].end);
// Pop the frame; the anchor becomes the one recorded on the popped frame.
595 StateStackImpl* popped = stack;
596 stack = stack->parent;
597 anchorPosition = popped->getAnchorPos();
// No progress since the popped frame was entered at this same position: emit
// the rest of the line (presumably to avoid looping forever — confirm).
599 if (!hasAdvanced && popped->getEnterPos() == linePos) {
603 lineTokens->produce(stack, lineLength);
// --- A rule matched: emit the text before it and set up a new frame. ---
610 Rule* _rule = grammar->getRule(matchedRuleId);
612 lineTokens->produce(stack, captureIndices[0].start);
// Kept for the no-progress checks below.
614 StateStackImpl* beforePush = stack;
// NOTE(review): getName()/getContentName() return std::string*; whether
// scopeName/contentName are released here is not visible — confirm.
617 std::string* scopeName = _rule->getName(
618 &lineText->content(),
621 AttributedScopeStack* nameScopesList =
622 stack->contentNameScopesList->pushAttributed(
623 scopeName ? *scopeName :
"",
// Argument that is true when the match ends exactly at the end of the line.
630 captureIndices[0].end == lineLength,
// --- Begin/end rule. ---
// NOTE(review): the guard on this cast result is not visible in this listing.
638 BeginEndRule* beginEndRule =
dynamic_cast<BeginEndRule*
>(_rule);
// Process beginCaptures, emit the begin match, and anchor at its end.
646 beginEndRule->beginCaptures,
649 lineTokens->produce(stack, captureIndices[0].end);
650 anchorPosition = captureIndices[0].end;
// Content inside the region gets the rule's content name pushed on top of its name scopes.
652 std::string* contentName = beginEndRule->getContentName(
656 AttributedScopeStack* contentNameScopesList =
657 nameScopesList->pushAttributed(
658 contentName ? *contentName :
"",
661 stack = stack->withContentNameScopesList(contentNameScopesList);
// End patterns containing back-references are resolved now and stored on the frame.
665 if (beginEndRule->endHasBackReferences) {
666 std::string endRule = beginEndRule->getEndWithResolvedBackReferences(
670 stack = stack->withEndRule(endRule);
// No text consumed and the same rule is already on the stack: undo the push
// and emit the rest of the line.
673 if (!hasAdvanced && beforePush->hasSameRuleAs(stack)) {
675 stack = stack->pop();
676 lineTokens->produce(stack, lineLength);
// --- Begin/while rule: same steps as above, using the while pattern. ---
682 BeginWhileRule* beginWhileRule =
dynamic_cast<BeginWhileRule*
>(_rule);
683 if (beginWhileRule) {
690 beginWhileRule->beginCaptures,
693 lineTokens->produce(stack, captureIndices[0].end);
694 anchorPosition = captureIndices[0].end;
696 std::string* contentName = beginWhileRule->getContentName(
700 AttributedScopeStack* contentNameScopesList =
701 nameScopesList->pushAttributed(
702 contentName ? *contentName :
"",
705 stack = stack->withContentNameScopesList(contentNameScopesList);
// The resolved while pattern is stored through withEndRule(), i.e. in the same
// frame slot the begin/end branch uses for its end pattern.
709 if (beginWhileRule->whileHasBackReferences) {
710 std::string whileRule =
711 beginWhileRule->getWhileWithResolvedBackReferences(
715 stack = stack->withEndRule(whileRule);
// Same no-progress guard as in the begin/end branch.
718 if (!hasAdvanced && beforePush->hasSameRuleAs(stack)) {
720 stack = stack->pop();
721 lineTokens->produce(stack, lineLength);
// --- Plain match rule: process its captures, emit the match, then pop. ---
// NOTE(review): matchingRule is dereferenced below with no visible null check.
728 MatchRule* matchingRule =
dynamic_cast<MatchRule*
>(_rule);
736 matchingRule->captures,
739 lineTokens->produce(stack, captureIndices[0].end);
742 stack = stack->pop();
// safePop() followed by emitting the rest of the line (presumably the guard
// for a match rule that consumed nothing; the condition is not visible).
746 stack = stack->safePop();
747 lineTokens->produce(stack, lineLength);
// Advance the scan position only when the match consumed text.
757 if (!captureIndices.empty() && captureIndices[0].end > linePos) {
759 linePos = captureIndices[0].end;
// Result for a run that was not cut short by the time limit.
766 result.stack = stack;
767 result.stoppedEarly =
false;
768 result.linePos = linePos;
769 result.anchorPosition = anchorPosition;
RuleId ruleIdFromNumber(int id)
Convert an integer to a RuleId.
const RuleId END_RULE_ID(-1)
Special rule ID indicating the end of a matched region.
int ruleIdToNumber(RuleId id)
Convert a RuleId to its integer value.
const RuleId WHILE_RULE_ID(-2)
Special rule ID for 'while' rule matching.