TextMateLib 1.0
Modern C++ implementation of the TextMate syntax highlighting engine
Loading...
Searching...
No Matches
grammar.cpp
1#include "grammar.h"
2#include "tokenizeString.h"
3#include "encodedTokenAttributes.h"
4#include "matcher.h"
5#include <algorithm>
6#include <iostream>
7
8namespace tml {
9
10// Static member initialization
11StateStackImpl* StateStackImpl::NULL_STATE = nullptr;
12
13// BalancedBracketSelectors implementation
14
15BalancedBracketSelectors::BalancedBracketSelectors(
16 const std::vector<std::string>& balancedBracketSelectors,
17 const std::vector<std::string>& unbalancedBracketSelectors)
18 : _allowAny(false) {
19
20 if (balancedBracketSelectors.empty() && unbalancedBracketSelectors.empty()) {
21 _allowAny = true;
22 }
23
24 // Create matchers (simplified - full implementation would use createMatchers)
25 // For now, store the selectors
26}
27
28bool BalancedBracketSelectors::matchesAlways() const {
29 return _allowAny && _unbalancedBracketMatchers.empty();
30}
31
32bool BalancedBracketSelectors::matchesNever() const {
33 return !_allowAny && _balancedBracketMatchers.empty();
34}
35
36bool BalancedBracketSelectors::match(const std::vector<std::string>& scopes) const {
37 // Simple implementation: returns true if balanced brackets are enabled globally.
38 // Full implementation would match against specific scope selectors in _balancedBracketMatchers.
39 return _allowAny;
40}
41
42// AttributedScopeStack implementation
43
44AttributedScopeStack::AttributedScopeStack(
45 AttributedScopeStack* parent_,
46 const ScopeName& scopeName_,
47 EncodedTokenAttributes tokenAttributes_)
48 : parent(parent_), scopeName(scopeName_), tokenAttributes(tokenAttributes_) {
49}
50
51AttributedScopeStack::~AttributedScopeStack() {
52 // Don't delete parent - it's managed separately
53}
54
55AttributedScopeStack* AttributedScopeStack::createRoot(
56 const std::string& scopeName,
57 EncodedTokenAttributes tokenAttributes) {
58 return new AttributedScopeStack(nullptr, scopeName, tokenAttributes);
59}
60
61AttributedScopeStack* AttributedScopeStack::createRootAndLookUpScopeName(
62 const std::string& scopeName,
63 EncodedTokenAttributes tokenAttributes,
64 Grammar* grammar) {
65
66 BasicScopeAttributes rawMetadata = grammar->getMetadataForScope(scopeName);
67 EncodedTokenAttributes scopeTokenAttributes = EncodedTokenAttributesHelper::set(
68 tokenAttributes,
69 rawMetadata.languageId,
70 rawMetadata.tokenType,
71 nullptr,
72 static_cast<int>(FontStyle::NotSet),
73 0,
74 0
75 );
76
77 return new AttributedScopeStack(nullptr, scopeName, scopeTokenAttributes);
78}
79
80AttributedScopeStack* AttributedScopeStack::push(
81 Grammar* grammar,
82 const std::string& scopeName) {
83
84 if (scopeName.empty()) {
85 return this;
86 }
87
88 BasicScopeAttributes rawMetadata = grammar->getMetadataForScope(scopeName);
89 EncodedTokenAttributes scopeTokenAttributes = EncodedTokenAttributesHelper::set(
90 this->tokenAttributes,
91 rawMetadata.languageId,
92 rawMetadata.tokenType,
93 nullptr,
94 static_cast<int>(FontStyle::NotSet),
95 0,
96 0
97 );
98
99 return new AttributedScopeStack(this, scopeName, scopeTokenAttributes);
100}
101
102AttributedScopeStack* AttributedScopeStack::pushAttributed(
103 const std::string& scopePath,
104 Grammar* grammar) {
105
106 if (scopePath.empty()) {
107 return this;
108 }
109
110 // Check if scopePath contains spaces (multiple scopes)
111 if (scopePath.find(' ') == std::string::npos) {
112 // This is the common case and much faster - single scope
113 return _pushAttributed(this, scopePath, grammar);
114 }
115
116 // Split by spaces and push each scope
117 std::vector<std::string> scopes;
118 std::string currentScope;
119 for (char c : scopePath) {
120 if (c == ' ') {
121 if (!currentScope.empty()) {
122 scopes.push_back(currentScope);
123 currentScope.clear();
124 }
125 } else {
126 currentScope += c;
127 }
128 }
129 if (!currentScope.empty()) {
130 scopes.push_back(currentScope);
131 }
132
133 AttributedScopeStack* result = this;
134 for (const std::string& scope : scopes) {
135 result = _pushAttributed(result, scope, grammar);
136 }
137 return result;
138}
139
140AttributedScopeStack* AttributedScopeStack::_pushAttributed(
141 AttributedScopeStack* target,
142 const std::string& scopeName,
143 Grammar* grammar) {
144
145 if (scopeName.empty()) {
146 return target;
147 }
148
149 BasicScopeAttributes rawMetadata = grammar->getMetadataForScope(scopeName);
150
151 // Get theme match result
152 IThemeProvider* themeProvider = grammar->getThemeProvider();
153 StyleAttributes* defaultStyle = themeProvider->getDefaults();
154
155 // Merge attributes
156 EncodedTokenAttributes metadata = EncodedTokenAttributesHelper::set(
157 target->tokenAttributes,
158 rawMetadata.languageId,
159 rawMetadata.tokenType,
160 nullptr,
161 defaultStyle->fontStyle,
162 defaultStyle->foregroundId,
163 defaultStyle->backgroundId
164 );
165
166 return new AttributedScopeStack(target, scopeName, metadata);
167}
168
169std::vector<std::string> AttributedScopeStack::getScopeNames() const {
170 std::vector<std::string> result;
171 const AttributedScopeStack* current = this;
172 while (current) {
173 result.push_back(current->scopeName);
174 current = current->parent;
175 }
176 std::reverse(result.begin(), result.end());
177 return result;
178}
179
180bool AttributedScopeStack::equals(AttributedScopeStack* a, AttributedScopeStack* b) {
181 if (a == b) return true;
182 if (!a || !b) return false;
183
184 while (a && b) {
185 if (a->scopeName != b->scopeName || a->tokenAttributes != b->tokenAttributes) {
186 return false;
187 }
188 a = a->parent;
189 b = b->parent;
190 }
191
192 return a == nullptr && b == nullptr;
193}
194
195// StateStackImpl implementation
196
197StateStackImpl::StateStackImpl(
198 StateStackImpl* parent_,
199 RuleId ruleId_,
200 int enterPos_,
201 int anchorPos_,
202 bool beginRuleCapturedEOL_,
203 const std::string* endRule_,
204 AttributedScopeStack* nameScopesList_,
205 AttributedScopeStack* contentNameScopesList_)
206 : parent(parent_),
207 ruleId(ruleId_),
208 _enterPos(enterPos_),
209 _anchorPos(anchorPos_),
210 beginRuleCapturedEOL(beginRuleCapturedEOL_),
211 endRule(endRule_ ? new std::string(*endRule_) : nullptr),
212 nameScopesList(nameScopesList_),
213 contentNameScopesList(contentNameScopesList_) {
214
215 depth = parent ? parent->depth + 1 : 1;
216}
217
218StateStackImpl::~StateStackImpl() {
219 delete endRule;
220 // Don't delete nameScopesList and contentNameScopesList - managed separately
221}
222
223StateStack* StateStackImpl::clone() {
224 return new StateStackImpl(
225 parent,
226 ruleId,
227 _enterPos,
228 _anchorPos,
229 beginRuleCapturedEOL,
230 endRule,
231 nameScopesList,
232 contentNameScopesList
233 );
234}
235
236bool StateStackImpl::equals(StateStack* other) {
237 if (this == other) return true;
238 if (!other) return false;
239
240 StateStackImpl* otherImpl = dynamic_cast<StateStackImpl*>(other);
241 if (!otherImpl) return false;
242
243 // Compare all fields
244 if (ruleIdToNumber(ruleId) != ruleIdToNumber(otherImpl->ruleId)) return false;
245 if (_enterPos != otherImpl->_enterPos) return false;
246
247 bool thisHasEndRule = (endRule != nullptr);
248 bool otherHasEndRule = (otherImpl->endRule != nullptr);
249 if (thisHasEndRule != otherHasEndRule) return false;
250 if (thisHasEndRule && *endRule != *otherImpl->endRule) return false;
251
252 if (!AttributedScopeStack::equals(nameScopesList, otherImpl->nameScopesList)) return false;
253 if (!AttributedScopeStack::equals(contentNameScopesList, otherImpl->contentNameScopesList)) return false;
254
255 // Compare parents recursively
256 if (parent == nullptr && otherImpl->parent == nullptr) return true;
257 if (parent == nullptr || otherImpl->parent == nullptr) return false;
258
259 return parent->equals(otherImpl->parent);
260}
261
262void StateStackImpl::reset() {
263 // Reset enter and anchor positions
264 StateStackImpl* el = this;
265 while (el) {
266 el->_enterPos = -1;
267 el->_anchorPos = -1;
268 el = el->parent;
269 }
270}
271
272StateStackImpl* StateStackImpl::push(
273 RuleId ruleId,
274 int enterPos,
275 int anchorPos,
276 bool beginRuleCapturedEOL,
277 const std::string* endRule,
278 AttributedScopeStack* nameScopesList,
279 AttributedScopeStack* contentNameScopesList) {
280
281 return new StateStackImpl(
282 this,
283 ruleId,
284 enterPos,
285 anchorPos,
286 beginRuleCapturedEOL,
287 endRule,
288 nameScopesList,
289 contentNameScopesList
290 );
291}
292
293StateStackImpl* StateStackImpl::pop() {
294 return this->parent;
295}
296
297StateStackImpl* StateStackImpl::safePop() {
298 if (this->parent) {
299 return this->parent;
300 }
301 return this;
302}
303
304Rule* StateStackImpl::getRule(Grammar* grammar) {
305 return grammar->getRule(this->ruleId);
306}
307
308StateStackImpl* StateStackImpl::withContentNameScopesList(AttributedScopeStack* contentNameScopesList) {
309 if (this->contentNameScopesList == contentNameScopesList) {
310 return this;
311 }
312 return this->parent->push(
313 this->ruleId,
314 this->_enterPos,
315 this->_anchorPos,
316 this->beginRuleCapturedEOL,
317 this->endRule,
318 this->nameScopesList,
319 contentNameScopesList
320 );
321}
322
323StateStackImpl* StateStackImpl::withEndRule(const std::string& endRule) {
324 if (this->endRule && *this->endRule == endRule) {
325 return this;
326 }
327 return new StateStackImpl(
328 this->parent,
329 this->ruleId,
330 this->_enterPos,
331 this->_anchorPos,
332 this->beginRuleCapturedEOL,
333 &endRule,
334 this->nameScopesList,
335 this->contentNameScopesList
336 );
337}
338
339bool StateStackImpl::hasSameRuleAs(StateStackImpl* other) {
340 StateStackImpl* el = this;
341 while (el && el->_enterPos == other->_enterPos) {
342 if (ruleIdToNumber(el->ruleId) == ruleIdToNumber(other->ruleId)) {
343 return true;
344 }
345 el = el->parent;
346 }
347 return false;
348}
349
350std::string StateStackImpl::toString() const {
351 std::string result = "StateStack[";
352 const StateStackImpl* current = this;
353 while (current) {
354 result += "Rule#" + std::to_string(ruleIdToNumber(current->ruleId));
355 if (current->parent) result += ", ";
356 current = current->parent;
357 }
358 result += "]";
359 return result;
360}
361
362// LineTokens implementation
363
364LineTokens::LineTokens(
365 bool emitBinaryTokens,
366 const std::string& lineText,
367 const std::vector<TokenTypeMatcher>& tokenTypeMatchers,
368 BalancedBracketSelectors* balancedBracketSelectors)
369 : _emitBinaryTokens(emitBinaryTokens),
370 _lineText(lineText),
371 _tokenTypeMatchers(tokenTypeMatchers),
372 _balancedBracketSelectors(balancedBracketSelectors),
373 _lastTokenEndIndex(0) {
374}
375
376void LineTokens::produce(StateStackImpl* stack, int endIndex) {
377 produceFromScopes(stack->contentNameScopesList, endIndex);
378}
379
380void LineTokens::produceFromScopes(AttributedScopeStack* scopesList, int endIndex) {
381 if (_lastTokenEndIndex >= endIndex) {
382 return;
383 }
384
385 if (_emitBinaryTokens) {
386 _binaryTokens.push_back(_lastTokenEndIndex);
387 _binaryTokens.push_back(scopesList->tokenAttributes);
388 _lastTokenEndIndex = endIndex;
389 } else {
390 IToken token;
391 token.startIndex = _lastTokenEndIndex;
392 token.endIndex = endIndex;
393 token.scopes = scopesList->getScopeNames();
394 _tokens.push_back(token);
395 _lastTokenEndIndex = endIndex;
396 }
397}
398
399std::vector<IToken> LineTokens::getResult(StateStackImpl* stack, int lineLength) {
400 // Remove token for newline if it exists
401 if (!_tokens.empty() && _tokens.back().startIndex == lineLength - 1) {
402 _tokens.pop_back();
403 }
404
405 // If no tokens, produce one for the entire line
406 if (_tokens.empty()) {
407 _lastTokenEndIndex = -1;
408 produce(stack, lineLength);
409 if (!_tokens.empty()) {
410 _tokens.back().startIndex = 0;
411 }
412 }
413
414 return _tokens;
415}
416
417std::vector<uint32_t> LineTokens::getBinaryResult(StateStackImpl* stack, int lineLength) {
418 return _binaryTokens;
419}
420
421// Grammar implementation
422
423Grammar::Grammar(
424 const ScopeName& rootScopeName,
425 IRawGrammar* grammar,
426 int initialLanguage,
427 const EmbeddedLanguagesMap* embeddedLanguages,
428 const TokenTypeMap* tokenTypes,
429 BalancedBracketSelectors* balancedBracketSelectors_,
430 IGrammarRepository* grammarRepository,
431 IThemeProvider* themeProvider,
432 IOnigLib* onigLib)
433 : _rootScopeName(rootScopeName),
434 _rootId(ruleIdFromNumber(-1)),
435 _lastRuleId(0),
436 _grammarRepository(grammarRepository),
437 _themeProvider(themeProvider),
438 _grammar(grammar),
439 _injections(nullptr),
440 balancedBracketSelectors(balancedBracketSelectors_),
441 _onigLib(onigLib) {
442
443 _ruleId2desc.push_back(nullptr); // Index 0 is null
444
445 _basicScopeAttributesProvider = new BasicScopeAttributesProvider(
446 initialLanguage,
447 embeddedLanguages
448 );
449
450 _grammar = initGrammar(grammar, nullptr);
451
452 // Build token type matchers
453 if (tokenTypes) {
454 for (const auto& pair : *tokenTypes) {
455 // Simple implementation: creates basic matchers without selector parsing.
456 // Full implementation would use createMatchers to parse scope selectors.
457 TokenTypeMatcher matcher;
458 matcher.type = pair.second;
459 _tokenTypeMatchers.push_back(matcher);
460 }
461 }
462}
463
464Grammar::~Grammar() {
465 dispose();
466}
467
468void Grammar::dispose() {
469 for (size_t i = 0; i < _ruleId2desc.size(); i++) {
470 auto* rule = _ruleId2desc[i];
471 if (rule) {
472 rule->dispose();
473 delete rule;
474 }
475 }
476 _ruleId2desc.clear();
477
478 if (_basicScopeAttributesProvider) {
479 delete _basicScopeAttributesProvider;
480 _basicScopeAttributesProvider = nullptr;
481 }
482 if (_injections) {
483 delete _injections;
484 _injections = nullptr;
485 }
486 if (balancedBracketSelectors) {
487 delete balancedBracketSelectors;
488 balancedBracketSelectors = nullptr;
489 }
490}
491
492OnigScanner* Grammar::createOnigScanner(const std::vector<std::string>& sources) {
493 return _onigLib->createOnigScanner(sources);
494}
495
496OnigString* Grammar::createOnigString(const std::string& str) {
497 return _onigLib->createOnigString(str);
498}
499
500BasicScopeAttributes Grammar::getMetadataForScope(const std::string& scope) {
501 return _basicScopeAttributesProvider->getBasicScopeAttributes(&scope);
502}
503
504Rule* Grammar::getRule(RuleId ruleId) {
505 int id = ruleIdToNumber(ruleId);
506 if (id >= 0 && id < static_cast<int>(_ruleId2desc.size())) {
507 return _ruleId2desc[id];
508 }
509 return nullptr;
510}
511
512RuleId Grammar::registerRule(Rule* rule) {
513 int id = ++_lastRuleId;
514 if (_ruleId2desc.size() <= static_cast<size_t>(id)) {
515 _ruleId2desc.resize(id + 1, nullptr);
516 }
517 _ruleId2desc[id] = rule;
518 return ruleIdFromNumber(id);
519}
520
521RuleId Grammar::allocateRuleId() {
522 int id = ++_lastRuleId;
523 if (_ruleId2desc.size() <= static_cast<size_t>(id)) {
524 _ruleId2desc.resize(id + 1, nullptr);
525 }
526 return ruleIdFromNumber(id);
527}
528
529void Grammar::setRule(RuleId ruleId, Rule* rule) {
530 int id = ruleIdToNumber(ruleId);
531 if (id >= 0 && id < static_cast<int>(_ruleId2desc.size())) {
532 _ruleId2desc[id] = rule;
533 }
534}
535
536IRawGrammar* Grammar::getExternalGrammar(const std::string& scopeName, IRawRepository* repository) {
537 auto it = _includedGrammars.find(scopeName);
538 if (it != _includedGrammars.end()) {
539 return it->second;
540 }
541
542 if (_grammarRepository) {
543 IRawGrammar* rawIncludedGrammar = _grammarRepository->lookup(scopeName);
544 if (rawIncludedGrammar) {
545 IRawRule* base = (repository && repository->baseRule) ? repository->baseRule : nullptr;
546 _includedGrammars[scopeName] = initGrammar(rawIncludedGrammar, base);
547 return _includedGrammars[scopeName];
548 } else {
549 }
550 } else {
551 }
552
553 return nullptr;
554}
555
556std::vector<Injection> Grammar::getInjections() {
557 if (_injections == nullptr) {
558 _injections = new std::vector<Injection>(_collectInjections());
559 }
560 return *_injections;
561}
562
// Helper function: reports whether `thisScopeName` is `scopeName` itself or a
// dotted child of it (e.g. "source.js.embedded" matches "source.js", while
// "source.jsx" does not). An empty `thisScopeName` never matches.
static bool scopesAreMatching(const std::string& thisScopeName, const std::string& scopeName) {
    if (thisScopeName.empty()) {
        return false;
    }
    const size_t len = scopeName.length();
    if (thisScopeName.length() == len) {
        return thisScopeName == scopeName;
    }
    // Prefix match followed by a '.' separator. compare() checks the prefix in
    // place, avoiding the temporary string that substr() would allocate.
    return thisScopeName.length() > len &&
           thisScopeName[len] == '.' &&
           thisScopeName.compare(0, len, scopeName) == 0;
}
576
577// Helper function: Match identifiers against scopes
578static bool nameMatcher(const std::vector<std::string>& identifiers,
579 const std::vector<std::string>& scopes) {
580 if (scopes.size() < identifiers.size()) {
581 return false;
582 }
583 size_t lastIndex = 0;
584 for (const auto& identifier : identifiers) {
585 bool found = false;
586 for (size_t i = lastIndex; i < scopes.size(); i++) {
587 if (scopesAreMatching(scopes[i], identifier)) {
588 lastIndex = i + 1;
589 found = true;
590 break;
591 }
592 }
593 if (!found) {
594 return false;
595 }
596 }
597 return true;
598}
599
600// Helper function: Collect injections from a single injection rule
601static void collectInjections(std::vector<Injection>& result,
602 const std::string& selector,
603 IRawRule* rule,
604 Grammar* grammar,
605 IRawGrammar* grammarDef) {
606 if (!rule) {
607 return;
608 }
609
610 // Create matchers from the selector
611 auto matchers = createMatchers<std::vector<std::string>>(selector, nameMatcher);
612
613 // Get the compiled rule ID
614 RuleId ruleId = RuleFactory::getCompiledRuleId(rule, grammar, grammarDef->repository);
615
616 // Add an injection for each matcher
617 for (const auto& matcherWithPriority : matchers) {
618 Injection injection;
619 injection.debugSelector = selector;
620 injection.matcher = matcherWithPriority.matcher;
621 injection.ruleId = ruleId;
622 injection.grammar = grammarDef;
623 injection.priority = matcherWithPriority.priority;
624 result.push_back(injection);
625 }
626}
627
628std::vector<Injection> Grammar::_collectInjections() {
629 std::vector<Injection> result;
630
631 // Get the current grammar
632 IRawGrammar* grammar = _grammar;
633 if (!grammar) {
634 return result;
635 }
636
637 // Add injections from the current grammar
638 if (grammar->injections) {
639 for (const auto& pair : *grammar->injections) {
640 const std::string& expression = pair.first;
641 IRawRule* rule = pair.second;
642 collectInjections(result, expression, rule, this, grammar);
643 }
644 }
645
646 // Add injection grammars contributed for the current scope
647 if (_grammarRepository) {
648 std::vector<std::string> injectionScopeNames = _grammarRepository->injections(_rootScopeName);
649 for (const auto& injectionScopeName : injectionScopeNames) {
650 IRawGrammar* injectionGrammar = getExternalGrammar(injectionScopeName, nullptr);
651 if (injectionGrammar) {
652 const std::string* selector = injectionGrammar->injectionSelector;
653 if (selector && !selector->empty()) {
654 // Use the injection grammar's $self rule which contains the patterns
655 // After initGrammar, the patterns are moved to repository->selfRule
656 IRawRule* injectionRule = (injectionGrammar->repository && injectionGrammar->repository->selfRule)
657 ? injectionGrammar->repository->selfRule
658 : injectionGrammar;
659 collectInjections(result, *selector, injectionRule, this, injectionGrammar);
660 }
661 }
662 }
663 }
664
665 // Sort by priority
666 std::sort(result.begin(), result.end(), [](const Injection& a, const Injection& b) {
667 return a.priority < b.priority;
668 });
669
670 return result;
671}
672
673ITokenizeLineResult Grammar::tokenizeLine(
674 const std::string& lineText,
675 StateStack* prevState,
676 int timeLimit) {
677
678 StateStackImpl* prevStateImpl = dynamic_cast<StateStackImpl*>(prevState);
679 TokenizeResult r = _tokenize(lineText, prevStateImpl, false, timeLimit);
680
681 ITokenizeLineResult result;
682 result.tokens = r.lineTokens->getResult(r.ruleStack, r.lineLength);
683 result.ruleStack = r.ruleStack;
684 result.stoppedEarly = r.stoppedEarly;
685
686 delete r.lineTokens;
687 return result;
688}
689
690ITokenizeLineResult2 Grammar::tokenizeLine2(
691 const std::string& lineText,
692 StateStack* prevState,
693 int timeLimit) {
694
695 StateStackImpl* prevStateImpl = dynamic_cast<StateStackImpl*>(prevState);
696 TokenizeResult r = _tokenize(lineText, prevStateImpl, true, timeLimit);
697
698 ITokenizeLineResult2 result;
699 result.tokens = r.lineTokens->getBinaryResult(r.ruleStack, r.lineLength);
700 result.ruleStack = r.ruleStack;
701 result.stoppedEarly = r.stoppedEarly;
702
703 delete r.lineTokens;
704 return result;
705}
706
707Grammar::TokenizeResult Grammar::_tokenize(
708 const std::string& lineText,
709 StateStackImpl* prevState,
710 bool emitBinaryTokens,
711 int timeLimit) {
712
713 // Initialize root rule if needed
714 if (ruleIdToNumber(_rootId) == -1) {
715 _rootId = RuleFactory::getCompiledRuleId(
716 _grammar->repository->selfRule,
717 this,
718 _grammar->repository
719 );
720 getInjections();
721 }
722
723 bool isFirstLine;
724 if (!prevState || prevState == StateStackImpl::NULL_STATE) {
725 isFirstLine = true;
726
727 BasicScopeAttributes rawDefaultMetadata =
728 _basicScopeAttributesProvider->getDefaultAttributes();
729 StyleAttributes* defaultStyle = _themeProvider->getDefaults();
730
731 EncodedTokenAttributes defaultMetadata = EncodedTokenAttributesHelper::set(
732 0,
733 rawDefaultMetadata.languageId,
734 rawDefaultMetadata.tokenType,
735 nullptr,
736 defaultStyle->fontStyle,
737 defaultStyle->foregroundId,
738 defaultStyle->backgroundId
739 );
740
741 Rule* rootRule = getRule(_rootId);
742 std::string* rootScopeName = rootRule ? rootRule->getName(nullptr, nullptr) : nullptr;
743
744 AttributedScopeStack* scopeList;
745 if (rootScopeName) {
746 scopeList = AttributedScopeStack::createRootAndLookUpScopeName(
747 *rootScopeName,
748 defaultMetadata,
749 this
750 );
751 delete rootScopeName;
752 } else {
753 scopeList = AttributedScopeStack::createRoot("unknown", defaultMetadata);
754 }
755
756 prevState = new StateStackImpl(
757 nullptr,
758 _rootId,
759 -1,
760 -1,
761 false,
762 nullptr,
763 scopeList,
764 scopeList
765 );
766 } else {
767 isFirstLine = false;
768 prevState->reset();
769 }
770
771 std::string lineTextWithNewline = lineText + "\n";
772 OnigString* onigLineText = createOnigString(lineTextWithNewline);
773 int lineLength = onigLineText->content().length();
774
775 LineTokens* lineTokens = new LineTokens(
776 emitBinaryTokens,
777 lineTextWithNewline,
778 _tokenTypeMatchers,
779 balancedBracketSelectors
780 );
781
782 StackElement resultStack = tokenizeString(
783 this,
784 onigLineText,
785 isFirstLine,
786 0,
787 prevState,
788 lineTokens,
789 true,
790 timeLimit
791 );
792
793 disposeOnigString(onigLineText);
794
795 TokenizeResult result;
796 result.lineLength = lineLength;
797 result.lineTokens = lineTokens;
798 result.ruleStack = resultStack.stack;
799 result.stoppedEarly = resultStack.stoppedEarly;
800
801 return result;
802}
803
804// Helper functions
805
806Grammar* createGrammar(
807 const ScopeName& scopeName,
808 IRawGrammar* grammar,
809 int initialLanguage,
810 const EmbeddedLanguagesMap* embeddedLanguages,
811 const TokenTypeMap* tokenTypes,
812 BalancedBracketSelectors* balancedBracketSelectors,
813 IGrammarRepository* grammarRepository,
814 IThemeProvider* themeProvider,
815 IOnigLib* onigLib) {
816
817 return new Grammar(
818 scopeName,
819 grammar,
820 initialLanguage,
821 embeddedLanguages,
822 tokenTypes,
823 balancedBracketSelectors,
824 grammarRepository,
825 themeProvider,
826 onigLib
827 );
828}
829
830IRawGrammar* initGrammar(IRawGrammar* grammar, IRawRule* base) {
831 // Create repository if it doesn't exist
832 if (!grammar->repository) {
833 grammar->repository = new IRawRepository();
834 }
835
836 // Create $self rule with grammar's patterns and scope name
837 IRawRule* selfRule = new IRawRule();
838 // Transfer ownership of patterns from grammar to $self rule
839 // This avoids double-free when both grammar and selfRule are destroyed
840 if (grammar->patterns && !grammar->patterns->empty()) {
841 selfRule->patterns = new std::vector<IRawRule*>(*grammar->patterns);
842 // Clear grammar->patterns so we don't have shared ownership
843 // The IRawRule* objects are now owned only by selfRule->patterns
844 grammar->patterns->clear();
845 }
846 // Set name to grammar's scopeName
847 selfRule->name = new std::string(grammar->scopeName);
848
849 grammar->repository->selfRule = selfRule;
850
851 // Create $base rule
852 grammar->repository->baseRule = base ? base : selfRule;
853
854 return grammar;
855}
856
857} // namespace tml
RuleId ruleIdFromNumber(int id)
Convert an integer to a RuleId.
Definition types.h:109
int ruleIdToNumber(RuleId id)
Convert a RuleId to its integer value.
Definition types.h:116
int32_t EncodedTokenAttributes
Compact 32-bit encoding of a token's attributes.
Definition types.h:128