2#include "tokenizeString.h"
3#include "encodedTokenAttributes.h"
11StateStackImpl* StateStackImpl::NULL_STATE =
nullptr;
// Builds the selector set from two lists of selector strings: one for
// balanced brackets and one for unbalanced brackets.
15BalancedBracketSelectors::BalancedBracketSelectors(
16 const std::vector<std::string>& balancedBracketSelectors,
17 const std::vector<std::string>& unbalancedBracketSelectors)
// Special case: neither list contains any selector.
20 if (balancedBracketSelectors.empty() && unbalancedBracketSelectors.empty()) {
28bool BalancedBracketSelectors::matchesAlways()
const {
29 return _allowAny && _unbalancedBracketMatchers.empty();
32bool BalancedBracketSelectors::matchesNever()
const {
33 return !_allowAny && _balancedBracketMatchers.empty();
36bool BalancedBracketSelectors::match(
const std::vector<std::string>& scopes)
const {
// Builds one node of the scope stack. The member-initializer list stores the
// parent link, the scope name and the encoded token attributes as given.
44AttributedScopeStack::AttributedScopeStack(
45 AttributedScopeStack* parent_,
46 const ScopeName& scopeName_,
47 EncodedTokenAttributes tokenAttributes_)
48 : parent(parent_), scopeName(scopeName_), tokenAttributes(tokenAttributes_) {
51AttributedScopeStack::~AttributedScopeStack() {
55AttributedScopeStack* AttributedScopeStack::createRoot(
56 const std::string& scopeName,
57 EncodedTokenAttributes tokenAttributes) {
58 return new AttributedScopeStack(
nullptr, scopeName, tokenAttributes);
// Creates a root node for scopeName after asking the grammar for the scope's
// basic attributes; the node is returned with freshly computed attributes
// (scopeTokenAttributes) rather than the raw tokenAttributes argument.
61AttributedScopeStack* AttributedScopeStack::createRootAndLookUpScopeName(
62 const std::string& scopeName,
63 EncodedTokenAttributes tokenAttributes,
// Per-scope metadata (language id and token type are read from it below).
66 BasicScopeAttributes rawMetadata = grammar->getMetadataForScope(scopeName);
69 rawMetadata.languageId,
70 rawMetadata.tokenType,
// The font style argument is passed as "not set".
72 static_cast<int>(FontStyle::NotSet),
// nullptr parent: the result is a root node.
77 return new AttributedScopeStack(
nullptr, scopeName, scopeTokenAttributes);
// Returns a new node for scopeName whose parent is this node. The new
// node's attributes are computed from this node's tokenAttributes together
// with the grammar's metadata for the scope.
80AttributedScopeStack* AttributedScopeStack::push(
82 const std::string& scopeName) {
// An empty scope name is handled separately from the normal path.
84 if (scopeName.empty()) {
88 BasicScopeAttributes rawMetadata = grammar->getMetadataForScope(scopeName);
// Inputs to the attribute computation: the inherited attributes first,
// then the scope's language id and token type, with font style "not set".
90 this->tokenAttributes,
91 rawMetadata.languageId,
92 rawMetadata.tokenType,
94 static_cast<int>(FontStyle::NotSet),
99 return new AttributedScopeStack(
this, scopeName, scopeTokenAttributes);
// Pushes one or more scopes described by scopePath on top of this node.
// A path without a space is pushed as a single scope; otherwise the path is
// broken into several scope names that are pushed one after another.
102AttributedScopeStack* AttributedScopeStack::pushAttributed(
103 const std::string& scopePath,
// An empty path is handled separately from the normal path.
106 if (scopePath.empty()) {
// Fast path: no space in the path, so it names exactly one scope.
111 if (scopePath.find(
' ') == std::string::npos) {
113 return _pushAttributed(
this, scopePath, grammar);
// Slow path: walk the path character by character, accumulating the current
// scope name and flushing it into `scopes` when it is complete.
117 std::vector<std::string> scopes;
118 std::string currentScope;
119 for (
char c : scopePath) {
// Only non-empty pieces are recorded.
121 if (!currentScope.empty()) {
122 scopes.push_back(currentScope);
123 currentScope.clear();
// Flush the trailing piece, if any.
129 if (!currentScope.empty()) {
130 scopes.push_back(currentScope);
// Chain the pushes: each scope is pushed onto the result of the previous one.
133 AttributedScopeStack* result =
this;
134 for (
const std::string& scope : scopes) {
135 result = _pushAttributed(result, scope, grammar);
// Pushes a single scope on top of `target` and returns the new node.
// The new node's attributes are built from target's attributes, the
// grammar's metadata for the scope and the theme provider's default style.
140AttributedScopeStack* AttributedScopeStack::_pushAttributed(
141 AttributedScopeStack* target,
142 const std::string& scopeName,
// An empty scope name is handled separately from the normal path.
145 if (scopeName.empty()) {
149 BasicScopeAttributes rawMetadata = grammar->getMetadataForScope(scopeName);
// NOTE(review): themeProvider and defaultStyle are dereferenced below; no
// null check is visible on these lines — confirm they cannot be null.
152 IThemeProvider* themeProvider = grammar->getThemeProvider();
153 StyleAttributes* defaultStyle = themeProvider->getDefaults();
// Inputs to the attribute computation: inherited attributes, scope metadata,
// then font style / foreground / background from the theme defaults.
157 target->tokenAttributes,
158 rawMetadata.languageId,
159 rawMetadata.tokenType,
161 defaultStyle->fontStyle,
162 defaultStyle->foregroundId,
163 defaultStyle->backgroundId
166 return new AttributedScopeStack(target, scopeName, metadata);
// Collects scope names starting at this node and following parent links,
// then reverses the collected list, so the names gathered last (closest to
// the root) come first in the result.
169std::vector<std::string> AttributedScopeStack::getScopeNames()
const {
170 std::vector<std::string> result;
171 const AttributedScopeStack* current =
this;
173 result.push_back(current->scopeName);
174 current = current->parent;
// Names were gathered innermost-first; flip the order.
176 std::reverse(result.begin(), result.end());
// Compares two scope stacks for equality. Nodes are compared by scopeName
// and tokenAttributes.
180bool AttributedScopeStack::equals(AttributedScopeStack* a, AttributedScopeStack* b) {
// Same pointer (including both null) compares equal.
181 if (a == b)
return true;
// Exactly one side is null: not equal.
182 if (!a || !b)
return false;
// A node differs when either its name or its attributes differ.
185 if (a->scopeName != b->scopeName || a->tokenAttributes != b->tokenAttributes) {
// Equal only if both chains have been exhausted at the same time.
192 return a ==
nullptr && b ==
nullptr;
// Builds one frame of the rule stack from the given parent, positions,
// end rule and scope lists.
197StateStackImpl::StateStackImpl(
198 StateStackImpl* parent_,
202 bool beginRuleCapturedEOL_,
203 const std::string* endRule_,
204 AttributedScopeStack* nameScopesList_,
205 AttributedScopeStack* contentNameScopesList_)
208 _enterPos(enterPos_),
209 _anchorPos(anchorPos_),
210 beginRuleCapturedEOL(beginRuleCapturedEOL_),
// A non-null end rule is copied into a new heap-allocated string, so the
// frame does not alias the caller's string; a null end rule stays null.
211 endRule(endRule_ ? new std::string(*endRule_) : nullptr),
// The scope-list pointers are stored as given (not copied).
212 nameScopesList(nameScopesList_),
213 contentNameScopesList(contentNameScopesList_) {
// Depth is 1 for a frame without a parent, otherwise one more than the parent's.
215 depth = parent ? parent->depth + 1 : 1;
218StateStackImpl::~StateStackImpl() {
// Returns a newly allocated StateStackImpl constructed from this frame's
// fields (beginRuleCapturedEOL and contentNameScopesList are among the
// arguments passed through).
223StateStack* StateStackImpl::clone() {
224 return new StateStackImpl(
229 beginRuleCapturedEOL,
232 contentNameScopesList
// Structural equality between this stack and `other`.
// Checks visible here, in order: pointer identity, null, dynamic type,
// _enterPos, presence and contents of endRule, both scope lists, and finally
// the parent frames (recursively).
236bool StateStackImpl::equals(StateStack* other) {
237 if (
this == other)
return true;
238 if (!other)
return false;
// Only another StateStackImpl can compare equal.
240 StateStackImpl* otherImpl =
dynamic_cast<StateStackImpl*
>(other);
241 if (!otherImpl)
return false;
245 if (_enterPos != otherImpl->_enterPos)
return false;
// endRule is an optional heap string: both sides must agree on whether it
// exists before the contents are compared.
247 bool thisHasEndRule = (endRule !=
nullptr);
248 bool otherHasEndRule = (otherImpl->endRule !=
nullptr);
249 if (thisHasEndRule != otherHasEndRule)
return false;
250 if (thisHasEndRule && *endRule != *otherImpl->endRule)
return false;
252 if (!AttributedScopeStack::equals(nameScopesList, otherImpl->nameScopesList))
return false;
253 if (!AttributedScopeStack::equals(contentNameScopesList, otherImpl->contentNameScopesList))
return false;
// Both at the root: equal. Only one at the root: different depths, not equal.
256 if (parent ==
nullptr && otherImpl->parent ==
nullptr)
return true;
257 if (parent ==
nullptr || otherImpl->parent ==
nullptr)
return false;
// Recurse into the parent frames.
259 return parent->equals(otherImpl->parent);
262void StateStackImpl::reset() {
264 StateStackImpl* el =
this;
// Returns a newly allocated frame built from the supplied end rule, scope
// lists and beginRuleCapturedEOL flag.
272StateStackImpl* StateStackImpl::push(
276 bool beginRuleCapturedEOL,
277 const std::string* endRule,
278 AttributedScopeStack* nameScopesList,
279 AttributedScopeStack* contentNameScopesList) {
281 return new StateStackImpl(
286 beginRuleCapturedEOL,
289 contentNameScopesList
293StateStackImpl* StateStackImpl::pop() {
297StateStackImpl* StateStackImpl::safePop() {
304Rule* StateStackImpl::getRule(Grammar* grammar) {
305 return grammar->getRule(this->ruleId);
// Produces a frame like this one but with a different content-name scope
// list, by pushing a replacement frame onto this frame's parent.
308StateStackImpl* StateStackImpl::withContentNameScopesList(AttributedScopeStack* contentNameScopesList) {
// Pointer comparison: the requested list is already the current one.
309 if (this->contentNameScopesList == contentNameScopesList) {
// NOTE(review): this->parent is dereferenced here; no null check is visible
// on these lines — confirm this is never called on a root frame.
312 return this->parent->push(
316 this->beginRuleCapturedEOL,
318 this->nameScopesList,
319 contentNameScopesList
// Produces a frame like this one but with the given end rule.
323StateStackImpl* StateStackImpl::withEndRule(
const std::string& endRule) {
// This frame already has an end rule with identical contents.
324 if (this->endRule && *this->endRule == endRule) {
// Otherwise a new frame is allocated carrying over this frame's flag and
// scope lists.
327 return new StateStackImpl(
332 this->beginRuleCapturedEOL,
334 this->nameScopesList,
335 this->contentNameScopesList
// Walks frames starting at this one for as long as a frame exists and its
// _enterPos equals other's _enterPos.
// NOTE(review): `other` is dereferenced without a null check on these lines —
// confirm callers never pass null.
339bool StateStackImpl::hasSameRuleAs(StateStackImpl* other) {
340 StateStackImpl* el =
this;
341 while (el && el->_enterPos == other->_enterPos) {
// Builds a debug string that starts with "StateStack[" and lists the frames
// from this one towards the root as "Rule#<n>", separated by ", ".
350std::string StateStackImpl::toString()
const {
351 std::string result =
"StateStack[";
352 const StateStackImpl* current =
this;
// One entry per frame, using the numeric form of the frame's rule id.
354 result +=
"Rule#" + std::to_string(
ruleIdToNumber(current->ruleId));
// Separator only when another frame follows.
355 if (current->parent) result +=
", ";
356 current = current->parent;
// Sets up a token collector for one line. Stores the output mode flag, the
// token-type matchers and the bracket selectors, and starts with no token
// emitted yet (_lastTokenEndIndex == 0).
364LineTokens::LineTokens(
365 bool emitBinaryTokens,
366 const std::string& lineText,
367 const std::vector<TokenTypeMatcher>& tokenTypeMatchers,
368 BalancedBracketSelectors* balancedBracketSelectors)
369 : _emitBinaryTokens(emitBinaryTokens),
371 _tokenTypeMatchers(tokenTypeMatchers),
// The selectors pointer is stored as given (not copied).
372 _balancedBracketSelectors(balancedBracketSelectors),
373 _lastTokenEndIndex(0) {
376void LineTokens::produce(StateStackImpl* stack,
int endIndex) {
377 produceFromScopes(stack->contentNameScopesList, endIndex);
// Emits one token covering [_lastTokenEndIndex, endIndex) with the scopes of
// scopesList, in either binary or object form, and advances
// _lastTokenEndIndex to endIndex.
380void LineTokens::produceFromScopes(AttributedScopeStack* scopesList,
int endIndex) {
// Guard for a range that would be empty or go backwards.
381 if (_lastTokenEndIndex >= endIndex) {
// Binary mode: each token is a pair of values appended to _binaryTokens —
// its start index followed by the scope list's encoded attributes.
385 if (_emitBinaryTokens) {
386 _binaryTokens.push_back(_lastTokenEndIndex);
387 _binaryTokens.push_back(scopesList->tokenAttributes);
388 _lastTokenEndIndex = endIndex;
// Object mode: record start, end and the full list of scope names.
391 token.startIndex = _lastTokenEndIndex;
392 token.endIndex = endIndex;
393 token.scopes = scopesList->getScopeNames();
394 _tokens.push_back(token);
395 _lastTokenEndIndex = endIndex;
// Finalizes and returns the object-form tokens for a line of lineLength
// characters.
399std::vector<IToken> LineTokens::getResult(StateStackImpl* stack,
int lineLength) {
// Detects a last token that starts at the final character position.
401 if (!_tokens.empty() && _tokens.back().startIndex == lineLength - 1) {
// No tokens at all: force one to be produced. _lastTokenEndIndex is set to
// -1 first so that it is strictly below lineLength even when lineLength is 0
// (produceFromScopes tests _lastTokenEndIndex >= endIndex); the resulting
// token's start is then reset to 0.
406 if (_tokens.empty()) {
407 _lastTokenEndIndex = -1;
408 produce(stack, lineLength);
409 if (!_tokens.empty()) {
410 _tokens.back().startIndex = 0;
417std::vector<uint32_t> LineTokens::getBinaryResult(StateStackImpl* stack,
int lineLength) {
418 return _binaryTokens;
// NOTE(review): presumably the Grammar constructor — the declarator line is
// not part of what is shown here; confirm before relying on this comment.
// Stores the root scope name, repository, theme provider and bracket
// selectors, then prepares the rule table, the scope-attributes provider,
// the initialized raw grammar and the token-type matchers.
424 const ScopeName& rootScopeName,
425 IRawGrammar* grammar,
427 const EmbeddedLanguagesMap* embeddedLanguages,
428 const TokenTypeMap* tokenTypes,
429 BalancedBracketSelectors* balancedBracketSelectors_,
430 IGrammarRepository* grammarRepository,
431 IThemeProvider* themeProvider,
433 : _rootScopeName(rootScopeName),
436 _grammarRepository(grammarRepository),
437 _themeProvider(themeProvider),
// Injections start out uncomputed; getInjections() fills them in on demand.
439 _injections(nullptr),
440 balancedBracketSelectors(balancedBracketSelectors_),
// Slot 0 of the rule table is occupied by a null entry.
443 _ruleId2desc.push_back(
nullptr);
445 _basicScopeAttributesProvider =
new BasicScopeAttributesProvider(
// Top-level grammar: no base rule is supplied.
450 _grammar = initGrammar(grammar,
nullptr);
// One matcher per token-type entry; the entry's value becomes the matcher type.
454 for (
const auto& pair : *tokenTypes) {
457 TokenTypeMatcher matcher;
458 matcher.type = pair.second;
459 _tokenTypeMatchers.push_back(matcher);
// Releases resources held by the grammar: visits every registered rule,
// empties the rule table, deletes the scope-attributes provider and the
// bracket selectors, and resets the cached injections pointer.
468void Grammar::dispose() {
469 for (
size_t i = 0; i < _ruleId2desc.size(); i++) {
470 auto* rule = _ruleId2desc[i];
476 _ruleId2desc.clear();
// Delete-and-null so a repeated dispose() does not delete twice.
478 if (_basicScopeAttributesProvider) {
479 delete _basicScopeAttributesProvider;
480 _basicScopeAttributesProvider =
nullptr;
// NOTE(review): confirm the vector allocated in getInjections() is deleted
// before this pointer is cleared.
484 _injections =
nullptr;
// NOTE(review): the selectors object is received through the constructor /
// createGrammar and deleted here — confirm callers hand over ownership.
486 if (balancedBracketSelectors) {
487 delete balancedBracketSelectors;
488 balancedBracketSelectors =
nullptr;
492OnigScanner* Grammar::createOnigScanner(
const std::vector<std::string>& sources) {
493 return _onigLib->createOnigScanner(sources);
496OnigString* Grammar::createOnigString(
const std::string& str) {
497 return _onigLib->createOnigString(str);
500BasicScopeAttributes Grammar::getMetadataForScope(
const std::string& scope) {
501 return _basicScopeAttributesProvider->getBasicScopeAttributes(&scope);
// Returns the rule registered under ruleId. The table entry is read only
// when the numeric id lies within [0, _ruleId2desc.size()).
504Rule* Grammar::getRule(RuleId ruleId) {
// Bounds check before indexing the rule table.
506 if (
id >= 0 &&
id <
static_cast<int>(_ruleId2desc.size())) {
507 return _ruleId2desc[id];
// Assigns the next rule id to `rule` and stores it in the rule table.
512RuleId Grammar::registerRule(Rule* rule) {
// Ids come from a pre-incremented counter.
513 int id = ++_lastRuleId;
// Grow the table (padding with null entries) so that index `id` exists.
514 if (_ruleId2desc.size() <=
static_cast<size_t>(
id)) {
515 _ruleId2desc.resize(
id + 1,
nullptr);
517 _ruleId2desc[id] = rule;
// Reserves the next rule id without attaching a rule to it: the counter is
// pre-incremented and the table is grown with null entries so that the new
// index exists (setRule can fill it in later).
521RuleId Grammar::allocateRuleId() {
522 int id = ++_lastRuleId;
523 if (_ruleId2desc.size() <=
static_cast<size_t>(
id)) {
524 _ruleId2desc.resize(
id + 1,
nullptr);
// Stores `rule` in the table slot for ruleId. The write happens only when
// the numeric id lies within [0, _ruleId2desc.size()); out-of-range ids do
// not reach the assignment.
529void Grammar::setRule(RuleId ruleId, Rule* rule) {
531 if (
id >= 0 &&
id <
static_cast<int>(_ruleId2desc.size())) {
532 _ruleId2desc[id] = rule;
// Resolves another grammar by scope name, caching initialized grammars in
// _includedGrammars.
536IRawGrammar* Grammar::getExternalGrammar(
const std::string& scopeName, IRawRepository* repository) {
// Cache lookup first.
537 auto it = _includedGrammars.find(scopeName);
538 if (it != _includedGrammars.end()) {
// Cache miss: ask the repository (if there is one) for the raw grammar.
542 if (_grammarRepository) {
543 IRawGrammar* rawIncludedGrammar = _grammarRepository->lookup(scopeName);
544 if (rawIncludedGrammar) {
// Use the including repository's base rule when available, otherwise none.
545 IRawRule* base = (repository && repository->baseRule) ? repository->baseRule : nullptr;
// Initialize, cache and return the grammar.
546 _includedGrammars[scopeName] = initGrammar(rawIncludedGrammar, base);
547 return _includedGrammars[scopeName];
// Returns the grammar's injections, computing them on first use:
// when _injections is still null, the result of _collectInjections() is
// stored in a newly allocated vector.
// NOTE(review): the return type is a vector by value, so a cached vector
// would be copied for each caller — confirm this is intended.
556std::vector<Injection> Grammar::getInjections() {
557 if (_injections ==
nullptr) {
558 _injections =
new std::vector<Injection>(_collectInjections());
/// Reports whether `thisScopeName` is `scopeName` itself or a dotted
/// descendant of it: "source.js" matches "source", "sourcejs" does not.
///
/// @param thisScopeName  Scope name being tested.
/// @param scopeName      Scope name (or dotted prefix) to match against.
/// @return true on an exact match, or when `thisScopeName` begins with
///         `scopeName` immediately followed by '.'.
///
/// NOTE(review): the return values of the two guards below were restored as
/// false / true — confirm against the original file.
static bool scopesAreMatching(const std::string& thisScopeName, const std::string& scopeName) {
    if (thisScopeName.empty()) {
        return false;
    }
    if (thisScopeName == scopeName) {
        return true;
    }
    const size_t len = scopeName.length();
    // compare() checks the prefix in place; substr() would allocate a
    // temporary string for every call. The length test comes first so the
    // index `len` is always valid.
    return thisScopeName.length() > len &&
           thisScopeName.compare(0, len, scopeName) == 0 &&
           thisScopeName[len] == '.';
}
// Tests a list of identifiers against a list of scopes using
// scopesAreMatching. The search position (lastIndex) is shared across
// identifiers, so each identifier is looked for starting where the scan
// currently stands rather than from the beginning.
578static bool nameMatcher(
const std::vector<std::string>& identifiers,
579 const std::vector<std::string>& scopes) {
// Fewer scopes than identifiers is handled up front.
580 if (scopes.size() < identifiers.size()) {
583 size_t lastIndex = 0;
584 for (
const auto& identifier : identifiers) {
586 for (
size_t i = lastIndex; i < scopes.size(); i++) {
587 if (scopesAreMatching(scopes[i], identifier)) {
// Appends to `result` one Injection per matcher parsed from `selector`.
// All injections created by one call share the same compiled rule id and
// the same grammar definition; they differ in matcher and priority.
601static void collectInjections(std::vector<Injection>& result,
602 const std::string& selector,
605 IRawGrammar* grammarDef) {
// Parse the selector into matchers, using nameMatcher to test scope names.
611 auto matchers = createMatchers<std::vector<std::string>>(selector, nameMatcher);
// Compile the injected rule once, before the loop.
614 RuleId ruleId = RuleFactory::getCompiledRuleId(rule, grammar, grammarDef->repository);
617 for (
const auto& matcherWithPriority : matchers) {
// The original selector text is kept on the injection for debugging.
619 injection.debugSelector = selector;
620 injection.matcher = matcherWithPriority.matcher;
621 injection.ruleId = ruleId;
622 injection.grammar = grammarDef;
623 injection.priority = matcherWithPriority.priority;
624 result.push_back(injection);
// Gathers every injection that applies to this grammar from two sources —
// the grammar's own `injections` table and the grammars the repository
// lists as injecting into the root scope — then orders them by priority.
628std::vector<Injection> Grammar::_collectInjections() {
629 std::vector<Injection> result;
632 IRawGrammar* grammar = _grammar;
// Source 1: injections declared inside the grammar itself
// (key = selector expression, value = rule).
638 if (grammar->injections) {
639 for (
const auto& pair : *grammar->injections) {
640 const std::string& expression = pair.first;
641 IRawRule* rule = pair.second;
642 collectInjections(result, expression, rule,
this, grammar);
// Source 2: external grammars registered as injecting into the root scope.
647 if (_grammarRepository) {
648 std::vector<std::string> injectionScopeNames = _grammarRepository->injections(_rootScopeName);
649 for (
const auto& injectionScopeName : injectionScopeNames) {
650 IRawGrammar* injectionGrammar = getExternalGrammar(injectionScopeName,
nullptr);
651 if (injectionGrammar) {
// Only grammars with a non-empty injection selector contribute.
652 const std::string* selector = injectionGrammar->injectionSelector;
653 if (selector && !selector->empty()) {
// The injected rule is the external grammar's self rule when it has one.
656 IRawRule* injectionRule = (injectionGrammar->repository && injectionGrammar->repository->selfRule)
657 ? injectionGrammar->repository->selfRule
659 collectInjections(result, *selector, injectionRule,
this, injectionGrammar);
// Ascending priority.
// NOTE(review): std::sort does not keep the relative order of injections
// with equal priority; if collection order matters for ties,
// std::stable_sort would be needed — confirm.
666 std::sort(result.begin(), result.end(), [](
const Injection& a,
const Injection& b) {
667 return a.priority < b.priority;
// Tokenizes one line into object-form tokens (non-binary mode) and reports
// the resulting rule stack and whether tokenization stopped early.
673ITokenizeLineResult Grammar::tokenizeLine(
674 const std::string& lineText,
675 StateStack* prevState,
// A prevState that is null or not a StateStackImpl becomes nullptr here;
// _tokenize builds an initial state when given a null previous state.
678 StateStackImpl* prevStateImpl =
dynamic_cast<StateStackImpl*
>(prevState);
// `false` selects object tokens rather than binary tokens.
679 TokenizeResult r = _tokenize(lineText, prevStateImpl,
false, timeLimit);
681 ITokenizeLineResult result;
// NOTE(review): r.lineTokens is allocated with `new` inside _tokenize;
// no matching delete is visible on these lines — confirm it is released.
682 result.tokens = r.lineTokens->getResult(r.ruleStack, r.lineLength);
683 result.ruleStack = r.ruleStack;
684 result.stoppedEarly = r.stoppedEarly;
// Tokenizes one line into binary-form tokens and reports the resulting rule
// stack and whether tokenization stopped early.
690ITokenizeLineResult2 Grammar::tokenizeLine2(
691 const std::string& lineText,
692 StateStack* prevState,
// A prevState that is null or not a StateStackImpl becomes nullptr here;
// _tokenize builds an initial state when given a null previous state.
695 StateStackImpl* prevStateImpl =
dynamic_cast<StateStackImpl*
>(prevState);
// `true` selects binary tokens.
696 TokenizeResult r = _tokenize(lineText, prevStateImpl,
true, timeLimit);
698 ITokenizeLineResult2 result;
// NOTE(review): r.lineTokens is allocated with `new` inside _tokenize;
// no matching delete is visible on these lines — confirm it is released.
699 result.tokens = r.lineTokens->getBinaryResult(r.ruleStack, r.lineLength);
700 result.ruleStack = r.ruleStack;
701 result.stoppedEarly = r.stoppedEarly;
// Shared implementation behind tokenizeLine / tokenizeLine2.
// Steps visible here: compile the root rule, build an initial state when no
// previous state is supplied, append a newline to the line, run
// tokenizeString, and package the outcome in a TokenizeResult.
707Grammar::TokenizeResult Grammar::_tokenize(
708 const std::string& lineText,
709 StateStackImpl* prevState,
710 bool emitBinaryTokens,
// The root rule id is compiled from the grammar's self rule.
715 _rootId = RuleFactory::getCompiledRuleId(
716 _grammar->repository->selfRule,
// No usable previous state: build the initial one from default attributes
// and the theme's default style.
724 if (!prevState || prevState == StateStackImpl::NULL_STATE) {
727 BasicScopeAttributes rawDefaultMetadata =
728 _basicScopeAttributesProvider->getDefaultAttributes();
729 StyleAttributes* defaultStyle = _themeProvider->getDefaults();
733 rawDefaultMetadata.languageId,
734 rawDefaultMetadata.tokenType,
736 defaultStyle->fontStyle,
737 defaultStyle->foregroundId,
738 defaultStyle->backgroundId
// The root scope name comes from the root rule when that rule exists.
741 Rule* rootRule = getRule(_rootId);
742 std::string* rootScopeName = rootRule ? rootRule->getName(
nullptr,
nullptr) :
nullptr;
744 AttributedScopeStack* scopeList;
746 scopeList = AttributedScopeStack::createRootAndLookUpScopeName(
// The name string returned by getName is released here.
751 delete rootScopeName;
// Fallback root scope named "unknown" with the default metadata.
753 scopeList = AttributedScopeStack::createRoot(
"unknown", defaultMetadata);
756 prevState =
new StateStackImpl(
// The tokenizer works on the line with a trailing newline appended, so
// lineLength below includes that newline.
771 std::string lineTextWithNewline = lineText +
"\n";
772 OnigString* onigLineText = createOnigString(lineTextWithNewline);
// NOTE(review): length() is unsigned and is narrowed to int here.
773 int lineLength = onigLineText->content().length();
// Heap-allocated collector handed back to the caller through the result.
775 LineTokens* lineTokens =
new LineTokens(
779 balancedBracketSelectors
782 StackElement resultStack = tokenizeString(
// The wrapped line is released once tokenization has finished.
793 disposeOnigString(onigLineText);
795 TokenizeResult result;
796 result.lineLength = lineLength;
797 result.lineTokens = lineTokens;
798 result.ruleStack = resultStack.stack;
799 result.stoppedEarly = resultStack.stoppedEarly;
// Free-function factory returning a Grammar*. Takes the scope name, the raw
// grammar, the embedded-language and token-type maps, the bracket selectors,
// the grammar repository and the theme provider; balancedBracketSelectors is
// forwarded as one of the arguments.
806Grammar* createGrammar(
807 const ScopeName& scopeName,
808 IRawGrammar* grammar,
810 const EmbeddedLanguagesMap* embeddedLanguages,
811 const TokenTypeMap* tokenTypes,
812 BalancedBracketSelectors* balancedBracketSelectors,
813 IGrammarRepository* grammarRepository,
814 IThemeProvider* themeProvider,
823 balancedBracketSelectors,
// Prepares a raw grammar for use: guarantees it has a repository, wraps its
// top-level patterns in a synthetic "self" rule named after the grammar's
// scope, and installs the self and base rules in the repository.
// The grammar passed in is modified in place.
830IRawGrammar* initGrammar(IRawGrammar* grammar, IRawRule* base) {
// Create the repository on demand.
832 if (!grammar->repository) {
833 grammar->repository =
new IRawRepository();
837 IRawRule* selfRule =
new IRawRule();
// The self rule receives a copy of the pattern list (the pointed-to rules
// are shared, not duplicated); the grammar's own list is then emptied.
840 if (grammar->patterns && !grammar->patterns->empty()) {
841 selfRule->patterns =
new std::vector<IRawRule*>(*grammar->patterns);
844 grammar->patterns->clear();
847 selfRule->name =
new std::string(grammar->scopeName);
849 grammar->repository->selfRule = selfRule;
// Without an explicit base rule, the grammar is its own base.
852 grammar->repository->baseRule = base ? base : selfRule;
RuleId ruleIdFromNumber(int id)
Convert an integer to a RuleId.
int ruleIdToNumber(RuleId id)
Convert a RuleId to its integer value.
int32_t EncodedTokenAttributes
Compact 32-bit encoding of a token's attributes.