2#include "grammarDependencies.h"
// Default constructor: the rule starts without a scanner (scanner == nullptr).
10CompiledRule::CompiledRule() : scanner(nullptr) {}
// Destructor.
// NOTE(review): the body is not visible in this listing; confirm it releases
// the scanner (for example by calling dispose()).
12CompiledRule::~CompiledRule() {
// Releases resources held by this compiled rule.
// Only the null check on `scanner` is visible here.
// NOTE(review): the statements guarded by the check are not present in this
// listing; confirm they dispose/delete the scanner and reset it to nullptr.
16void CompiledRule::dispose() {
17 if (scanner !=
nullptr) {
// Constructs an empty list: no anchors recorded, no cached compilation, and
// all four anchor-specific cache slots (A0/A1 x G0/G1) set to nullptr.
// NOTE(review): the A*/G* suffixes presumably correspond to the allowA/allowG
// flags of compileAG(); the selection logic is not visible in this listing.
26RegExpSourceList::RegExpSourceList()
27 : _hasAnchors(false), _cached(nullptr),
28 _anchorCache_A0_G0(nullptr), _anchorCache_A0_G1(nullptr),
29 _anchorCache_A1_G0(nullptr), _anchorCache_A1_G1(nullptr) {
// Destructor.
// NOTE(review): the body is not visible in this listing; confirm it releases
// the cached compilations and clarify who owns the RegexSource items.
32RegExpSourceList::~RegExpSourceList() {
// Appends `item` to the end of the pattern list.
// `item` is dereferenced unconditionally, so it must not be null.
// NOTE(review): the body of the hasAnchor branch is not visible in this
// listing; presumably it records that the list contains anchors — confirm.
36void RegExpSourceList::push(RegexSource* item) {
37 _items.push_back(item);
38 if (item->hasAnchor) {
// Inserts `item` at the front of the pattern list.
// `item` is dereferenced unconditionally, so it must not be null.
// NOTE(review): the body of the hasAnchor branch is not visible in this
// listing; presumably it records that the list contains anchors — confirm.
43void RegExpSourceList::unshift(RegexSource* item) {
44 _items.insert(_items.begin(), item);
45 if (item->hasAnchor) {
// Replaces the regex source text of the item at `index`.
// The write only happens when `index` is within [0, _items.size()) and the
// new text differs from the stored one.
// NOTE(review): statements between the comparison and the assignment are not
// visible in this listing (presumably cache invalidation — confirm). No
// handling for an out-of-range index is visible either.
50void RegExpSourceList::setSource(
int index,
const std::string& newSource) {
51 if (index >= 0 && index <
static_cast<int>(_items.size())) {
52 if (_items[index]->source != newSource) {
55 _items[index]->source = newSource;
// Const accessor returning an int.
// NOTE(review): the body is not visible in this listing; presumably it returns
// the number of items (BeginEndRule uses length() - 1 as the index of the
// last pattern) — confirm.
60int RegExpSourceList::length()
const {
// Compiles the list into a CompiledRule stored in `_cached`.
// Visible steps: collect every item's `source` string, create a scanner for
// them through `onigLib`, and record each item's ruleId in the same order as
// the sources.
// NOTE(review): any guard that skips recompilation when `_cached` is already
// set, and the return statement, are not visible in this listing.
64CompiledRule* RegExpSourceList::compile(IOnigLib* onigLib) {
66 std::vector<std::string> sources;
67 for (
auto* item : _items) {
68 sources.push_back(item->source);
71 _cached =
new CompiledRule();
72 _cached->scanner = onigLib->createOnigScanner(sources);
// rules[i] is the id of the rule that produced sources[i].
74 for (
auto* item : _items) {
75 _cached->rules.push_back(item->ruleId);
// Compiles the list for a specific (allowA, allowG) combination.
// Visible steps: one path delegates to compile(); otherwise one of the four
// _anchorCache_* members is chosen as the cache slot, the slot is tested for
// an existing value, and a new CompiledRule is built from each item's
// resolveAnchors(allowA, allowG) output plus the items' rule ids.
// NOTE(review): the conditions selecting each path/slot, the body of the
// cache-hit branch, the store into *cacheSlot and the return statement are
// not visible in this listing — confirm against the full file.
81CompiledRule* RegExpSourceList::compileAG(IOnigLib* onigLib,
bool allowA,
bool allowG) {
// NOTE(review): presumably taken when the list has no anchors — confirm.
85 return compile(onigLib);
89 CompiledRule** cacheSlot =
nullptr;
92 cacheSlot = &_anchorCache_A1_G1;
94 cacheSlot = &_anchorCache_A1_G0;
98 cacheSlot = &_anchorCache_A0_G1;
100 cacheSlot = &_anchorCache_A0_G0;
104 if (*cacheSlot !=
nullptr) {
109 std::vector<std::string> sources;
110 for (
auto* item : _items) {
111 sources.push_back(item->resolveAnchors(allowA, allowG));
114 CompiledRule* result =
new CompiledRule();
115 result->scanner = onigLib->createOnigScanner(sources);
117 for (
auto* item : _items) {
118 result->rules.push_back(item->ruleId);
// Releases the cached compilations.
// Each of the four anchor-specific caches that is set is disposed, deleted
// and reset to nullptr, so calling this again does not touch freed memory.
// NOTE(review): the lines before the first visible check are missing from
// this listing; presumably they release `_cached` the same way — confirm.
125void RegExpSourceList::dispose() {
131 if (_anchorCache_A0_G0) {
132 _anchorCache_A0_G0->dispose();
133 delete _anchorCache_A0_G0;
134 _anchorCache_A0_G0 =
nullptr;
136 if (_anchorCache_A0_G1) {
137 _anchorCache_A0_G1->dispose();
138 delete _anchorCache_A0_G1;
139 _anchorCache_A0_G1 =
nullptr;
141 if (_anchorCache_A1_G0) {
142 _anchorCache_A1_G0->dispose();
143 delete _anchorCache_A1_G0;
144 _anchorCache_A1_G0 =
nullptr;
146 if (_anchorCache_A1_G1) {
147 _anchorCache_A1_G1->dispose();
148 delete _anchorCache_A1_G1;
149 _anchorCache_A1_G1 =
nullptr;
// Base-rule constructor.
// Stores the location and id, takes private heap copies of `name` and
// `contentName` when they are non-null (nullptr otherwise), and precomputes
// whether each one contains capture references via RegexSource::hasCaptures.
// NOTE(review): _name/_contentName are allocated with `new`; the matching
// release is not visible in this listing — confirm the destructor deletes them.
155Rule::Rule(ILocation* location_, RuleId id_,
156 const std::string* name,
const std::string* contentName)
157 : location(location_), id(id_),
158 _name(name ? new std::string(*name) : nullptr),
159 _contentName(contentName ? new std::string(*contentName) : nullptr) {
161 _nameIsCapturing = RegexSource::hasCaptures(_name);
162 _contentNameIsCapturing = RegexSource::hasCaptures(_contentName);
// Builds a human-readable label of the form
//   "Rule#<numeric id> @ <basename(filename)>:<line>"
// using "unknown" for the location part when no location is attached.
171std::string Rule::getDebugName()
const {
172 std::string loc = location ? (basename(location->filename) +
":" + std::to_string(location->line)) :
"unknown";
173 return "Rule#" + std::to_string(
ruleIdToNumber(
id)) +
" @ " + loc;
// Returns the rule's scope name as a newly allocated string, or nullptr when
// the rule has no name.
// If the name contains no capture references, or `lineText` or
// `captureIndices` is null, the stored name is copied verbatim. Otherwise the
// capture references are substituted via RegexSource::replaceCaptures.
// NOTE(review): the result is created with `new`; presumably the caller is
// responsible for deleting it — confirm at the call sites.
176std::string* Rule::getName(
const std::string* lineText,
177 const std::vector<IOnigCaptureIndex>* captureIndices)
const {
178 if (!_nameIsCapturing || _name ==
nullptr || lineText ==
nullptr || captureIndices ==
nullptr) {
179 return _name ?
new std::string(*_name) : nullptr;
181 return new std::string(RegexSource::replaceCaptures(*_name, *lineText, *captureIndices));
// Returns the rule's content scope name as a newly allocated string, or
// nullptr when the rule has no content name.
// If the content name contains no capture references it is copied verbatim.
// Otherwise the references are substituted via RegexSource::replaceCaptures
// using `lineText` and `captureIndices`.
// NOTE(review): the result is created with `new`; presumably the caller is
// responsible for deleting it — confirm at the call sites.
184std::string* Rule::getContentName(
const std::string& lineText,
185 const std::vector<IOnigCaptureIndex>& captureIndices)
const {
186 if (!_contentNameIsCapturing || _contentName ==
nullptr) {
187 return _contentName ?
new std::string(*_contentName) : nullptr;
189 return new std::string(RegexSource::replaceCaptures(*_contentName, lineText, captureIndices));
// Constructs a capture rule: forwards location, id, name and contentName to
// the Rule base and stores the id of the rule used to re-tokenize the
// captured text.
194CaptureRule::CaptureRule(ILocation* location_, RuleId id_,
195 const std::string* name,
const std::string* contentName,
196 RuleId retokenizeCapturedWithRuleId_)
197 : Rule(location_, id_, name, contentName),
198 retokenizeCapturedWithRuleId(retokenizeCapturedWithRuleId_) {
// Disposes the capture rule.
// NOTE(review): the body is not visible in this listing.
201void CaptureRule::dispose() {
// Capture rules contribute no patterns: this always throws
// std::runtime_error("Not supported!"). Both parameters are unused.
205void CaptureRule::collectPatterns(IRuleRegistry* grammar, RegExpSourceList* out) {
206 throw std::runtime_error(
"Not supported!");
// Capture rules cannot be compiled: this always throws
// std::runtime_error("Not supported!"). All parameters are unused.
209CompiledRule* CaptureRule::compile(IRuleRegistry* grammar, IOnigLib* onigLib,
210 const std::string* endRegexSource) {
211 throw std::runtime_error(
"Not supported!");
// Capture rules cannot be compiled: this always throws
// std::runtime_error("Not supported!"). All parameters are unused.
214CompiledRule* CaptureRule::compileAG(IRuleRegistry* grammar, IOnigLib* onigLib,
215 const std::string* endRegexSource,
216 bool allowA,
bool allowG) {
217 throw std::runtime_error(
"Not supported!");
// Constructs a single-regex match rule.
// The Rule base receives no content name (nullptr), and the compiled-pattern
// cache starts empty.
// NOTE(review): the initialisers that consume `match` and `captures_` are not
// visible in this listing; presumably they initialise `_match` and the
// captures member — confirm.
222MatchRule::MatchRule(ILocation* location_, RuleId id_,
223 const std::string* name,
const std::string& match,
224 const std::vector<CaptureRule*>& captures_)
225 : Rule(location_, id_, name, nullptr),
228 _cachedCompiledPatterns(nullptr) {
// Destructor.
// NOTE(review): the body is not visible in this listing; confirm it releases
// the cached pattern list (for example via dispose()).
231MatchRule::~MatchRule() {
// Releases the lazily built pattern list, if any: disposes it, deletes it and
// resets the pointer so a later compile rebuilds it.
237void MatchRule::dispose() {
238 if (_cachedCompiledPatterns) {
239 _cachedCompiledPatterns->dispose();
240 delete _cachedCompiledPatterns;
241 _cachedCompiledPatterns =
nullptr;
// Returns a copy of the match regex source text (for debugging output).
245std::string MatchRule::getDebugMatchRegExp()
const {
246 return _match.source;
// Adds this rule's pattern(s) to `out`.
// NOTE(review): the body is not visible in this listing; presumably it pushes
// the match regex onto `out` — confirm.
249void MatchRule::collectPatterns(IRuleRegistry* grammar, RegExpSourceList* out) {
// Compiles this rule by delegating to the (lazily built) cached pattern list.
// `endRegexSource` is not used by the visible code.
253CompiledRule* MatchRule::compile(IRuleRegistry* grammar, IOnigLib* onigLib,
254 const std::string* endRegexSource) {
255 return _getCachedCompiledPatterns(grammar)->compile(onigLib);
// Anchor-aware variant of compile(): delegates to the cached pattern list's
// compileAG with the given allowA/allowG flags.
// `endRegexSource` is not used by the visible code.
258CompiledRule* MatchRule::compileAG(IRuleRegistry* grammar, IOnigLib* onigLib,
259 const std::string* endRegexSource,
260 bool allowA,
bool allowG) {
261 return _getCachedCompiledPatterns(grammar)->compileAG(onigLib, allowA, allowG);
// Returns the rule's pattern list, building it on first use by creating an
// empty RegExpSourceList and filling it through collectPatterns().
// The list is owned by this rule and released in dispose().
264RegExpSourceList* MatchRule::_getCachedCompiledPatterns(IRuleRegistry* grammar) {
265 if (!_cachedCompiledPatterns) {
266 _cachedCompiledPatterns =
new RegExpSourceList();
267 collectPatterns(grammar, _cachedCompiledPatterns);
269 return _cachedCompiledPatterns;
// Constructs a rule that only groups other patterns.
// Copies the compiled pattern ids and the hasMissingPatterns flag out of
// `patterns_`; the compiled-pattern cache starts empty.
274IncludeOnlyRule::IncludeOnlyRule(ILocation* location_, RuleId id_,
275 const std::string* name,
const std::string* contentName,
276 const ICompilePatternsResult& patterns_)
277 : Rule(location_, id_, name, contentName),
278 patterns(patterns_.patterns),
279 hasMissingPatterns(patterns_.hasMissingPatterns),
280 _cachedCompiledPatterns(nullptr) {
// Releases the lazily built pattern list, if any: disposes it, deletes it and
// resets the pointer so a later compile rebuilds it.
283void IncludeOnlyRule::dispose() {
284 if (_cachedCompiledPatterns) {
285 _cachedCompiledPatterns->dispose();
286 delete _cachedCompiledPatterns;
287 _cachedCompiledPatterns =
nullptr;
// Adds the patterns of every child rule to `out`, in declaration order, by
// resolving each child id through `grammar` and recursing into it.
// NOTE(review): the pointer returned by getRule() is dereferenced with no
// visible null check; confirm getRule() cannot return nullptr for ids stored
// in `patterns`.
291void IncludeOnlyRule::collectPatterns(IRuleRegistry* grammar, RegExpSourceList* out) {
292 for (
const auto& pattern : patterns) {
293 Rule* rule = grammar->getRule(pattern);
294 rule->collectPatterns(grammar, out);
// Compiles this rule by delegating to the (lazily built) cached pattern list.
// `endRegexSource` is not used by the visible code.
298CompiledRule* IncludeOnlyRule::compile(IRuleRegistry* grammar, IOnigLib* onigLib,
299 const std::string* endRegexSource) {
300 return _getCachedCompiledPatterns(grammar)->compile(onigLib);
// Anchor-aware variant of compile(): delegates to the cached pattern list's
// compileAG with the given allowA/allowG flags.
// `endRegexSource` is not used by the visible code.
303CompiledRule* IncludeOnlyRule::compileAG(IRuleRegistry* grammar, IOnigLib* onigLib,
304 const std::string* endRegexSource,
305 bool allowA,
bool allowG) {
306 return _getCachedCompiledPatterns(grammar)->compileAG(onigLib, allowA, allowG);
// Returns the rule's pattern list, building it on first use by creating an
// empty RegExpSourceList and filling it through collectPatterns().
// The list is owned by this rule and released in dispose().
309RegExpSourceList* IncludeOnlyRule::_getCachedCompiledPatterns(IRuleRegistry* grammar) {
310 if (!_cachedCompiledPatterns) {
311 _cachedCompiledPatterns =
new RegExpSourceList();
312 collectPatterns(grammar, _cachedCompiledPatterns);
314 return _cachedCompiledPatterns;
// Constructs a begin/end rule.
// Stores the begin and end capture rules, the applyEndPatternLast flag, and
// the compiled child pattern ids with their hasMissingPatterns flag; the
// compiled-pattern cache starts empty.
// endHasBackReferences is read from `_end`, so `_end` must be initialised
// before it.
// NOTE(review): the initialisers that consume `begin` and `end` are not
// visible in this listing; presumably they initialise `_begin` and `_end`.
// Confirm the member declaration order puts `_end` ahead of
// endHasBackReferences.
319BeginEndRule::BeginEndRule(ILocation* location_, RuleId id_,
320 const std::string* name,
const std::string* contentName,
321 const std::string& begin,
const std::vector<CaptureRule*>& beginCaptures_,
322 const std::string& end,
const std::vector<CaptureRule*>& endCaptures_,
323 bool applyEndPatternLast_,
const ICompilePatternsResult& patterns_)
324 : Rule(location_, id_, name, contentName),
327 beginCaptures(beginCaptures_),
328 endHasBackReferences(_end.hasBackReferences),
329 endCaptures(endCaptures_),
330 applyEndPatternLast(applyEndPatternLast_),
331 patterns(patterns_.patterns),
332 hasMissingPatterns(patterns_.hasMissingPatterns),
333 _cachedCompiledPatterns(nullptr) {
// Destructor.
// NOTE(review): the body is not visible in this listing; confirm it releases
// the cached pattern list (for example via dispose()).
336BeginEndRule::~BeginEndRule() {
// Releases the lazily built pattern list, if any: disposes it, deletes it and
// resets the pointer so a later compile rebuilds it.
343void BeginEndRule::dispose() {
344 if (_cachedCompiledPatterns) {
345 _cachedCompiledPatterns->dispose();
346 delete _cachedCompiledPatterns;
347 _cachedCompiledPatterns =
nullptr;
// Returns a copy of the begin regex source text (for debugging output).
351std::string BeginEndRule::getDebugBeginRegExp()
const {
352 return _begin.source;
// Const accessor returning a std::string (for debugging output).
// NOTE(review): the body is not visible in this listing; presumably it returns
// the end regex source, mirroring getDebugBeginRegExp() — confirm.
355std::string BeginEndRule::getDebugEndRegExp()
const {
// Returns the end regex with its back-references resolved against
// `captureIndices` taken from `lineText`; the work is done by
// `_end.resolveBackReferences`.
359std::string BeginEndRule::getEndWithResolvedBackReferences(
const std::string& lineText,
360 const std::vector<IOnigCaptureIndex>& captureIndices) {
361 return _end.resolveBackReferences(lineText, captureIndices);
// Adds this rule's pattern(s) to `out`.
// NOTE(review): the body is not visible in this listing; presumably it pushes
// the begin regex onto `out` — confirm.
364void BeginEndRule::collectPatterns(IRuleRegistry* grammar, RegExpSourceList* out) {
// Compiles the rule's inner patterns together with its end pattern.
// `endRegexSource`, when non-null, overrides the rule's own end regex source;
// otherwise `_end.source` is used.
368CompiledRule* BeginEndRule::compile(IRuleRegistry* grammar, IOnigLib* onigLib,
369 const std::string* endRegexSource) {
370 std::string endSource = endRegexSource ? *endRegexSource : _end.source;
371 return _getCachedCompiledPatterns(grammar, endSource)->compile(onigLib);
// Anchor-aware variant of compile().
// `endRegexSource`, when non-null, overrides the rule's own end regex source;
// the resulting list is compiled with the given allowA/allowG flags.
374CompiledRule* BeginEndRule::compileAG(IRuleRegistry* grammar, IOnigLib* onigLib,
375 const std::string* endRegexSource,
376 bool allowA,
bool allowG) {
377 std::string endSource = endRegexSource ? *endRegexSource : _end.source;
378 return _getCachedCompiledPatterns(grammar, endSource)->compileAG(onigLib, allowA, allowG);
// Returns the pattern list for this rule's body, building it on first use.
// Build: collect the patterns of every child rule, then add the end pattern —
// a clone of `_end` when it has back-references, otherwise a fresh
// RegexSource with the same source and rule id. The end pattern goes last
// (push) when applyEndPatternLast is set and first (unshift) otherwise.
// When `_end` has back-references, the end pattern's slot (last index or 0)
// is updated with `endRegexSource`.
// NOTE(review): closing braces and the `else` line are not visible in this
// listing, so it cannot be confirmed here whether the setSource() step runs
// on every call or only on first build — verify against the full file.
// NOTE(review): getRule() results are dereferenced with no visible null check.
381RegExpSourceList* BeginEndRule::_getCachedCompiledPatterns(IRuleRegistry* grammar,
382 const std::string& endRegexSource) {
383 if (!_cachedCompiledPatterns) {
384 _cachedCompiledPatterns =
new RegExpSourceList();
386 for (
const auto& pattern : patterns) {
387 Rule* rule = grammar->getRule(pattern);
388 rule->collectPatterns(grammar, _cachedCompiledPatterns);
393 RegexSource* endPattern = _end.hasBackReferences ? _end.clone() :
new RegexSource(_end.source, _end.ruleId);
394 if (applyEndPatternLast) {
395 _cachedCompiledPatterns->push(endPattern);
397 _cachedCompiledPatterns->unshift(endPattern);
401 if (_end.hasBackReferences) {
// Index of the end pattern: last item if it was pushed, first if unshifted.
402 int index = applyEndPatternLast ? (_cachedCompiledPatterns->length() - 1) : 0;
403 _cachedCompiledPatterns->setSource(index, endRegexSource);
406 return _cachedCompiledPatterns;
// Constructs a begin/while rule.
// Stores the begin and while capture rules and the compiled child pattern ids
// with their hasMissingPatterns flag; both compiled-pattern caches start
// empty.
// whileHasBackReferences is read from `_while`, so `_while` must be
// initialised before it.
// NOTE(review): the initialisers that consume `begin` and `whilePattern` are
// not visible in this listing; presumably they initialise `_begin` and
// `_while`. Confirm the member declaration order puts `_while` ahead of
// whileHasBackReferences.
411BeginWhileRule::BeginWhileRule(ILocation* location_, RuleId id_,
412 const std::string* name,
const std::string* contentName,
413 const std::string& begin,
const std::vector<CaptureRule*>& beginCaptures_,
414 const std::string& whilePattern,
const std::vector<CaptureRule*>& whileCaptures_,
415 const ICompilePatternsResult& patterns_)
416 : Rule(location_, id_, name, contentName),
419 beginCaptures(beginCaptures_),
420 whileCaptures(whileCaptures_),
421 whileHasBackReferences(_while.hasBackReferences),
422 patterns(patterns_.patterns),
423 hasMissingPatterns(patterns_.hasMissingPatterns),
424 _cachedCompiledPatterns(nullptr),
425 _cachedCompiledWhilePatterns(nullptr) {
// Destructor.
// NOTE(review): the body is not visible in this listing; confirm it releases
// both cached pattern lists (for example via dispose()).
428BeginWhileRule::~BeginWhileRule() {
// Releases both lazily built lists (body patterns and while pattern): each
// one that exists is disposed, deleted and reset to nullptr.
434void BeginWhileRule::dispose() {
435 if (_cachedCompiledPatterns) {
436 _cachedCompiledPatterns->dispose();
437 delete _cachedCompiledPatterns;
438 _cachedCompiledPatterns =
nullptr;
440 if (_cachedCompiledWhilePatterns) {
441 _cachedCompiledWhilePatterns->dispose();
442 delete _cachedCompiledWhilePatterns;
443 _cachedCompiledWhilePatterns =
nullptr;
// Returns a copy of the begin regex source text (for debugging output).
447std::string BeginWhileRule::getDebugBeginRegExp()
const {
448 return _begin.source;
// Returns a copy of the while regex source text (for debugging output).
451std::string BeginWhileRule::getDebugWhileRegExp()
const {
452 return _while.source;
// Returns the while regex with its back-references resolved against
// `captureIndices` taken from `lineText`; the work is done by
// `_while.resolveBackReferences`.
455std::string BeginWhileRule::getWhileWithResolvedBackReferences(
const std::string& lineText,
456 const std::vector<IOnigCaptureIndex>& captureIndices) {
457 return _while.resolveBackReferences(lineText, captureIndices);
// Adds this rule's pattern(s) to `out`.
// NOTE(review): the body is not visible in this listing; presumably it pushes
// the begin regex onto `out` — confirm.
460void BeginWhileRule::collectPatterns(IRuleRegistry* grammar, RegExpSourceList* out) {
// Compiles the rule's inner (child) patterns by delegating to the cached
// pattern list. `endRegexSource` is not used by the visible code; the while
// pattern is compiled separately through compileWhile().
464CompiledRule* BeginWhileRule::compile(IRuleRegistry* grammar, IOnigLib* onigLib,
465 const std::string* endRegexSource) {
466 return _getCachedCompiledPatterns(grammar)->compile(onigLib);
// Anchor-aware variant of compile(): delegates to the cached pattern list's
// compileAG with the given allowA/allowG flags.
// `endRegexSource` is not used by the visible code.
469CompiledRule* BeginWhileRule::compileAG(IRuleRegistry* grammar, IOnigLib* onigLib,
470 const std::string* endRegexSource,
471 bool allowA,
bool allowG) {
472 return _getCachedCompiledPatterns(grammar)->compileAG(onigLib, allowA, allowG);
// Compiles the while pattern on its own.
// Despite its name, `endRegexSource` carries the while regex here: when
// non-null it overrides `_while.source`.
475CompiledRule* BeginWhileRule::compileWhile(IOnigLib* onigLib,
const std::string* endRegexSource) {
476 std::string whileSource = endRegexSource ? *endRegexSource : _while.source;
477 return _getCachedCompiledWhilePatterns(onigLib, whileSource)->compile(onigLib);
// Anchor-aware variant of compileWhile().
// Despite its name, `endRegexSource` carries the while regex here: when
// non-null it overrides `_while.source`. The list is then compiled with the
// given allowA/allowG flags.
480CompiledRule* BeginWhileRule::compileWhileAG(IOnigLib* onigLib,
const std::string* endRegexSource,
481 bool allowA,
bool allowG) {
482 std::string whileSource = endRegexSource ? *endRegexSource : _while.source;
483 return _getCachedCompiledWhilePatterns(onigLib, whileSource)->compileAG(onigLib, allowA, allowG);
// Returns the list of inner (child) patterns, building it on first use by
// collecting the patterns of every child rule resolved through `grammar`.
// The list is owned by this rule and released in dispose().
// NOTE(review): getRule() results are dereferenced with no visible null check.
486RegExpSourceList* BeginWhileRule::_getCachedCompiledPatterns(IRuleRegistry* grammar) {
487 if (!_cachedCompiledPatterns) {
488 _cachedCompiledPatterns =
new RegExpSourceList();
490 for (
const auto& pattern : patterns) {
491 Rule* rule = grammar->getRule(pattern);
492 rule->collectPatterns(grammar, _cachedCompiledPatterns);
495 return _cachedCompiledPatterns;
// Returns the single-entry list holding the while pattern, building it on
// first use. The entry is a clone of `_while` when it has back-references,
// otherwise a fresh RegexSource with the same source and rule id.
// When `_while` has back-references, entry 0 is updated with
// `whileRegexSource`.
// `onigLib` is not used by the visible code.
// NOTE(review): closing braces are not visible in this listing, so it cannot
// be confirmed here whether the setSource() step runs on every call or only
// on first build — verify against the full file.
498RegExpSourceList* BeginWhileRule::_getCachedCompiledWhilePatterns(IOnigLib* onigLib,
499 const std::string& whileRegexSource) {
500 if (!_cachedCompiledWhilePatterns) {
501 _cachedCompiledWhilePatterns =
new RegExpSourceList();
504 RegexSource* whilePattern = _while.hasBackReferences ? _while.clone() :
new RegexSource(_while.source, _while.ruleId);
505 _cachedCompiledWhilePatterns->push(whilePattern);
508 if (_while.hasBackReferences) {
509 _cachedCompiledWhilePatterns->setSource(0, whileRegexSource);
512 return _cachedCompiledWhilePatterns;
// Creates a CaptureRule and registers it with `helper`.
// The rule is constructed with a placeholder id (ruleIdFromNumber(-1)) and
// then given the id that helper->registerRule() returns.
// NOTE(review): the return statement is not visible in this listing
// (presumably `return rule;`). Ownership of the heap-allocated rule presumably
// passes to the helper/registry — confirm.
517Rule* RuleFactory::createCaptureRule(IRuleFactoryHelper* helper, ILocation* location,
518 const std::string* name,
const std::string* contentName,
519 RuleId retokenizeCapturedWithRuleId) {
520 CaptureRule* rule =
new CaptureRule(location,
ruleIdFromNumber(-1), name, contentName, retokenizeCapturedWithRuleId);
521 RuleId registeredId = helper->registerRule(rule);
522 rule->id = registeredId;
// Compiles the raw rule `desc` into a Rule object and returns its id.
// Visible flow:
//   * `desc->id` is tested first (the branch body is not visible; presumably
//     it returns the already-assigned id — confirm).
//   * A fresh id is allocated from `helper` and stored on `desc` before the
//     rule is built (presumably so recursive includes resolve to the same id
//     — confirm).
//   * The concrete rule type is chosen from the fields of `desc`:
//       - MatchRule (selecting condition not visible; presumably desc->match);
//       - IncludeOnlyRule when there is no `begin`; a bare `include` with no
//         `patterns` is wrapped into a one-element pattern list;
//       - BeginWhileRule when `whilePattern` is set;
//       - BeginEndRule otherwise, with `end` defaulting to "" and
//         applyEndPatternLast defaulting to false.
//     Begin/end/while captures fall back to `desc->captures` when the
//     specific capture set is absent.
//   * The rule is stored in `helper` under the new id.
// NOTE(review): several constructor arguments and the final return are not
// visible in this listing.
// NOTE(review): the wrapper `patterns` vector, `includeRule` and its `include`
// string are allocated with `new`, and no release is visible — possible leak;
// confirm ownership.
526RuleId RuleFactory::getCompiledRuleId(IRawRule* desc, IRuleFactoryHelper* helper, IRawRepository* repository) {
531 if (desc->id !=
nullptr) {
536 RuleId ruleId = helper->allocateRuleId();
537 desc->id =
new RuleId(ruleId);
539 Rule* rule =
nullptr;
542 rule =
new MatchRule(
547 _compileCaptures(desc->captures, helper, repository)
549 }
else if (desc->begin ==
nullptr) {
553 std::vector<IRawRule*>* patterns = desc->patterns;
554 if (!patterns && desc->include) {
555 patterns =
new std::vector<IRawRule*>();
556 IRawRule* includeRule =
new IRawRule();
557 includeRule->include =
new std::string(*desc->include);
558 patterns->push_back(includeRule);
561 rule =
new IncludeOnlyRule(
566 _compilePatterns(patterns, helper, repository)
568 }
else if (desc->whilePattern) {
569 rule =
new BeginWhileRule(
575 _compileCaptures(desc->beginCaptures ? desc->beginCaptures : desc->captures, helper, repository),
577 _compileCaptures(desc->whileCaptures ? desc->whileCaptures : desc->captures, helper, repository),
578 _compilePatterns(desc->patterns, helper, repository)
581 rule =
new BeginEndRule(
587 _compileCaptures(desc->beginCaptures ? desc->beginCaptures : desc->captures, helper, repository),
588 desc->end ? *desc->end :
"",
589 _compileCaptures(desc->endCaptures ? desc->endCaptures : desc->captures, helper, repository),
590 desc->applyEndPatternLast ? *desc->applyEndPatternLast : false,
591 _compilePatterns(desc->patterns, helper, repository)
597 helper->setRule(ruleId, rule);
// Converts a raw captures map into a vector indexed by capture-group number.
// Pass 1 finds the largest numeric capture id; the result is then sized to
// max + 1 and filled with nullptr. Pass 2 creates a CaptureRule per entry and
// stores it at its numeric index. An entry that has nested `patterns` is
// first compiled via getCompiledRuleId(), and that id is used as the
// capture's re-tokenize rule.
// NOTE(review): the lines between the `result` declaration and pass 1 are not
// visible in this listing (presumably a null check on `captures`); the
// declaration of retokenizeCapturedWithRuleId, some createCaptureRule
// arguments and the return are also not visible.
// NOTE(review): std::stoi throws std::invalid_argument or std::out_of_range
// for keys that are not valid integers, and no handling is visible. A
// negative key would index `result` out of range, because the maximum starts
// at 0 and is never lowered — confirm keys are validated upstream.
603std::vector<CaptureRule*> RuleFactory::_compileCaptures(IRawCaptures* captures,
604 IRuleFactoryHelper* helper,
605 IRawRepository* repository) {
606 std::vector<CaptureRule*> result;
613 int maximumCaptureId = 0;
614 for (
const auto& pair : captures->captures) {
615 int numericCaptureId = std::stoi(pair.first);
616 if (numericCaptureId > maximumCaptureId) {
617 maximumCaptureId = numericCaptureId;
622 result.resize(maximumCaptureId + 1,
nullptr);
625 for (
const auto& pair : captures->captures) {
626 int numericCaptureId = std::stoi(pair.first);
629 if (pair.second->patterns) {
630 retokenizeCapturedWithRuleId = getCompiledRuleId(pair.second, helper, repository);
634 Rule* captureRule = createCaptureRule(
636 pair.second->tmlLocation,
638 pair.second->contentName,
639 retokenizeCapturedWithRuleId
641 result[numericCaptureId] =
dynamic_cast<CaptureRule*
>(captureRule);
// Compiles a list of raw patterns into rule ids.
// For each pattern:
//   * If it has an `include`, the reference is parsed and resolved by kind:
//       - Base / Self: looked up in `repository` under the include string;
//       - RelativeReference: looked up in `repository` by rule name;
//       - TopLevelReference / TopLevelRepositoryReference: the external
//         grammar is fetched from `helper`, then either the named rule or
//         "$self" is looked up in that grammar's repository.
//     Each rule found is compiled with getCompiledRuleId().
//   * Otherwise the pattern itself is compiled with getCompiledRuleId().
//   * The resulting rule is fetched from `helper`. IncludeOnly, BeginEnd and
//     BeginWhile rules that have missing patterns and no remaining child
//     patterns are tested for (the visible `skipRule` flag is presumably set
//     in those branches — bodies not visible).
//   * The rule id is appended to result.patterns.
// Finally, hasMissingPatterns is set when the number of collected ids differs
// from the number of input patterns.
// NOTE(review): many lines are not visible in this listing — the null check
// on `patterns`, the declaration of `ruleId`, the case bodies' breaks, the
// guard around push_back and the return; verify against the full file.
647ICompilePatternsResult RuleFactory::_compilePatterns(std::vector<IRawRule*>* patterns,
648 IRuleFactoryHelper* helper,
649 IRawRepository* repository) {
650 ICompilePatternsResult result;
656 for (
auto* pattern : *patterns) {
659 if (pattern->include) {
660 IncludeReference reference = parseInclude(*pattern->include);
662 switch (reference.kind) {
663 case IncludeReferenceKind::Base:
664 case IncludeReferenceKind::Self: {
666 IRawRule* repoRule = repository->getRule(*pattern->include);
668 ruleId = getCompiledRuleId(repoRule, helper, repository);
673 case IncludeReferenceKind::RelativeReference: {
675 IRawRule* localIncludedRule = repository->getRule(reference.ruleName);
676 if (localIncludedRule) {
677 ruleId = getCompiledRuleId(localIncludedRule, helper, repository);
682 case IncludeReferenceKind::TopLevelReference:
683 case IncludeReferenceKind::TopLevelRepositoryReference: {
684 const std::string& externalGrammarName = reference.scopeName;
685 const std::string* externalGrammarInclude =
686 (reference.kind == IncludeReferenceKind::TopLevelRepositoryReference)
687 ? &reference.ruleName
691 IRawGrammar* externalGrammar = helper->getExternalGrammar(externalGrammarName, repository);
693 if (externalGrammar) {
694 if (externalGrammarInclude) {
695 IRawRule* externalIncludedRule = externalGrammar->repository->getRule(*externalGrammarInclude);
696 if (externalIncludedRule) {
697 ruleId = getCompiledRuleId(externalIncludedRule, helper, externalGrammar->repository);
701 IRawRule* selfRule = externalGrammar->repository->getRule(
"$self");
703 ruleId = getCompiledRuleId(selfRule, helper, externalGrammar->repository);
711 ruleId = getCompiledRuleId(pattern, helper, repository);
715 Rule* rule = helper->getRule(ruleId);
717 bool skipRule =
false;
720 IncludeOnlyRule* includeOnlyRule =
dynamic_cast<IncludeOnlyRule*
>(rule);
721 BeginEndRule* beginEndRule =
dynamic_cast<BeginEndRule*
>(rule);
722 BeginWhileRule* beginWhileRule =
dynamic_cast<BeginWhileRule*
>(rule);
724 if (includeOnlyRule) {
725 if (includeOnlyRule->hasMissingPatterns && includeOnlyRule->patterns.empty()) {
728 }
else if (beginEndRule) {
729 if (beginEndRule->hasMissingPatterns && beginEndRule->patterns.empty()) {
732 }
else if (beginWhileRule) {
733 if (beginWhileRule->hasMissingPatterns && beginWhileRule->patterns.empty()) {
739 result.patterns.push_back(ruleId);
744 result.hasMissingPatterns = (patterns->size() != result.patterns.size());
RuleId ruleIdFromNumber(int id)
Convert an integer to a RuleId.
const RuleId END_RULE_ID(-1)
Special rule ID indicating the end of a matched region.
int ruleIdToNumber(RuleId id)
Convert a RuleId to its integer value.
const RuleId WHILE_RULE_ID(-2)
Special rule ID for 'while' rule matching.