TextMateLib 1.0
Modern C++ implementation of the TextMate syntax highlighting engine
Loading...
Searching...
No Matches
grammar.h
1#ifndef TEXTMATELIB_GRAMMAR_H
2#define TEXTMATELIB_GRAMMAR_H
3
4#include "types.h"
5#include "rule.h"
6#include "theme.h"
7#include "onigLib.h"
8#include "rawGrammar.h"
9#include "registry.h"
10#include "basicScopesAttributeProvider.h"
11#include "matcher.h"
12#include <string>
13#include <vector>
14#include <memory>
15#include <cstdint>
16
17namespace tml {
18
// Forward declarations.
// Grammar is listed because AttributedScopeStack and StateStackImpl take
// Grammar* parameters before the Grammar class itself is defined in this
// header.
class Grammar;
class StateStackImpl;
class AttributedScopeStack;
class LineTokens;
struct TokenTypeMatcher;
24
// IToken interface
// One token of a tokenized line: a pair of indices plus the scope names
// attached to the token.
struct IToken {
    int startIndex;                   // index where the token starts
    int endIndex;                     // NOTE(review): presumably exclusive - confirm against LineTokens
    std::vector<std::string> scopes;  // scope names carried by this token

    // Starts with both indices at zero and an empty scope list.
    IToken() : startIndex(0), endIndex(0) {}
};
33
34// ITokenizeLineResult interface
35struct ITokenizeLineResult {
36 std::vector<IToken> tokens;
37 StateStack* ruleStack;
38 bool stoppedEarly;
39
40 ITokenizeLineResult() : ruleStack(nullptr), stoppedEarly(false) {}
41};
42
43// ITokenizeLineResult2 interface
44struct ITokenizeLineResult2 {
45 std::vector<uint32_t> tokens; // Uint32Array equivalent
46 StateStack* ruleStack;
47 bool stoppedEarly;
48
49 ITokenizeLineResult2() : ruleStack(nullptr), stoppedEarly(false) {}
50};
51
52// IGrammar interface
53class IGrammar {
54public:
55 virtual ~IGrammar() {}
56
57 virtual ITokenizeLineResult tokenizeLine(
58 const std::string& lineText,
59 StateStack* prevState,
60 int timeLimit = 0
61 ) = 0;
62
63 virtual ITokenizeLineResult2 tokenizeLine2(
64 const std::string& lineText,
65 StateStack* prevState,
66 int timeLimit = 0
67 ) = 0;
68};
69
70// Injection structure
71struct Injection {
72 std::string debugSelector;
73 Matcher<std::vector<std::string>> matcher;
74 int priority; // -1, 0, or 1
75 RuleId ruleId;
76 IRawGrammar* grammar;
77
78 Injection() : priority(0), ruleId(ruleIdFromNumber(-1)), grammar(nullptr) {}
79};
80
81// TokenTypeMatcher structure
82struct TokenTypeMatcher {
83 Matcher<std::vector<std::string>> matcher;
85
86 TokenTypeMatcher() : type(StandardTokenType::Other) {}
87};
88
89// BalancedBracketSelectors class
90class BalancedBracketSelectors {
91private:
92 std::vector<Matcher<std::vector<std::string>>> _balancedBracketMatchers;
93 std::vector<Matcher<std::vector<std::string>>> _unbalancedBracketMatchers;
94 bool _allowAny;
95
96public:
97 BalancedBracketSelectors(
98 const std::vector<std::string>& balancedBracketSelectors,
99 const std::vector<std::string>& unbalancedBracketSelectors
100 );
101
102 bool matchesAlways() const;
103 bool matchesNever() const;
104 bool match(const std::vector<std::string>& scopes) const;
105};
106
107// AttributedScopeStack class
108class AttributedScopeStack {
109public:
110 AttributedScopeStack* parent;
111 ScopeName scopeName;
112 EncodedTokenAttributes tokenAttributes;
113
114 AttributedScopeStack(
115 AttributedScopeStack* parent_,
116 const ScopeName& scopeName_,
117 EncodedTokenAttributes tokenAttributes_
118 );
119 ~AttributedScopeStack();
120
121 static AttributedScopeStack* createRoot(
122 const std::string& scopeName,
123 EncodedTokenAttributes tokenAttributes
124 );
125
126 static AttributedScopeStack* createRootAndLookUpScopeName(
127 const std::string& scopeName,
128 EncodedTokenAttributes tokenAttributes,
129 Grammar* grammar
130 );
131
132 AttributedScopeStack* push(
133 Grammar* grammar,
134 const std::string& scopeName
135 );
136
137 AttributedScopeStack* pushAttributed(
138 const std::string& scopePath,
139 Grammar* grammar
140 );
141
142 std::vector<std::string> getScopeNames() const;
143
144 static bool equals(AttributedScopeStack* a, AttributedScopeStack* b);
145
146private:
147 static AttributedScopeStack* _pushAttributed(
148 AttributedScopeStack* target,
149 const std::string& scopeName,
150 Grammar* grammar
151 );
152};
153
154// StateStackImpl class (StateStack implementation)
155class StateStackImpl : public StateStack {
156private:
157 int _enterPos;
158 int _anchorPos;
159
160public:
161 static StateStackImpl* NULL_STATE;
162
163 StateStackImpl* parent;
164 RuleId ruleId;
165 bool beginRuleCapturedEOL;
166 std::string* endRule;
167 AttributedScopeStack* nameScopesList;
168 AttributedScopeStack* contentNameScopesList;
169
170 StateStackImpl(
171 StateStackImpl* parent_,
172 RuleId ruleId_,
173 int enterPos_,
174 int anchorPos_,
175 bool beginRuleCapturedEOL_,
176 const std::string* endRule_,
177 AttributedScopeStack* nameScopesList_,
178 AttributedScopeStack* contentNameScopesList_
179 );
180
181 ~StateStackImpl();
182
183 // StateStack interface implementation
184 int depth;
185 int getDepth() const override { return depth; }
186 StateStack* clone() override;
187 bool equals(StateStack* other) override;
188
189 void reset();
190
191 // Stack manipulation
192 StateStackImpl* push(
193 RuleId ruleId,
194 int enterPos,
195 int anchorPos,
196 bool beginRuleCapturedEOL,
197 const std::string* endRule,
198 AttributedScopeStack* nameScopesList,
199 AttributedScopeStack* contentNameScopesList
200 );
201
202 StateStackImpl* pop();
203 StateStackImpl* safePop();
204
205 // Accessors
206 int getEnterPos() const { return _enterPos; }
207 int getAnchorPos() const { return _anchorPos; }
208 Rule* getRule(Grammar* grammar);
209
210 // State modification
211 StateStackImpl* withContentNameScopesList(AttributedScopeStack* contentNameScopesList);
212 StateStackImpl* withEndRule(const std::string& endRule);
213
214 // Comparison
215 bool hasSameRuleAs(StateStackImpl* other);
216
217 std::string toString() const;
218};
219
220// LineTokens class
221class LineTokens {
222private:
223 bool _emitBinaryTokens;
224 std::string _lineText;
225 std::vector<TokenTypeMatcher> _tokenTypeMatchers;
226 BalancedBracketSelectors* _balancedBracketSelectors;
227
228 std::vector<IToken> _tokens;
229 std::vector<uint32_t> _binaryTokens;
230 int _lastTokenEndIndex;
231
232public:
233 LineTokens(
234 bool emitBinaryTokens,
235 const std::string& lineText,
236 const std::vector<TokenTypeMatcher>& tokenTypeMatchers,
237 BalancedBracketSelectors* balancedBracketSelectors
238 );
239
240 void produce(StateStackImpl* stack, int endIndex);
241 void produceFromScopes(AttributedScopeStack* scopesList, int endIndex);
242
243 std::vector<IToken> getResult(StateStackImpl* stack, int lineLength);
244 std::vector<uint32_t> getBinaryResult(StateStackImpl* stack, int lineLength);
245};
246
247// Grammar class
248class Grammar : public IGrammar, public IRuleFactoryHelper, public IOnigLib {
249private:
250 ScopeName _rootScopeName;
251 RuleId _rootId;
252 int _lastRuleId;
253 std::vector<Rule*> _ruleId2desc;
254 std::map<std::string, IRawGrammar*> _includedGrammars;
255 IGrammarRepository* _grammarRepository;
256 IThemeProvider* _themeProvider;
257 IRawGrammar* _grammar;
258 std::vector<Injection>* _injections;
259 BasicScopeAttributesProvider* _basicScopeAttributesProvider;
260 std::vector<TokenTypeMatcher> _tokenTypeMatchers;
261 IOnigLib* _onigLib;
262
263public:
264 BalancedBracketSelectors* balancedBracketSelectors;
265
266 Grammar(
267 const ScopeName& rootScopeName,
268 IRawGrammar* grammar,
269 int initialLanguage,
270 const EmbeddedLanguagesMap* embeddedLanguages,
271 const TokenTypeMap* tokenTypes,
272 BalancedBracketSelectors* balancedBracketSelectors_,
273 IGrammarRepository* grammarRepository,
274 IThemeProvider* themeProvider,
275 IOnigLib* onigLib
276 );
277
278 ~Grammar();
279
280 void dispose();
281
282 IThemeProvider* getThemeProvider() const { return _themeProvider; }
283
284 size_t getRuleCount() const { return _ruleId2desc.size(); }
285
286 // IOnigLib implementation
287 OnigScanner* createOnigScanner(const std::vector<std::string>& sources) override;
288 OnigString* createOnigString(const std::string& str) override;
289
290 // IRuleRegistry implementation
291 Rule* getRule(RuleId ruleId) override;
292 RuleId registerRule(Rule* rule) override;
293
294 // IRuleFactoryHelper implementation (new methods)
295 RuleId allocateRuleId() override;
296 void setRule(RuleId ruleId, Rule* rule) override;
297
298 // IGrammarRegistry implementation
299 IRawGrammar* getExternalGrammar(const std::string& scopeName, IRawRepository* repository) override;
300
301 // Get metadata for scope
302 BasicScopeAttributes getMetadataForScope(const std::string& scope);
303
304 // Get injections
305 std::vector<Injection> getInjections();
306
307 // IGrammar implementation
308 ITokenizeLineResult tokenizeLine(
309 const std::string& lineText,
310 StateStack* prevState,
311 int timeLimit = 0
312 ) override;
313
314 ITokenizeLineResult2 tokenizeLine2(
315 const std::string& lineText,
316 StateStack* prevState,
317 int timeLimit = 0
318 ) override;
319
320 // Get the root scope name of this grammar
321 ScopeName getScopeName() const { return _rootScopeName; }
322
323private:
324 std::vector<Injection> _collectInjections();
325
326 struct TokenizeResult {
327 int lineLength;
328 LineTokens* lineTokens;
329 StateStackImpl* ruleStack;
330 bool stoppedEarly;
331 };
332
333 TokenizeResult _tokenize(
334 const std::string& lineText,
335 StateStackImpl* prevState,
336 bool emitBinaryTokens,
337 int timeLimit
338 );
339};
340
341// Helper function to create grammar
342Grammar* createGrammar(
343 const ScopeName& scopeName,
344 IRawGrammar* grammar,
345 int initialLanguage,
346 const EmbeddedLanguagesMap* embeddedLanguages,
347 const TokenTypeMap* tokenTypes,
348 BalancedBracketSelectors* balancedBracketSelectors,
349 IGrammarRepository* grammarRepository,
350 IThemeProvider* themeProvider,
351 IOnigLib* onigLib
352);
353
354// Initialize grammar (merge with base if needed)
355IRawGrammar* initGrammar(IRawGrammar* grammar, IRawRule* base);
356
357} // namespace tml
358
359#endif // TEXTMATELIB_GRAMMAR_H
std::string ScopeName
Semantic name identifying a scope (e.g., "source.javascript", "comment.line")
Definition types.h:20
std::map< std::string, int > EmbeddedLanguagesMap
Map from embedded language name to token type ID.
Definition types.h:175
std::map< std::string, StandardTokenType > TokenTypeMap
Map from scope pattern to standard token type.
Definition types.h:179
RuleId ruleIdFromNumber(int id)
Convert an integer to a RuleId.
Definition types.h:109
StandardTokenType
Standard TextMate token type for syntax classification.
Definition types.h:136
@ Other
Not a recognized standard type.
int32_t EncodedTokenAttributes
Compact 32-bit encoding of a token's attributes.
Definition types.h:128
Core type definitions and interfaces for TextMateLib.