TextMateLib 1.0
Modern C++ implementation of the TextMate syntax highlighting engine
Loading...
Searching...
No Matches
types.h
Go to the documentation of this file.
1/// @file types.h
2/// @brief Core type definitions and interfaces for TextMateLib
3
4#ifndef TEXTMATELIB_TYPES_H
5#define TEXTMATELIB_TYPES_H
6
7#include <string>
8#include <vector>
9#include <map>
10#include <memory>
11#include <cstdint>
12
13namespace tml {
14
15/// @defgroup core_types Core Types and Identifiers
16/// @{
17
18/// @brief Semantic name identifying a scope (e.g., "source.javascript", "comment.line"); a plain std::string alias, not a distinct type
19/// @see ScopePath
20using ScopeName = std::string;
21
22/// @brief Full hierarchical scope path (e.g., "source.js keyword.control string.quoted.double")
23/// @details Space-separated list of scopes representing the nesting hierarchy at a point in the text.
24/// @see ScopeName
25using ScopePath = std::string;
26
27/// @brief Pattern string used in grammar rules (NOTE(review): described as a regular expression, which overlaps RegExpString; confirm the intended distinction)
28/// @see Rule
29using ScopePattern = std::string;
30
31/// @brief String referencing another grammar to include (scope name or file path)
32/// @see Rule
33using IncludeString = std::string;
34
35/// @brief Regular expression pattern as a string (plain std::string alias; no validation happens at this level)
36/// @see Rule
37using RegExpString = std::string;
38
39/// @brief Integer mask type for bitwise OR-able flags; every OrMask<T> names the same type, int
40/// @tparam T The flag enum type (documentation only: the alias ignores T, so no per-enum type checking is provided)
41template<typename T>
42using OrMask = int;
43
44/// @}
45
46/// @defgroup state_management State and Parsing Management
47/// @{
48
49/// @brief Abstract interface representing the parsing state at the end of a line
50/// @details StateStack is immutable and encodes the hierarchy of active grammar rules
51/// after parsing a line. Two StateStacks are equal() if and only if parsing can
52/// resume from the exact same position (all scopes and nesting are identical).
53/// This enables incremental tokenization optimization: if prevState equals previousLine's ruleStack,
54/// the line's tokens may not have changed (early stopping).
55class StateStack {
56public:
57 virtual ~StateStack() {} ///< Virtual so derived states can be destroyed through a StateStack pointer
58
59 /// @brief Get the nesting depth (number of active rules); const-qualified, so callable on a const state
60 /// @return Stack depth (0 for initial state)
61 virtual int getDepth() const = 0;
62
63 /// @brief Create an independent copy of this state
64 /// @return Raw pointer to a dynamically allocated copy; ownership passes to the caller, who must delete it
65 virtual StateStack* clone() = 0;
66
67 /// @brief Check if this state equals another (for incremental tokenization)
68 /// @param other StateStack to compare with (taken as a non-const pointer; whether nullptr is accepted is up to the implementation, TODO confirm)
69 /// @return True if states represent identical parsing positions
70 /// @note Critical for incremental tokenization performance; not const-qualified, unlike getDepth()
71 virtual bool equals(StateStack* other) = 0;
72};
73
74/// @}
75
76/// @defgroup rule_identification Rule Identification
77/// @{
78
79/// @brief Opaque identifier for a grammar rule
80/// @details RuleIds are small integers assigned during grammar compilation.
81/// They uniquely identify rules within a grammar for state tracking. The only user-declared constructor takes an int, so there is no default constructor.
82struct RuleId {
83 int id; ///< Internal rule identifier (positive for normal rules, negative for special rules)
84
85 /// @brief Create a RuleId wrapping the given value; explicit, so an int never converts to RuleId implicitly
86 /// @param value Rule identifier value
87 explicit RuleId(int value) : id(value) {}
88
89 /// @brief Equality comparison: true when both RuleIds wrap the same integer id
90 bool operator==(const RuleId& other) const {
91 return id == other.id;
92 }
93
94 /// @brief Inequality comparison: true when the wrapped integer ids differ
95 bool operator!=(const RuleId& other) const {
96 return id != other.id;
97 }
98};
99
100/// @brief Special rule ID indicating the end of a matched region
101const RuleId END_RULE_ID(-1);
102
103/// @brief Special rule ID for 'while' rule matching
104const RuleId WHILE_RULE_ID(-2);
105
106/// @brief Convert an integer to a RuleId (thin wrapper over the explicit RuleId constructor)
107/// @param id Integer value to convert; passed through unchanged, no range check
108/// @return RuleId wrapping the value
109inline RuleId ruleIdFromNumber(int id) {
110 return RuleId(id);
111}
112
113/// @brief Convert a RuleId to its integer value (reads the public id member)
114/// @param id RuleId to extract; taken by value
115/// @return Integer identifier value
116inline int ruleIdToNumber(RuleId id) {
117 return id.id;
118}
119
120/// @}
121
122/// @defgroup token_encoding Token Encoding and Attributes
123/// @{
124
125/// @brief Compact 32-bit encoding of a token's attributes
126/// @details Encodes start/end position, scope depth, and attributes in a single int32_t
127/// for space-efficient token representation (alternative to detailed token structs).
128using EncodedTokenAttributes = int32_t;
129
130/// @}
131
132/// @defgroup token_classification Token Type Classification
133/// @{
134
135/// @brief Standard TextMate token type for syntax classification
136enum class StandardTokenType {
137 Other = 0, ///< Not a recognized standard type
138 Comment = 1, ///< Comment text
139 String = 2, ///< String literal
140 RegEx = 3 ///< Regular expression literal
141};
142
143/// @brief Standard token type with optional (unknown) state
144/// @details Like StandardTokenType but includes a NotSet value for unclassified regions
145enum class OptionalStandardTokenType {
146 Other = 0, ///< Not a recognized standard type
147 Comment = 1, ///< Comment text
148 String = 2, ///< String literal
149 RegEx = 3, ///< Regular expression literal
150 NotSet = 8 ///< Type not determined or not applicable
151};
152
153/// @}
154
155/// @defgroup styling Font and Display Styling
156/// @{
157
158/// @brief Font styling attributes (italic, bold, underline, strikethrough); the four styles are distinct bits (1, 2, 4, 8). NOTE(review): as a scoped enum it has no built-in operator|, so combining styles needs casts or operators defined elsewhere; confirm
159enum class FontStyle {
160 NotSet = -1, ///< Styling not specified (inherit or use default)
161 None = 0, ///< No special styling
162 Italic = 1, ///< Italic text
163 Bold = 2, ///< Bold (heavy weight) text
164 Underline = 4, ///< Underlined text
165 Strikethrough = 8 ///< Struck-through text
166};
167
168/// @}
169
170/// @defgroup mapping Maps and Collections
171/// @{
172
173/// @brief Map from embedded language name to token type ID
174/// @details Used to classify tokens as specific language constructs; an ordered std::map keyed by std::string with int values
175using EmbeddedLanguagesMap = std::map<std::string, int>;
176
177/// @brief Map from scope pattern to standard token type
178/// @details Associates scope names with their semantic token classifications; keys are plain std::string, values are StandardTokenType
179using TokenTypeMap = std::map<std::string, StandardTokenType>;
180
181/// @}
182
183} // namespace tml
184
185#endif // TEXTMATELIB_TYPES_H
Abstract interface representing the parsing state at the end of a line.
Definition types.h:55
virtual int getDepth() const =0
Get the nesting depth (number of active rules)
virtual bool equals(StateStack *other)=0
Check if this state equals another (for incremental tokenization)
virtual StateStack * clone()=0
Create an independent copy of this state.
std::string RegExpString
Regular expression pattern as a string.
Definition types.h:37
std::string ScopePattern
Regular expression pattern string used in grammar rules.
Definition types.h:29
int OrMask
Generic template for bitwise OR-able flags/masks.
Definition types.h:42
std::string ScopeName
Semantic name identifying a scope (e.g., "source.javascript", "comment.line")
Definition types.h:20
std::string ScopePath
Full hierarchical scope path (e.g., "source.js keyword.control string.quoted.double")
Definition types.h:25
std::string IncludeString
String referencing another grammar to include (scope name or file path)
Definition types.h:33
std::map< std::string, int > EmbeddedLanguagesMap
Map from embedded language name to token type ID.
Definition types.h:175
std::map< std::string, StandardTokenType > TokenTypeMap
Map from scope pattern to standard token type.
Definition types.h:179
RuleId ruleIdFromNumber(int id)
Convert an integer to a RuleId.
Definition types.h:109
const RuleId END_RULE_ID(-1)
Special rule ID indicating the end of a matched region.
int ruleIdToNumber(RuleId id)
Convert a RuleId to its integer value.
Definition types.h:116
const RuleId WHILE_RULE_ID(-2)
Special rule ID for 'while' rule matching.
FontStyle
Font styling attributes (italic, bold, underline, strikethrough)
Definition types.h:159
@ Bold
Bold (heavy weight) text.
@ Italic
Italic text.
@ Underline
Underlined text.
@ Strikethrough
Struck-through text.
StandardTokenType
Standard TextMate token type for syntax classification.
Definition types.h:136
OptionalStandardTokenType
Standard token type with optional (unknown) state.
Definition types.h:145
@ Comment
Comment text.
@ String
String literal.
@ Other
Not a recognized standard type.
@ RegEx
Regular expression literal.
@ NotSet
Type not determined or not applicable.
int32_t EncodedTokenAttributes
Compact 32-bit encoding of a token's attributes.
Definition types.h:128
Opaque identifier for a grammar rule.
Definition types.h:82
RuleId(int value)
Create a RuleId with the given value.
Definition types.h:87
bool operator!=(const RuleId &other) const
Inequality comparison.
Definition types.h:95
bool operator==(const RuleId &other) const
Equality comparison.
Definition types.h:90
int id
Internal rule identifier (positive for normal rules, negative for special rules)
Definition types.h:83