TextMateLib 1.0
Modern C++ implementation of the TextMate syntax highlighting engine
Loading...
Searching...
No Matches
c_api.h
Go to the documentation of this file.
1#ifndef TEXTMATELIB_C_API_H
2#define TEXTMATELIB_C_API_H
3
4/// @file c_api.h
5/// @brief C language API for TextMateLib
6///
7/// This header provides a C FFI (Foreign Function Interface) for TextMateLib,
8/// enabling use from C code and language bindings (C#, Python, Node.js, etc.).
9///
10/// **API Organization:**
11/// - **Theme API**: Loading and querying theme colors and styles
12/// - **Registry & Grammar API**: Managing grammar definitions and tokenization
13/// - **Tokenization API**: Core text processing with stateful line-by-line parsing
14///
15/// **Typical Workflow:**
16/// 1. Initialize: Create registry, load grammars and themes
17/// 2. Tokenize: Call textmate_tokenize_line() or textmate_tokenize_lines() with grammar and text
18/// 3. Apply Styles: Use theme colors from returned scopes
19/// 4. Cleanup: Dispose resources and results
20
21#include "tml_export.h"
22
23#ifdef __cplusplus
24extern "C" {
25#endif
26
27#include <stdint.h>
28
29/// @defgroup opaque_types Opaque Handle Types
30/// @{
31/// Opaque pointer types for C API objects. The actual implementations are in C++;
32/// handles are meant to be passed around as-is, without inspecting them.
33
34/// @brief Handle to a theme object containing color schemes
35typedef void* TextMateTheme;
36
37/// @brief Handle to a grammar definition for a specific language
38typedef void* TextMateGrammar;
39
40/// @brief Handle to a parsing state stack (immutable, used for incremental tokenization)
41typedef void* TextMateStateStack;
42
43/// @brief Handle to the Oniguruma regex library instance
44typedef void* TextMateOnigLib;
45
46/// @brief Handle to the grammar registry managing loaded grammars and themes
47typedef void* TextMateRegistry;
48
49/// @}
50
51/// @defgroup token_structures Token and Result Structures
52/// @{
53
54/// @brief Represents a single token in tokenized text
55///
56/// A token maps a range of text to a scope hierarchy (list of scopes).
57/// The scopes determine styling through theme matching.
59 int32_t startIndex; ///< Start position in the line (0-based)
60 int32_t endIndex; ///< End position (exclusive)
61 int32_t scopeDepth; ///< Number of scopes in the scope hierarchy
62 char** scopes; ///< Array of scope strings (e.g., "keyword.control", "string.quoted.double")
63};
64
65/// @brief Result from tokenizing a single line with decoded tokens
66///
67/// Returned by textmate_tokenize_line(). Contains the tokens and the state
68/// needed to continue tokenization on the next line (for incremental updates).
69///
70/// **Memory Ownership:**
71/// Caller must free this structure using textmate_free_tokenize_result().
73 TextMateToken* tokens; ///< Array of tokens found in this line
74 int32_t tokenCount; ///< Number of tokens in the array
75 TextMateStateStack ruleStack; ///< State at end of line (pass to next line's tokenization)
76 int32_t stoppedEarly; ///< Non-zero if tokenization stopped before end (time limit hit)
77};
78
79/// @brief Result from tokenizing a single line with encoded tokens
80///
81/// Alternative to TextMateTokenizeResult that uses compact 32-bit token encoding
82/// rather than decoded scopes. Used by textmate_tokenize_line2() for performance.
83///
84/// **Memory Ownership:**
85/// Caller must free this structure using textmate_free_tokenize_result2().
87 uint32_t* tokens; ///< Array of encoded tokens
88 int32_t tokenCount; ///< Number of tokens in the array
89 TextMateStateStack ruleStack; ///< State at end of line (pass to next line's tokenization)
90 int32_t stoppedEarly; ///< Non-zero if tokenization stopped before end (time limit hit)
91};
92
93/// @brief Result from batch tokenizing multiple lines
94///
95/// Returned by textmate_tokenize_lines(). Optimized for multi-line tokenization
96/// to reduce FFI call overhead in language bindings.
97///
98/// **Memory Ownership:**
99/// Caller must free this structure using textmate_free_tokenize_lines_result().
101 TextMateTokenizeResult** lineResults; ///< Array of results, one per line
102 int32_t lineCount; ///< Number of lines tokenized
103};
104
105/// @}
106
107/// @defgroup theme_api Theme API
108/// @{
109/// Load and query color schemes (themes) for syntax highlighting.
110/// Themes map scope hierarchies to foreground/background colors and font styles.
111
112/// @brief Load a theme from a JSON file
113/// @param themePath Path to the theme JSON file (TextMate theme format)
114/// @return Opaque theme handle on success, NULL on error (file not found, invalid JSON, etc.)
115/// @note The returned theme must be disposed with textmate_theme_dispose()
116TML_API TextMateTheme textmate_theme_load_from_file(
117 const char* themePath
118);
119
120/// @brief Load a theme from a JSON string
121/// @param jsonContent Theme JSON content as a null-terminated string
122/// @return Opaque theme handle on success, NULL on error (invalid JSON)
123/// @note The returned theme must be disposed with textmate_theme_dispose()
124TML_API TextMateTheme textmate_theme_load_from_json(
125 const char* jsonContent
126);
127
128/// @brief Get the foreground color for a scope path
129/// @param theme Valid theme handle (from textmate_theme_load_*)
130/// @param scopePath Scope path to match (e.g., "source.js keyword.control", "string.quoted.double")
131/// @param defaultColor Color to return if scope is not found in theme
132/// @return RGBA color value (0xRRGGBBAA format, e.g., 0xFF0000FF for opaque red)
133/// @note Scope matching uses TextMate's scope selector rules
134/// @see textmate_theme_get_background(), textmate_theme_get_font_style()
135TML_API uint32_t textmate_theme_get_foreground(
136 TextMateTheme theme,
137 const char* scopePath,
138 uint32_t defaultColor
139);
140
141/// @brief Get the background color for a scope path
142/// @param theme Valid theme handle (from textmate_theme_load_*)
143/// @param scopePath Scope path to match
144/// @param defaultColor Color to return if scope is not found in theme
145/// @return RGBA color value (0xRRGGBBAA format)
146/// @see textmate_theme_get_foreground()
147TML_API uint32_t textmate_theme_get_background(
148 TextMateTheme theme,
149 const char* scopePath,
150 uint32_t defaultColor
151);
152
153/// @brief Font style flag constants for textmate_theme_get_font_style()
154/// @{
155#define TEXTMATE_FONT_STYLE_NONE 0 ///< No special styling
156#define TEXTMATE_FONT_STYLE_ITALIC 1 ///< Italic text
157#define TEXTMATE_FONT_STYLE_BOLD 2 ///< Bold text
158#define TEXTMATE_FONT_STYLE_UNDERLINE 4 ///< Underlined text
159/// @}
160
161/// @brief Get the font style flags for a scope path
162/// @param theme Valid theme handle (from textmate_theme_load_*)
163/// @param scopePath Scope path to match
164/// @param defaultStyle Font style flags to return if scope is not found
165/// @return Combination of TEXTMATE_FONT_STYLE_* flags
166/// @note Flags can be combined with bitwise OR (e.g., BOLD | ITALIC)
167/// @see textmate_theme_get_foreground()
168TML_API int32_t textmate_theme_get_font_style(
169 TextMateTheme theme,
170 const char* scopePath,
171 int32_t defaultStyle
172);
173
174/// @brief Get the default/fallback foreground color for the entire theme
175/// @param theme Valid theme handle (from textmate_theme_load_*)
176/// @return RGBA color value (0xRRGGBBAA format)
177/// @note Used when no matching scope is found in the theme
178TML_API uint32_t textmate_theme_get_default_foreground(TextMateTheme theme);
179
180/// @brief Get the default/fallback background color for the entire theme
181/// @param theme Valid theme handle (from textmate_theme_load_*)
182/// @return RGBA color value (0xRRGGBBAA format)
183/// @note Used when no matching scope is found in the theme
184TML_API uint32_t textmate_theme_get_default_background(TextMateTheme theme);
185
186/// @brief Free a theme object and release resources
187/// @param theme Valid theme handle (from textmate_theme_load_*), or NULL (no-op)
188/// @warning Do not use theme after calling this function
189/// @note Safe to call with NULL
190TML_API void textmate_theme_dispose(TextMateTheme theme);
191
192/// @}
193
194/// @defgroup registry_api Registry and Grammar API
195/// @{
196/// Manage grammar definitions, handle dependencies, and perform tokenization.
197/// The registry is the central component for working with multiple grammars and themes.
198
199/// @brief Initialize the Oniguruma regular expression library
200/// @return Opaque Oniguruma library handle on success, NULL on error
201/// @note This must be created before creating a registry
202/// @note The returned handle must be disposed with textmate_oniglib_dispose()
203TML_API TextMateOnigLib textmate_oniglib_create();
204
205/// @brief Create a new grammar registry
206/// @param onigLib Valid Oniguruma library handle (from textmate_oniglib_create())
207/// @return Registry handle on success, NULL on error
208/// @note The registry must be disposed with textmate_registry_dispose()
209/// @see textmate_oniglib_create()
210TML_API TextMateRegistry textmate_registry_create(TextMateOnigLib onigLib);
211
212/// @brief Free a registry and all its resources
213/// @param registry Valid registry handle (from textmate_registry_create()), or NULL (no-op)
214/// @warning Do not use registry after calling this function
215/// @warning All grammars loaded from this registry become invalid
216/// @note Safe to call with NULL
217TML_API void textmate_registry_dispose(TextMateRegistry registry);
218
219/// @brief Register a grammar from a JSON file
220/// @param registry Valid registry handle
221/// @param grammarPath Path to the grammar JSON file (TextMate grammar format)
222/// @return Non-zero on success, 0 on error (file not found, invalid JSON, etc.)
223/// @note Grammars must be registered before they can be loaded with textmate_registry_load_grammar()
224/// @see textmate_registry_add_grammar_from_json(), textmate_registry_load_grammar()
225TML_API int textmate_registry_add_grammar_from_file(
226 TextMateRegistry registry,
227 const char* grammarPath
228);
229
230/// @brief Register a grammar from a JSON string
231/// @param registry Valid registry handle
232/// @param jsonContent Grammar JSON content as a null-terminated string (TextMate grammar format)
233/// @return Non-zero on success, 0 on error (invalid JSON, etc.)
234/// @note Grammars must be registered before they can be loaded
235/// @see textmate_registry_add_grammar_from_file(), textmate_registry_load_grammar()
236TML_API int textmate_registry_add_grammar_from_json(
237 TextMateRegistry registry,
238 const char* jsonContent
239);
240
241/// @brief Set grammar injection rules for a scope
242/// @param registry Valid registry handle
243/// @param scopeName Scope to inject grammars into (e.g., "source.js string.quoted.single")
244/// @param injections Array of grammar scope names to inject
245/// @param injectionCount Number of injections in the array
246/// @note Call before loading the target grammar to take effect
247/// @note Allows embedding one grammar within another (e.g., regex highlighting in string literals)
248TML_API void textmate_registry_set_injections(
249 TextMateRegistry registry,
250 const char* scopeName,
251 const char** injections,
252 int32_t injectionCount
253);
254
255/// @brief Load a grammar by scope name
256/// @param registry Valid registry handle
257/// @param scopeName Scope name of the grammar to load (e.g., "source.javascript", "text.html.markdown")
258/// @return Grammar handle on success, NULL if grammar not found or registration failed
259/// @note Automatically resolves grammar dependencies and includes
260/// @note The grammar must have been previously registered with textmate_registry_add_grammar_*()
261/// @see textmate_registry_add_grammar_from_file(), textmate_registry_add_grammar_from_json()
262TML_API TextMateGrammar textmate_registry_load_grammar(
263 TextMateRegistry registry,
264 const char* scopeName
265);
266
267/// @}
268
269/// @defgroup tokenization_api Tokenization API
270/// @{
271/// Tokenize text using a grammar, handling stateful line-by-line parsing.
272
273/// @brief Get the initial parsing state
274/// @return The INITIAL state stack (first line of a document)
275/// @note This is used as the prevState parameter for the first line
276/// @note The returned state is read-only and should not be freed
277TML_API TextMateStateStack textmate_get_initial_state();
278
279/// @brief Tokenize a single line of text with decoded scopes
280/// @param grammar Valid grammar handle (from textmate_registry_load_grammar())
281/// @param lineText The text to tokenize (should not include newline)
282/// @param prevState The state from the previous line (or initial state for first line)
283/// @return Pointer to tokenization result on success, NULL on error
284/// @note The returned result must be freed with textmate_free_tokenize_result()
285/// @note Use the ruleStack from the result as prevState for the next line
286/// @see textmate_tokenize_line2() for encoded token format (more efficient)
287/// @see textmate_get_initial_state()
288TML_API TextMateTokenizeResult* textmate_tokenize_line(
289 TextMateGrammar grammar,
290 const char* lineText,
291 TextMateStateStack prevState
292);
293
294/// @brief Tokenize a single line of text with encoded tokens (more efficient)
295/// @param grammar Valid grammar handle (from textmate_registry_load_grammar())
296/// @param lineText The text to tokenize (should not include newline)
297/// @param prevState The state from the previous line (or initial state for first line)
298/// @return Pointer to tokenization result on success, NULL on error
299/// @note The returned result must be freed with textmate_free_tokenize_result2()
300/// @note Tokens are encoded as 32-bit values for better performance
301/// @note Prefer this over textmate_tokenize_line() for performance-critical code
302TML_API TextMateTokenizeResult2* textmate_tokenize_line2(
303 TextMateGrammar grammar,
304 const char* lineText,
305 TextMateStateStack prevState
306);
307
308/// @brief Tokenize multiple lines in a single call
309/// @param grammar Valid grammar handle
310/// @param lines Array of line strings (none should include newline)
311/// @param lineCount Number of lines in the array
312/// @param initialState The state to start with (typically INITIAL or from Session API)
313/// @return Pointer to batch result on success, NULL on error
314/// @note The returned result must be freed with textmate_free_tokenize_lines_result()
315/// @note Reduces FFI call overhead when tokenizing multiple lines (important for language bindings)
316/// @note Each result's ruleStack is automatically passed to the next line
317/// @see textmate_free_tokenize_lines_result()
318TML_API TextMateTokenizeMultiLinesResult* textmate_tokenize_lines(
319 TextMateGrammar grammar,
320 const char** lines,
321 int32_t lineCount,
322 TextMateStateStack initialState
323);
324
325/// @brief Free a line tokenization result
326/// @param result Valid result pointer (from textmate_tokenize_line()), or NULL (no-op)
327/// @warning Do not use result after calling this function
328/// @note Safe to call with NULL
329TML_API void textmate_free_tokenize_result(TextMateTokenizeResult* result);
330
331/// @brief Free an encoded line tokenization result
332/// @param result Valid result pointer (from textmate_tokenize_line2()), or NULL (no-op)
333/// @warning Do not use result after calling this function
334/// @note Safe to call with NULL
335TML_API void textmate_free_tokenize_result2(TextMateTokenizeResult2* result);
336
337/// @brief Free a batch tokenization result
338/// @param result Valid result pointer (from textmate_tokenize_lines()), or NULL (no-op)
339/// @warning Do not use result after calling this function
340/// @note Safe to call with NULL
341TML_API void textmate_free_tokenize_lines_result(TextMateTokenizeMultiLinesResult* result);
342
343/// @defgroup tokenization_utf16_api UTF-16 Tokenization API
344/// @{
345/// Tokenize text and return indices as UTF-16 code unit offsets.
346/// Use these from language bindings where strings are UTF-16 encoded (C#, JavaScript).
347/// The original functions above return UTF-8 byte offsets, which are correct for C/C++.
348
349/// @brief Tokenize a single line with decoded scopes, returning UTF-16 indices
350/// @param grammar Valid grammar handle (from textmate_registry_load_grammar())
351/// @param lineText The text to tokenize (UTF-8, null-terminated)
352/// @param prevState The state from the previous line (or initial state for first line)
353/// @return Pointer to tokenization result on success, NULL on error
354/// @note Token startIndex/endIndex are UTF-16 code unit offsets
355/// @note The returned result must be freed with textmate_free_tokenize_result()
356TML_API TextMateTokenizeResult* textmate_tokenize_line_utf16(
357 TextMateGrammar grammar,
358 const char* lineText,
359 TextMateStateStack prevState
360);
361
362/// @brief Tokenize a single line with encoded tokens, returning UTF-16 indices
363/// @param grammar Valid grammar handle (from textmate_registry_load_grammar())
364/// @param lineText The text to tokenize (UTF-8, null-terminated)
365/// @param prevState The state from the previous line (or initial state for first line)
366/// @return Pointer to tokenization result on success, NULL on error
367/// @note Start offsets in the encoded tokens are UTF-16 code unit offsets
368/// @note The returned result must be freed with textmate_free_tokenize_result2()
369TML_API TextMateTokenizeResult2* textmate_tokenize_line2_utf16(
370 TextMateGrammar grammar,
371 const char* lineText,
372 TextMateStateStack prevState
373);
374
375/// @brief Tokenize multiple lines in a single call, returning UTF-16 indices
376/// @param grammar Valid grammar handle
377/// @param lines Array of line strings (UTF-8, null-terminated, none should include newline)
378/// @param lineCount Number of lines in the array
379/// @param initialState The state to start with (typically INITIAL or from Session API)
380/// @return Pointer to batch result on success, NULL on error
381/// @note Token startIndex/endIndex are UTF-16 code unit offsets
382/// @note The returned result must be freed with textmate_free_tokenize_lines_result()
383TML_API TextMateTokenizeMultiLinesResult* textmate_tokenize_lines_utf16(
384 TextMateGrammar grammar,
385 const char** lines,
386 int32_t lineCount,
387 TextMateStateStack initialState
388);
389
390/// @}
391
392/// @brief Get the scope name (language identifier) of a grammar
393/// @param grammar Valid grammar handle (from textmate_registry_load_grammar())
394/// @return Scope name string (e.g., "source.javascript"), valid for lifetime of grammar
395/// @return NULL if grammar is invalid
396TML_API const char* textmate_grammar_get_scope_name(TextMateGrammar grammar);
397
398/// @brief Free the Oniguruma library
399/// @param onigLib Valid Oniguruma handle (from textmate_oniglib_create()), or NULL (no-op)
400/// @warning Do not use onigLib after calling this function
401/// @warning All registries and grammars created with this lib become invalid
402/// @note Safe to call with NULL
403TML_API void textmate_oniglib_dispose(TextMateOnigLib onigLib);
404
405/// @}
406
407#ifdef __cplusplus
408}
409#endif
410
411#endif // TEXTMATELIB_C_API_H
void * TextMateGrammar
Handle to a grammar definition for a specific language.
Definition c_api.h:38
void * TextMateTheme
Handle to a theme object containing color schemes.
Definition c_api.h:35
void * TextMateRegistry
Handle to the grammar registry managing loaded grammars and themes.
Definition c_api.h:47
void * TextMateStateStack
Handle to a parsing state stack (immutable, used for incremental tokenization)
Definition c_api.h:41
void * TextMateOnigLib
Handle to the Oniguruma regex library instance.
Definition c_api.h:44
TML_API int textmate_registry_add_grammar_from_json(TextMateRegistry registry, const char *jsonContent)
Register a grammar from a JSON string.
Definition c_api.cpp:543
TML_API TextMateOnigLib textmate_oniglib_create()
Initialize the Oniguruma regular expression library.
Definition c_api.cpp:445
TML_API void textmate_registry_dispose(TextMateRegistry registry)
Free a registry and all its resources.
Definition c_api.cpp:505
TML_API int textmate_registry_add_grammar_from_file(TextMateRegistry registry, const char *grammarPath)
Register a grammar from a JSON file.
Definition c_api.cpp:513
TML_API TextMateRegistry textmate_registry_create(TextMateOnigLib onigLib)
Create a new grammar registry.
Definition c_api.cpp:495
TML_API TextMateGrammar textmate_registry_load_grammar(TextMateRegistry registry, const char *scopeName)
Load a grammar by scope name.
Definition c_api.cpp:592
TML_API void textmate_registry_set_injections(TextMateRegistry registry, const char *scopeName, const char **injections, int32_t injectionCount)
Set grammar injection rules for a scope.
Definition c_api.cpp:567
TML_API uint32_t textmate_theme_get_default_background(TextMateTheme theme)
Get the default/fallback background color for the entire theme.
Definition c_api.cpp:416
TML_API uint32_t textmate_theme_get_background(TextMateTheme theme, const char *scopePath, uint32_t defaultColor)
Get the background color for a scope path.
Definition c_api.cpp:278
TML_API void textmate_theme_dispose(TextMateTheme theme)
Free a theme object and release resources.
Definition c_api.cpp:437
TML_API uint32_t textmate_theme_get_foreground(TextMateTheme theme, const char *scopePath, uint32_t defaultColor)
Get the foreground color for a scope path.
Definition c_api.cpp:215
TML_API int32_t textmate_theme_get_font_style(TextMateTheme theme, const char *scopePath, int32_t defaultStyle)
Get the font style flags for a scope path.
Definition c_api.cpp:341
TML_API uint32_t textmate_theme_get_default_foreground(TextMateTheme theme)
Get the default/fallback foreground color for the entire theme.
Definition c_api.cpp:395
TML_API TextMateTheme textmate_theme_load_from_json(const char *jsonContent)
Load a theme from a JSON string.
Definition c_api.cpp:191
TML_API TextMateTheme textmate_theme_load_from_file(const char *themePath)
Load a theme from a JSON file.
Definition c_api.cpp:162
TML_API void textmate_free_tokenize_result(TextMateTokenizeResult *result)
Free a line tokenization result.
Definition c_api.cpp:692
TML_API const char * textmate_grammar_get_scope_name(TextMateGrammar grammar)
Get the scope name (language identifier) of a grammar.
Definition c_api.cpp:947
TML_API TextMateTokenizeMultiLinesResult * textmate_tokenize_lines(TextMateGrammar grammar, const char **lines, int32_t lineCount, TextMateStateStack initialState)
Tokenize multiple lines in a single call.
Definition c_api.cpp:720
TML_API void textmate_oniglib_dispose(TextMateOnigLib onigLib)
Free the Oniguruma library.
Definition c_api.cpp:963
TML_API TextMateStateStack textmate_get_initial_state()
Get the initial parsing state.
Definition c_api.cpp:610
TML_API TextMateTokenizeResult2 * textmate_tokenize_line2(TextMateGrammar grammar, const char *lineText, TextMateStateStack prevState)
Tokenize a single line of text with encoded tokens (more efficient)
Definition c_api.cpp:658
TML_API void textmate_free_tokenize_lines_result(TextMateTokenizeMultiLinesResult *result)
Free a batch tokenization result.
Definition c_api.cpp:778
TML_API TextMateTokenizeResult * textmate_tokenize_line(TextMateGrammar grammar, const char *lineText, TextMateStateStack prevState)
Tokenize a single line of text with decoded scopes.
Definition c_api.cpp:615
TML_API void textmate_free_tokenize_result2(TextMateTokenizeResult2 *result)
Free an encoded line tokenization result.
Definition c_api.cpp:710
TML_API TextMateTokenizeMultiLinesResult * textmate_tokenize_lines_utf16(TextMateGrammar grammar, const char **lines, int32_t lineCount, TextMateStateStack initialState)
Tokenize multiple lines in a single call, returning UTF-16 indices.
Definition c_api.cpp:886
TML_API TextMateTokenizeResult2 * textmate_tokenize_line2_utf16(TextMateGrammar grammar, const char *lineText, TextMateStateStack prevState)
Tokenize a single line with encoded tokens, returning UTF-16 indices.
Definition c_api.cpp:842
TML_API TextMateTokenizeResult * textmate_tokenize_line_utf16(TextMateGrammar grammar, const char *lineText, TextMateStateStack prevState)
Tokenize a single line with decoded scopes, returning UTF-16 indices.
Definition c_api.cpp:794
Represents a single token in tokenized text.
Definition c_api.h:58
int32_t startIndex
Start position in the line (0-based)
Definition c_api.h:59
int32_t endIndex
End position (exclusive)
Definition c_api.h:60
char ** scopes
Array of scope strings (e.g., "keyword.control", "string.quoted.double")
Definition c_api.h:62
int32_t scopeDepth
Number of scopes in the scope hierarchy.
Definition c_api.h:61
Result from batch tokenizing multiple lines.
Definition c_api.h:100
int32_t lineCount
Number of lines tokenized.
Definition c_api.h:102
TextMateTokenizeResult ** lineResults
Array of results, one per line.
Definition c_api.h:101
Result from tokenizing a single line with encoded tokens.
Definition c_api.h:86
int32_t tokenCount
Number of tokens in the array.
Definition c_api.h:88
TextMateStateStack ruleStack
State at end of line (pass to next line's tokenization)
Definition c_api.h:89
uint32_t * tokens
Array of encoded tokens.
Definition c_api.h:87
int32_t stoppedEarly
Non-zero if tokenization stopped before end (time limit hit)
Definition c_api.h:90
Result from tokenizing a single line with decoded tokens.
Definition c_api.h:72
int32_t stoppedEarly
Non-zero if tokenization stopped before end (time limit hit)
Definition c_api.h:76
TextMateStateStack ruleStack
State at end of line (pass to next line's tokenization)
Definition c_api.h:75
TextMateToken * tokens
Array of tokens found in this line.
Definition c_api.h:73
int32_t tokenCount
Number of tokens in the array.
Definition c_api.h:74