TextMateLib 1.0
Modern C++ implementation of the TextMate syntax highlighting engine
Loading...
Searching...
No Matches
onigLib.cpp
1#include "onigLib.h"
2#include <cstring>
3#include <iostream>
4
5namespace tml {
6
7// OnigString implementation
8
9OnigString::OnigString(const std::string& str)
10 : _content(str), _utf8Ptr(reinterpret_cast<const UChar*>(_content.c_str())) {
11}
12
13OnigString::~OnigString() {
14 dispose();
15}
16
17void OnigString::dispose() {
18 // Nothing to dispose in this implementation
19 // The string content is managed by std::string
20}
21
22// OnigScanner implementation
23
24OnigScanner::OnigScanner(const std::vector<std::string>& sources)
25 : _sources(sources), _regSet(nullptr), _disposed(false) {
26 _regexes.resize(sources.size(), nullptr);
27 compilePatterns();
28}
29
30OnigScanner::~OnigScanner() {
31 dispose();
32}
33
34bool OnigScanner::compilePatterns() {
35 OnigEncoding encoding = ONIG_ENCODING_UTF8;
36 OnigSyntaxType* syntax = ONIG_SYNTAX_DEFAULT;
37
38 for (size_t i = 0; i < _sources.size(); i++) {
39 const std::string& pattern = _sources[i];
40 OnigRegex reg = nullptr;
41
42 const UChar* patternPtr = reinterpret_cast<const UChar*>(pattern.c_str());
43 const UChar* patternEnd = patternPtr + pattern.length();
44
45 OnigErrorInfo einfo;
46 int r = onig_new(&reg, patternPtr, patternEnd, ONIG_OPTION_CAPTURE_GROUP,
47 encoding, syntax, &einfo);
48
49 if (r != ONIG_NORMAL) {
50 // Error compiling pattern
51 OnigUChar s[ONIG_MAX_ERROR_MESSAGE_LEN];
52 onig_error_code_to_str(s, r, &einfo);
53 // In production, we should handle this error properly
54 return false;
55 }
56
57 _regexes[i] = reg;
58 }
59
60 // Create regex set for efficient multi-pattern matching
61 if (!_regexes.empty()) {
62 int n = _regexes.size();
63 OnigRegex* regArray = new OnigRegex[n];
64 for (int i = 0; i < n; i++) {
65 regArray[i] = _regexes[i];
66 }
67
68 int r = onig_regset_new(&_regSet, n, regArray);
69 delete[] regArray;
70
71 if (r != ONIG_NORMAL) {
72 return false;
73 }
74 }
75
76 return true;
77}
78
79IOnigMatch* OnigScanner::findNextMatchSync(const std::string& string,
80 int startPosition,
81 OrMask<FindOption> options) {
82 OnigString onigStr(string);
83 return findNextMatchSync(&onigStr, startPosition, options);
84}
85
86IOnigMatch* OnigScanner::findNextMatchSync(OnigString* string,
87 int startPosition,
88 OrMask<FindOption> options) {
89 if (_disposed || _regSet == nullptr || string == nullptr) {
90 return nullptr;
91 }
92
93 const UChar* strPtr = string->utf8Ptr();
94 const UChar* strEnd = strPtr + string->utf8Length();
95 const UChar* start = strPtr + startPosition;
96
97 if (start > strEnd) {
98 return nullptr;
99 }
100
101 // Convert options to Oniguruma options
102 OnigOptionType onigOptions = ONIG_OPTION_NONE;
103 if (options & NotBeginString) {
104 onigOptions |= ONIG_OPTION_NOT_BEGIN_STRING;
105 }
106 if (options & NotEndString) {
107 onigOptions |= ONIG_OPTION_NOT_END_STRING;
108 }
109 if (options & NotBeginPosition) {
110 onigOptions |= ONIG_OPTION_NOT_BEGIN_POSITION;
111 }
112
113 // Search using regex set
114 OnigRegSetLead lead = ONIG_REGSET_POSITION_LEAD;
115 int matchPos = -1;
116 int regIndex = onig_regset_search(_regSet, strPtr, strEnd, start, strEnd,
117 lead, onigOptions, &matchPos);
118
119 if (regIndex < 0) {
120 // No match found
121 return nullptr;
122 }
123
124 // Get the matched regex
125 OnigRegex matchedReg = _regexes[regIndex];
126 if (matchedReg == nullptr) {
127 return nullptr;
128 }
129
130 // Create a region for capturing match info
131 OnigRegion* region = onig_region_new();
132
133 // Perform the match to get capture groups
134 int r = onig_search(matchedReg, strPtr, strEnd, start, strEnd, region, onigOptions);
135 if (r < 0) {
136 onig_region_free(region, 1);
137 return nullptr;
138 }
139
140 // Create match result
141 IOnigMatch* match = new IOnigMatch();
142 match->index = regIndex;
143
144 // Extract capture groups
145 for (int i = 0; i < region->num_regs; i++) {
146 int captureStart = region->beg[i];
147 int captureEnd = region->end[i];
148
149 if (captureStart >= 0 && captureEnd >= 0) {
150 match->captureIndices.push_back(IOnigCaptureIndex(captureStart, captureEnd));
151 } else {
152 match->captureIndices.push_back(IOnigCaptureIndex(0, 0));
153 }
154 }
155
156 // Free the region
157 onig_region_free(region, 1);
158
159 return match;
160}
161
162void OnigScanner::dispose() {
163 if (_disposed) {
164 return;
165 }
166
167 if (_regSet != nullptr) {
168 // ✅ FIX: onig_regset_free() already frees all the individual regexes
169 // So we must clear the _regexes pointers to avoid double-free
170 onig_regset_free(_regSet);
171 _regSet = nullptr;
172
173 // Clear the regex pointers since they were already freed by onig_regset_free
174 for (size_t i = 0; i < _regexes.size(); i++) {
175 _regexes[i] = nullptr;
176 }
177 } else {
178 // No regset, need to free individual regexes manually
179 for (size_t i = 0; i < _regexes.size(); i++) {
180 if (_regexes[i] != nullptr) {
181 onig_free(_regexes[i]);
182 _regexes[i] = nullptr;
183 }
184 }
185 }
186
187 _disposed = true;
188}
189
190// DefaultOnigLib implementation
191
192DefaultOnigLib::DefaultOnigLib() {
193 // Initialize Oniguruma
194 onig_init();
195}
196
197DefaultOnigLib::~DefaultOnigLib() {
198 // End Oniguruma
199 onig_end();
200}
201
202OnigScanner* DefaultOnigLib::createOnigScanner(const std::vector<std::string>& sources) {
203 return new OnigScanner(sources);
204}
205
206OnigString* DefaultOnigLib::createOnigString(const std::string& str) {
207 return new OnigString(str);
208}
209
210// Helper functions
211
212void disposeOnigString(OnigString* str) {
213 if (str != nullptr) {
214 str->dispose();
215 }
216}
217
218} // namespace tml