TextMateLib 1.0
Modern C++ implementation of the TextMate syntax highlighting engine
Loading...
Searching...
No Matches
utils.cpp
1#include "utils.h"
2#include "onigLib.h"
3#include <algorithm>
4#include <cctype>
5#include <chrono>
6
7namespace tml {
8
// Returns the last component of `path`, treating both '/' and '\\' as
// separators. Trailing separators are dropped one at a time before the
// lookup, so "a/b/" yields "b"; a path made only of separators yields "".
// A path without any separator is returned unchanged.
std::string basename(const std::string& path) {
    std::string remaining = path;
    for (;;) {
        const std::string::size_type sep = remaining.find_last_of("/\\");
        if (sep == std::string::npos) {
            // No separator left: the whole remainder is the name.
            return remaining;
        }
        if (sep + 1 != remaining.length()) {
            // Separator is not the final character: take what follows it.
            return remaining.substr(sep + 1);
        }
        // Separator is the final character: strip it and look again.
        remaining.erase(remaining.length() - 1);
    }
}
19
// Three-way comparison of two strings.
// Returns -1 when `a` orders before `b`, 1 when it orders after, 0 when equal.
int strcmp_custom(const std::string& a, const std::string& b) {
    // std::string::compare only guarantees the sign of its result, so
    // collapse it to exactly -1 / 0 / 1.
    const int order = a.compare(b);
    if (order == 0) {
        return 0;
    }
    return order < 0 ? -1 : 1;
}
29
30int strArrCmp(const std::vector<std::string>* a, const std::vector<std::string>* b) {
31 if (a == nullptr && b == nullptr) {
32 return 0;
33 }
34 if (a == nullptr) {
35 return -1;
36 }
37 if (b == nullptr) {
38 return 1;
39 }
40 size_t len1 = a->size();
41 size_t len2 = b->size();
42 if (len1 == len2) {
43 for (size_t i = 0; i < len1; i++) {
44 int res = strcmp_custom((*a)[i], (*b)[i]);
45 if (res != 0) {
46 return res;
47 }
48 }
49 return 0;
50 }
51 return static_cast<int>(len1) - static_cast<int>(len2);
52}
53
// Returns true when `hex` is a CSS-style hex colour: a leading '#'
// followed by exactly 3 (#rgb), 4 (#rgba), 6 (#rrggbb) or 8 (#rrggbbaa)
// hexadecimal digits, upper or lower case. Anything else, including
// surrounding whitespace, is rejected.
//
// Implemented as a direct scan rather than std::regex so that no regular
// expression has to be compiled on each call.
bool isValidHexColor(const std::string& hex) {
    if (hex.empty() || hex.front() != '#') {
        return false;
    }

    const size_t digitCount = hex.size() - 1;
    if (digitCount != 3 && digitCount != 4 && digitCount != 6 && digitCount != 8) {
        return false;
    }

    // Explicit ranges keep the check independent of the C locale.
    return std::all_of(hex.begin() + 1, hex.end(), [](char ch) {
        return (ch >= '0' && ch <= '9') ||
               (ch >= 'a' && ch <= 'f') ||
               (ch >= 'A' && ch <= 'F');
    });
}
65
// Returns a copy of `value` in which every regular-expression
// metacharacter ( - \ { } * + ? | ^ $ . , [ ] ( ) # ) and every
// whitespace character is preceded by a backslash, so the result can be
// embedded in a pattern and match `value` literally.
std::string escapeRegExpCharacters(const std::string& value) {
    const auto needsEscape = [](char c) {
        switch (c) {
            case '-': case '\\': case '{': case '}': case '*':
            case '+': case '?':  case '|': case '^': case '$':
            case '.': case ',':  case '[': case ']': case '(':
            case ')': case '#':
                return true;
            default:
                // std::isspace requires a value representable as unsigned
                // char (or EOF); passing a negative char, i.e. any UTF-8
                // byte >= 0x80, is undefined behaviour.
                return std::isspace(static_cast<unsigned char>(c)) != 0;
        }
    };

    std::string result;
    result.reserve(value.length() * 2);  // worst case: every char escaped
    for (const char c : value) {
        if (needsEscape(c)) {
            result += '\\';
        }
        result += c;
    }
    return result;
}
80
// RTL detection.
//
// std::regex over std::string matches byte by byte, so a character class
// written with multi-byte UTF-8 characters is interpreted as a set of
// individual bytes and byte ranges and reports almost any non-ASCII text
// as right-to-left. The input is therefore decoded as UTF-8 and each code
// point is checked against an explicit range table instead.
namespace {

// Inclusive code point range.
struct RtlCodePointRange {
    char32_t first;
    char32_t last;
};

// Generated using https://github.com/alexdima/unicode-utils/blob/main/rtl-test.js
// Sorted in ascending order; ranges do not overlap.
const RtlCodePointRange RTL_CODE_POINT_RANGES[] = {
    {0x05BE, 0x05BE}, {0x05C0, 0x05C0}, {0x05C3, 0x05C3}, {0x05C6, 0x05C6},
    {0x05D0, 0x05F4}, {0x0608, 0x0608}, {0x060B, 0x060B}, {0x060D, 0x060D},
    {0x061B, 0x064A}, {0x066D, 0x066F}, {0x0671, 0x06D5}, {0x06E5, 0x06E6},
    {0x06EE, 0x06EF}, {0x06FA, 0x0710}, {0x0712, 0x072F}, {0x074D, 0x07A5},
    {0x07B1, 0x07EA}, {0x07F4, 0x07F5}, {0x07FA, 0x07FA}, {0x07FE, 0x0815},
    {0x081A, 0x081A}, {0x0824, 0x0824}, {0x0828, 0x0828}, {0x0830, 0x0858},
    {0x085E, 0x088E}, {0x08A0, 0x08C9}, {0x200F, 0x200F}, {0xFB1D, 0xFB1D},
    {0xFB1F, 0xFB28}, {0xFB2A, 0xFD3D}, {0xFD50, 0xFDC7}, {0xFDF0, 0xFDFC},
    {0xFE70, 0xFEFC},
};

// True when `codePoint` falls inside one of the RTL ranges.
bool isRtlCodePoint(char32_t codePoint) {
    for (const RtlCodePointRange& range : RTL_CODE_POINT_RANGES) {
        if (codePoint < range.first) {
            return false;  // table is sorted: no later range can match
        }
        if (codePoint <= range.last) {
            return true;
        }
    }
    return false;
}

} // namespace

// Returns true when `str`, interpreted as UTF-8, contains at least one
// code point from the RTL table above. Bytes that do not form a
// well-formed sequence (stray continuation bytes, truncated sequences)
// are skipped one at a time and never count as RTL.
bool containsRTL(const std::string& str) {
    const size_t len = str.size();
    size_t pos = 0;

    while (pos < len) {
        const unsigned char lead = static_cast<unsigned char>(str[pos]);

        char32_t codePoint = 0;
        size_t trailing = 0;  // number of continuation bytes expected
        if (lead < 0x80) {
            ++pos;  // ASCII is never in the table
            continue;
        } else if ((lead & 0xE0) == 0xC0) {
            codePoint = lead & 0x1F;
            trailing = 1;
        } else if ((lead & 0xF0) == 0xE0) {
            codePoint = lead & 0x0F;
            trailing = 2;
        } else if ((lead & 0xF8) == 0xF0) {
            codePoint = lead & 0x07;
            trailing = 3;
        } else {
            ++pos;  // stray continuation byte or invalid lead byte
            continue;
        }

        if (trailing > len - pos - 1) {
            ++pos;  // sequence is cut off by the end of the string
            continue;
        }

        bool wellFormed = true;
        for (size_t k = 1; k <= trailing; ++k) {
            const unsigned char cont = static_cast<unsigned char>(str[pos + k]);
            if ((cont & 0xC0) != 0x80) {
                wellFormed = false;
                break;
            }
            codePoint = static_cast<char32_t>((codePoint << 6) | (cont & 0x3F));
        }
        if (!wellFormed) {
            ++pos;
            continue;
        }

        if (isRtlCodePoint(codePoint)) {
            return true;
        }
        pos += trailing + 1;
    }
    return false;
}
101
// Returns the time elapsed since the epoch of
// std::chrono::high_resolution_clock, in milliseconds.
//
// The value keeps its fractional part: converting through an integral
// millisecond duration first would truncate to whole milliseconds and
// make short intervals measure as zero.
double performanceNow() {
    using FractionalMilliseconds = std::chrono::duration<double, std::milli>;
    const auto sinceEpoch = std::chrono::high_resolution_clock::now().time_since_epoch();
    return FractionalMilliseconds(sinceEpoch).count();
}
107
108// RegexSource implementation
109
// Matches a capture reference inside a rule name or regex source:
//   $N                 -> group 1 holds N
//   ${N:/downcase}     -> group 2 holds N, group 3 holds "downcase"
//   ${N:/upcase}       -> group 2 holds N, group 3 holds "upcase"
// Declared const: the pattern is only ever read after construction.
static const std::regex CAPTURING_REGEX_SOURCE("\\$(\\d+)|\\$\\{(\\d+):\\/(downcase|upcase)\\}");
111
112RegexSource::RegexSource(const std::string& regExpSource, RuleId ruleId_)
113 : source(regExpSource), ruleId(ruleId_), hasAnchor(false), hasBackReferences(false) {
114
115 // Check for anchors
116 if (regExpSource.find("\\A") != std::string::npos ||
117 regExpSource.find("\\G") != std::string::npos) {
118 hasAnchor = true;
119 }
120
121 // Check for back references
122 std::regex backRefPattern("\\\\(\\d+)");
123 hasBackReferences = std::regex_search(regExpSource, backRefPattern);
124
125 // Build anchor cache if needed
126 if (hasAnchor) {
127 buildAnchorCache();
128 }
129}
130
131bool RegexSource::hasCaptures(const std::string* regexSource) {
132 if (regexSource == nullptr) {
133 return false;
134 }
135 return std::regex_search(*regexSource, CAPTURING_REGEX_SOURCE);
136}
137
138std::string RegexSource::replaceCaptures(const std::string& regexSource,
139 const std::string& captureSource,
140 const std::vector<IOnigCaptureIndex>& captureIndices) {
141 std::string result;
142 std::smatch match;
143 std::string::const_iterator searchStart(regexSource.cbegin());
144
145 while (std::regex_search(searchStart, regexSource.cend(), match, CAPTURING_REGEX_SOURCE)) {
146 result += match.prefix();
147
148 std::string indexStr = match[1].matched ? match[1].str() : match[2].str();
149 std::string command = match[3].matched ? match[3].str() : "";
150
151 int index = std::stoi(indexStr);
152
153 if (index < static_cast<int>(captureIndices.size())) {
154 const IOnigCaptureIndex& capture = captureIndices[index];
155 if (capture.length > 0) {
156 std::string captureText = captureSource.substr(capture.start, capture.length);
157
158 // Remove leading dots
159 while (!captureText.empty() && captureText[0] == '.') {
160 captureText = captureText.substr(1);
161 }
162
163 if (command == "downcase") {
164 std::transform(captureText.begin(), captureText.end(), captureText.begin(), ::tolower);
165 } else if (command == "upcase") {
166 std::transform(captureText.begin(), captureText.end(), captureText.begin(), ::toupper);
167 }
168
169 result += captureText;
170 } else {
171 result += match.str();
172 }
173 } else {
174 result += match.str();
175 }
176
177 searchStart = match.suffix().first;
178 }
179
180 result += std::string(searchStart, regexSource.cend());
181 return result;
182}
183
184std::string RegexSource::resolveBackReferences(const std::string& lineText,
185 const std::vector<IOnigCaptureIndex>& captureIndices) {
186 std::regex backRefPattern("\\\\(\\d+)");
187 std::string result;
188 std::smatch match;
189 std::string::const_iterator searchStart(source.cbegin());
190
191 while (std::regex_search(searchStart, source.cend(), match, backRefPattern)) {
192 result += match.prefix();
193
194 int index = std::stoi(match[1].str());
195
196 if (index < static_cast<int>(captureIndices.size())) {
197 const IOnigCaptureIndex& capture = captureIndices[index];
198 if (capture.length > 0) {
199 std::string captureText = lineText.substr(capture.start, capture.length);
200 result += escapeRegExpCharacters(captureText);
201 }
202 } else {
203 result += match.str();
204 }
205
206 searchStart = match.suffix().first;
207 }
208
209 result += std::string(searchStart, source.cend());
210 return result;
211}
212
213void RegexSource::buildAnchorCache() {
214 // Build all 4 variants of the anchor cache
215 // A0_G0: \A -> \uFFFF, \G -> \uFFFF (replace with char that never matches)
216 // A0_G1: \A -> \uFFFF, \G -> \G
217 // A1_G0: \A -> \A, \G -> \uFFFF
218 // A1_G1: \A -> \A, \G -> \G
219
220 // Start with copies of the source
221 std::string A0_G0_result;
222 std::string A0_G1_result;
223 std::string A1_G0_result;
224 std::string A1_G1_result;
225
226 A0_G0_result.reserve(source.length());
227 A0_G1_result.reserve(source.length());
228 A1_G0_result.reserve(source.length());
229 A1_G1_result.reserve(source.length());
230
231 for (size_t i = 0; i < source.length(); i++) {
232 char ch = source[i];
233
234 // Default: copy character as-is
235 A0_G0_result += ch;
236 A0_G1_result += ch;
237 A1_G0_result += ch;
238 A1_G1_result += ch;
239
240 if (ch == '\\' && i + 1 < source.length()) {
241 char nextCh = source[i + 1];
242 i++; // Skip the next character in the loop
243
244 if (nextCh == 'A') {
245 // Replace \A based on allowA flag
246 // When allowA=false, replace with \uFFFF (a character that will never match)
247 A0_G0_result += "\uFFFF"; // A=false, G=false
248 A0_G1_result += "\uFFFF"; // A=false, G=true
249 A1_G0_result += 'A'; // A=true, G=false
250 A1_G1_result += 'A'; // A=true, G=true
251 } else if (nextCh == 'G') {
252 // Replace \G based on allowG flag
253 A0_G0_result += "\uFFFF"; // A=false, G=false
254 A0_G1_result += 'G'; // A=false, G=true
255 A1_G0_result += "\uFFFF"; // A=true, G=false
256 A1_G1_result += 'G'; // A=true, G=true
257 } else {
258 // Other escaped characters, keep as-is
259 A0_G0_result += nextCh;
260 A0_G1_result += nextCh;
261 A1_G0_result += nextCh;
262 A1_G1_result += nextCh;
263 }
264 }
265 }
266
267 anchorCache_A0_G0 = A0_G0_result;
268 anchorCache_A0_G1 = A0_G1_result;
269 anchorCache_A1_G0 = A1_G0_result;
270 anchorCache_A1_G1 = A1_G1_result;
271}
272
273std::string RegexSource::resolveAnchors(bool allowA, bool allowG) const {
274 if (!hasAnchor) {
275 return source;
276 }
277
278 if (allowA) {
279 if (allowG) {
280 return anchorCache_A1_G1;
281 } else {
282 return anchorCache_A1_G0;
283 }
284 } else {
285 if (allowG) {
286 return anchorCache_A0_G1;
287 } else {
288 return anchorCache_A0_G0;
289 }
290 }
291}
292
293RegexSource* RegexSource::clone() const {
294 return new RegexSource(source, ruleId);
295}
296
297} // namespace tml