1#ifndef TEXTMATELIB_UTF16_UTILS_H
2#define TEXTMATELIB_UTF16_UTILS_H
/// Builds a lookup table indexed by UTF-8 byte offset whose entries are
/// UTF-16 indices.
///
/// NOTE(review): this listing is fragmentary. Stray numeric prefixes are fused
/// onto several lines, and the statements between them (the loop over `i`, the
/// assignments to `seqLen` and `utf16Units`, the return) are not visible here.
/// The comments below describe only what the visible lines show.
///
/// @param utf8    Bytes to scan; read one at a time as utf8[i].
/// @param byteLen Number of bytes; the table is created with byteLen + 1 entries.
/// @return A std::vector<int32_t>. Presumably the `map` table built below; the
///         return statement is not visible, TODO confirm.
20inline std::vector<int32_t> buildByteToUtf16Map(
const char* utf8,
size_t byteLen) {
// One entry per byte offset, plus one extra entry for the offset byteLen itself.
21 std::vector<int32_t> map(byteLen + 1);
// Running UTF-16 index; it is advanced by utf16Units further down.
22 int32_t utf16Index = 0;
// Reinterpret the current byte as unsigned before the bit-mask tests below.
28 unsigned char ch =
static_cast<unsigned char>(utf8[i]);
35 }
// Bit pattern 110xxxxx: the UTF-8 lead byte of a 2-byte sequence.
else if ((ch & 0xE0) == 0xC0) {
38 }
// Bit pattern 1110xxxx: the UTF-8 lead byte of a 3-byte sequence.
else if ((ch & 0xF0) == 0xE0) {
41 }
// Bit pattern 11110xxx: the UTF-8 lead byte of a 4-byte sequence.
else if ((ch & 0xF8) == 0xF0) {
// Bytes 1..seqLen-1 of the current sequence receive the current utf16Index
// (presumably the same value stored for the lead byte at map[i]; that store
// is not visible here). The (i + j) < byteLen guard stops the writes at the
// end of the input when a sequence is cut short.
51 for (
size_t j = 1; j < seqLen && (i + j) < byteLen; j++) {
52 map[i + j] = utf16Index;
// Advance by utf16Units, which is assigned in lines not visible here.
// NOTE(review): presumably 1 for BMP code points and 2 for 4-byte sequences
// (surrogate pair); confirm against the full file.
56 utf16Index += utf16Units;
// Final entry: the byte offset equal to byteLen maps to the accumulated index.
60 map[byteLen] = utf16Index;
69inline int32_t mapByteToUtf16(
const std::vector<int32_t>& map, int32_t byteOffset) {
70 if (byteOffset < 0)
return 0;
71 size_t idx =
static_cast<size_t>(byteOffset);
72 if (idx >= map.size()) idx = map.size() - 1;