|
5 | 5 | * Created: |
6 | 6 | * April 12, 1961 at 09:07:34 PM GMT+3 |
7 | 7 | * Modified: |
8 | | - * February 16, 2026 at 2:02:11 PM GMT+3 |
| 8 | + * February 16, 2026 at 4:01:29 PM GMT+3 |
9 | 9 | * |
10 | 10 | */ |
11 | 11 | /* |
@@ -221,86 +221,7 @@ static INLINE bool INLINE_ATTRIBUTE parse_number(const char **s, json_value *v) |
221 | 221 | return true; |
222 | 222 | } |
223 | 223 |
|
224 | | -#ifdef STRING_VALIDATION |
225 | | -static INLINE bool INLINE_ATTRIBUTE validate_string_chunk(const char *s, size_t len) { |
226 | | - size_t i = 0; |
227 | | - |
228 | | -#if defined(__AVX2__) |
229 | | - const size_t offset = 32; |
230 | | - const __m256i limit = _mm256_set1_epi8(0x20); |
231 | | - const __m256i high_bit = _mm256_set1_epi8(0x80); |
232 | | - const __m256i limit_shifted = _mm256_xor_si256(limit, high_bit); |
233 | | - /* Process 64 bytes per iteration (2 AVX2 registers) */ |
234 | | - for (; i + 64 <= len; i += 64) { |
235 | | - __m256i chunk1 = _mm256_loadu_si256((const __m256i *)(s + i)); |
236 | | - __m256i chunk2 = _mm256_loadu_si256((const __m256i *)(s + i + 32)); |
237 | | - __m256i chunk1_shifted = _mm256_xor_si256(chunk1, high_bit); |
238 | | - __m256i chunk2_shifted = _mm256_xor_si256(chunk2, high_bit); |
239 | | - __m256i result_mask1 = _mm256_cmpgt_epi8(limit_shifted, chunk1_shifted); |
240 | | - __m256i result_mask2 = _mm256_cmpgt_epi8(limit_shifted, chunk2_shifted); |
241 | | - if (_mm256_movemask_epi8(result_mask1) != 0 || _mm256_movemask_epi8(result_mask2) != 0) { |
242 | | - return false; |
243 | | - } |
244 | | - } |
245 | | - /* Process remaining chunks of 32 bytes */ |
246 | | - for (; i + offset <= len; i += offset) { |
247 | | - __m256i chunk = _mm256_loadu_si256((const __m256i *)(s + i)); |
248 | | - __m256i chunk_shifted = _mm256_xor_si256(chunk, high_bit); |
249 | | - __m256i result_mask = _mm256_cmpgt_epi8(limit_shifted, chunk_shifted); |
250 | | - if (_mm256_movemask_epi8(result_mask) != 0) { |
251 | | - return false; |
252 | | - } |
253 | | - } |
254 | | -#elif defined(__SSE2__) |
255 | | - const size_t offset = 16; |
256 | | - const __m128i limit = _mm_set1_epi8(0x20); |
257 | | - const __m128i high_bit = _mm_set1_epi8(0x80); |
258 | | - const __m128i limit_shifted = _mm_xor_si128(limit, high_bit); |
259 | | - /* Process 64 bytes per iteration (4 SSE2 registers) */ |
260 | | - for (; i + 64 <= len; i += 64) { |
261 | | - __m128i chunk1 = _mm_loadu_si128((const __m128i *)(s + i)); |
262 | | - __m128i chunk2 = _mm_loadu_si128((const __m128i *)(s + i + 16)); |
263 | | - __m128i chunk3 = _mm_loadu_si128((const __m128i *)(s + i + 32)); |
264 | | - __m128i chunk4 = _mm_loadu_si128((const __m128i *)(s + i + 48)); |
265 | | - __m128i chunk1_shifted = _mm_xor_si128(chunk1, high_bit); |
266 | | - __m128i chunk2_shifted = _mm_xor_si128(chunk2, high_bit); |
267 | | - __m128i chunk3_shifted = _mm_xor_si128(chunk3, high_bit); |
268 | | - __m128i chunk4_shifted = _mm_xor_si128(chunk4, high_bit); |
269 | | - __m128i result_mask1 = _mm_cmplt_epi8(chunk1_shifted, limit_shifted); |
270 | | - __m128i result_mask2 = _mm_cmplt_epi8(chunk2_shifted, limit_shifted); |
271 | | - __m128i result_mask3 = _mm_cmplt_epi8(chunk3_shifted, limit_shifted); |
272 | | - __m128i result_mask4 = _mm_cmplt_epi8(chunk4_shifted, limit_shifted); |
273 | | - if (_mm_movemask_epi8(result_mask1) != 0 || _mm_movemask_epi8(result_mask2) != 0 || |
274 | | - _mm_movemask_epi8(result_mask3) != 0 || _mm_movemask_epi8(result_mask4) != 0) { |
275 | | - return false; |
276 | | - } |
277 | | - } |
278 | | - /* Process remaining chunks of 16 bytes */ |
279 | | - for (; i + offset <= len; i += offset) { |
280 | | - __m128i chunk = _mm_loadu_si128((const __m128i *)(s + i)); |
281 | | - __m128i chunk_shifted = _mm_xor_si128(chunk, high_bit); |
282 | | - __m128i result_mask = _mm_cmplt_epi8(chunk_shifted, limit_shifted); |
283 | | - if (_mm_movemask_epi8(result_mask) != 0) { |
284 | | - return false; |
285 | | - } |
286 | | - } |
287 | | -#else |
288 | | - /* scalar fallback */ |
289 | | - const size_t offset = 1; |
290 | | -#endif |
291 | | - |
292 | | - /* scalar tail */ |
293 | | - for (; i < len; i++) { |
294 | | - if ((unsigned char)s[i] < 0x20) { |
295 | | - return false; |
296 | | - } |
297 | | - } |
298 | | - return true; |
299 | | -} |
300 | | -#endif |
301 | | - |
302 | | - |
303 | | - |
| 224 | +/* The STRING_VALIDATION check (rejects any raw byte below 0x20 in a string span) is inlined into parse_string below. */ |
304 | 225 | static INLINE bool INLINE_ATTRIBUTE parse_string(const char **s, const char *end, json_value *v) { |
305 | 226 | const char *p = *s + 1; |
306 | 227 | v->u.string.ptr = p; |
@@ -432,8 +353,74 @@ static INLINE bool INLINE_ATTRIBUTE parse_string(const char **s, const char *end |
432 | 353 | if (p == end) |
433 | 354 | return false; |
434 | 355 | #ifdef STRING_VALIDATION |
435 | | - if (!validate_string_chunk(p - span, span)) |
436 | | - return false; |
| 356 | + { |
| 357 | + const char *s_chk = p - span; |
| 358 | + size_t len_chk = span; |
| 359 | + size_t j = 0; |
| 360 | +#if defined(__AVX2__) |
| 361 | + const size_t offset2 = 32; |
| 362 | + const __m256i limit2 = _mm256_set1_epi8(0x20); |
| 363 | + const __m256i high_bit2 = _mm256_set1_epi8(0x80); |
| 364 | + const __m256i limit_shifted2 = _mm256_xor_si256(limit2, high_bit2); |
| 365 | + for (; j + 64 <= len_chk; j += 64) { |
| 366 | + __m256i chunk1 = _mm256_loadu_si256((const __m256i *)(s_chk + j)); |
| 367 | + __m256i chunk2 = _mm256_loadu_si256((const __m256i *)(s_chk + j + 32)); |
| 368 | + __m256i chunk1_shifted = _mm256_xor_si256(chunk1, high_bit2); |
| 369 | + __m256i chunk2_shifted = _mm256_xor_si256(chunk2, high_bit2); |
| 370 | + __m256i result_mask1 = _mm256_cmpgt_epi8(limit_shifted2, chunk1_shifted); |
| 371 | + __m256i result_mask2 = _mm256_cmpgt_epi8(limit_shifted2, chunk2_shifted); |
| 372 | + if (_mm256_movemask_epi8(result_mask1) != 0 || _mm256_movemask_epi8(result_mask2) != 0) { |
| 373 | + return false; |
| 374 | + } |
| 375 | + } |
| 376 | + for (; j + offset2 <= len_chk; j += offset2) { |
| 377 | + __m256i chunk = _mm256_loadu_si256((const __m256i *)(s_chk + j)); |
| 378 | + __m256i chunk_shifted = _mm256_xor_si256(chunk, high_bit2); |
| 379 | + __m256i result_mask = _mm256_cmpgt_epi8(limit_shifted2, chunk_shifted); |
| 380 | + if (_mm256_movemask_epi8(result_mask) != 0) { |
| 381 | + return false; |
| 382 | + } |
| 383 | + } |
| 384 | +#elif defined(__SSE2__) |
| 385 | + const size_t offset2 = 16; |
| 386 | + const __m128i limit2 = _mm_set1_epi8(0x20); |
| 387 | + const __m128i high_bit2 = _mm_set1_epi8(0x80); |
| 388 | + const __m128i limit_shifted2 = _mm_xor_si128(limit2, high_bit2); |
| 389 | + for (; j + 64 <= len_chk; j += 64) { |
| 390 | + __m128i chunk1 = _mm_loadu_si128((const __m128i *)(s_chk + j)); |
| 391 | + __m128i chunk2 = _mm_loadu_si128((const __m128i *)(s_chk + j + 16)); |
| 392 | + __m128i chunk3 = _mm_loadu_si128((const __m128i *)(s_chk + j + 32)); |
| 393 | + __m128i chunk4 = _mm_loadu_si128((const __m128i *)(s_chk + j + 48)); |
| 394 | + __m128i chunk1_shifted = _mm_xor_si128(chunk1, high_bit2); |
| 395 | + __m128i chunk2_shifted = _mm_xor_si128(chunk2, high_bit2); |
| 396 | + __m128i chunk3_shifted = _mm_xor_si128(chunk3, high_bit2); |
| 397 | + __m128i chunk4_shifted = _mm_xor_si128(chunk4, high_bit2); |
| 398 | + __m128i result_mask1 = _mm_cmplt_epi8(chunk1_shifted, limit_shifted2); |
| 399 | + __m128i result_mask2 = _mm_cmplt_epi8(chunk2_shifted, limit_shifted2); |
| 400 | + __m128i result_mask3 = _mm_cmplt_epi8(chunk3_shifted, limit_shifted2); |
| 401 | + __m128i result_mask4 = _mm_cmplt_epi8(chunk4_shifted, limit_shifted2); |
| 402 | + if (_mm_movemask_epi8(result_mask1) != 0 || _mm_movemask_epi8(result_mask2) != 0 || |
| 403 | + _mm_movemask_epi8(result_mask3) != 0 || _mm_movemask_epi8(result_mask4) != 0) { |
| 404 | + return false; |
| 405 | + } |
| 406 | + } |
| 407 | + for (; j + offset2 <= len_chk; j += offset2) { |
| 408 | + __m128i chunk = _mm_loadu_si128((const __m128i *)(s_chk + j)); |
| 409 | + __m128i chunk_shifted = _mm_xor_si128(chunk, high_bit2); |
| 410 | + __m128i result_mask = _mm_cmplt_epi8(chunk_shifted, limit_shifted2); |
| 411 | + if (_mm_movemask_epi8(result_mask) != 0) { |
| 412 | + return false; |
| 413 | + } |
| 414 | + } |
| 415 | +#else |
| 416 | + const size_t offset2 = 1; |
| 417 | +#endif |
| 418 | + for (; j < len_chk; j++) { |
| 419 | + if ((unsigned char)s_chk[j] < 0x20) { |
| 420 | + return false; |
| 421 | + } |
| 422 | + } |
| 423 | + } |
437 | 424 | #endif |
438 | 425 | if (*p == '\"') { |
439 | 426 | v->u.string.len = p - *s - 1; |
|
0 commit comments