Skip to content

Commit 6f3d363

Browse files
simplifications
1 parent 737e1b8 commit 6f3d363

File tree

1 file changed

+70
-83
lines changed

1 file changed

+70
-83
lines changed

src/json.c

Lines changed: 70 additions & 83 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
* Created:
66
* April 12, 1961 at 09:07:34 PM GMT+3
77
* Modified:
8-
* February 16, 2026 at 2:02:11 PM GMT+3
8+
* February 16, 2026 at 4:01:29 PM GMT+3
99
*
1010
*/
1111
/*
@@ -221,86 +221,7 @@ static INLINE bool INLINE_ATTRIBUTE parse_number(const char **s, json_value *v)
221221
return true;
222222
}
223223

224-
#ifdef STRING_VALIDATION
225-
static INLINE bool INLINE_ATTRIBUTE validate_string_chunk(const char *s, size_t len) {
226-
size_t i = 0;
227-
228-
#if defined(__AVX2__)
229-
const size_t offset = 32;
230-
const __m256i limit = _mm256_set1_epi8(0x20);
231-
const __m256i high_bit = _mm256_set1_epi8(0x80);
232-
const __m256i limit_shifted = _mm256_xor_si256(limit, high_bit);
233-
/* Process 64 bytes per iteration (2 AVX2 registers) */
234-
for (; i + 64 <= len; i += 64) {
235-
__m256i chunk1 = _mm256_loadu_si256((const __m256i *)(s + i));
236-
__m256i chunk2 = _mm256_loadu_si256((const __m256i *)(s + i + 32));
237-
__m256i chunk1_shifted = _mm256_xor_si256(chunk1, high_bit);
238-
__m256i chunk2_shifted = _mm256_xor_si256(chunk2, high_bit);
239-
__m256i result_mask1 = _mm256_cmpgt_epi8(limit_shifted, chunk1_shifted);
240-
__m256i result_mask2 = _mm256_cmpgt_epi8(limit_shifted, chunk2_shifted);
241-
if (_mm256_movemask_epi8(result_mask1) != 0 || _mm256_movemask_epi8(result_mask2) != 0) {
242-
return false;
243-
}
244-
}
245-
/* Process remaining chunks of 32 bytes */
246-
for (; i + offset <= len; i += offset) {
247-
__m256i chunk = _mm256_loadu_si256((const __m256i *)(s + i));
248-
__m256i chunk_shifted = _mm256_xor_si256(chunk, high_bit);
249-
__m256i result_mask = _mm256_cmpgt_epi8(limit_shifted, chunk_shifted);
250-
if (_mm256_movemask_epi8(result_mask) != 0) {
251-
return false;
252-
}
253-
}
254-
#elif defined(__SSE2__)
255-
const size_t offset = 16;
256-
const __m128i limit = _mm_set1_epi8(0x20);
257-
const __m128i high_bit = _mm_set1_epi8(0x80);
258-
const __m128i limit_shifted = _mm_xor_si128(limit, high_bit);
259-
/* Process 64 bytes per iteration (4 SSE2 registers) */
260-
for (; i + 64 <= len; i += 64) {
261-
__m128i chunk1 = _mm_loadu_si128((const __m128i *)(s + i));
262-
__m128i chunk2 = _mm_loadu_si128((const __m128i *)(s + i + 16));
263-
__m128i chunk3 = _mm_loadu_si128((const __m128i *)(s + i + 32));
264-
__m128i chunk4 = _mm_loadu_si128((const __m128i *)(s + i + 48));
265-
__m128i chunk1_shifted = _mm_xor_si128(chunk1, high_bit);
266-
__m128i chunk2_shifted = _mm_xor_si128(chunk2, high_bit);
267-
__m128i chunk3_shifted = _mm_xor_si128(chunk3, high_bit);
268-
__m128i chunk4_shifted = _mm_xor_si128(chunk4, high_bit);
269-
__m128i result_mask1 = _mm_cmplt_epi8(chunk1_shifted, limit_shifted);
270-
__m128i result_mask2 = _mm_cmplt_epi8(chunk2_shifted, limit_shifted);
271-
__m128i result_mask3 = _mm_cmplt_epi8(chunk3_shifted, limit_shifted);
272-
__m128i result_mask4 = _mm_cmplt_epi8(chunk4_shifted, limit_shifted);
273-
if (_mm_movemask_epi8(result_mask1) != 0 || _mm_movemask_epi8(result_mask2) != 0 ||
274-
_mm_movemask_epi8(result_mask3) != 0 || _mm_movemask_epi8(result_mask4) != 0) {
275-
return false;
276-
}
277-
}
278-
/* Process remaining chunks of 16 bytes */
279-
for (; i + offset <= len; i += offset) {
280-
__m128i chunk = _mm_loadu_si128((const __m128i *)(s + i));
281-
__m128i chunk_shifted = _mm_xor_si128(chunk, high_bit);
282-
__m128i result_mask = _mm_cmplt_epi8(chunk_shifted, limit_shifted);
283-
if (_mm_movemask_epi8(result_mask) != 0) {
284-
return false;
285-
}
286-
}
287-
#else
288-
/* scalar fallback */
289-
const size_t offset = 1;
290-
#endif
291-
292-
/* scalar tail */
293-
for (; i < len; i++) {
294-
if ((unsigned char)s[i] < 0x20) {
295-
return false;
296-
}
297-
}
298-
return true;
299-
}
300-
#endif
301-
302-
303-
224+
/* STRING_VALIDATION inlined into parse_string below. */
304225
static INLINE bool INLINE_ATTRIBUTE parse_string(const char **s, const char *end, json_value *v) {
305226
const char *p = *s + 1;
306227
v->u.string.ptr = p;
@@ -432,8 +353,74 @@ static INLINE bool INLINE_ATTRIBUTE parse_string(const char **s, const char *end
432353
if (p == end)
433354
return false;
434355
#ifdef STRING_VALIDATION
435-
if (!validate_string_chunk(p - span, span))
436-
return false;
356+
{
357+
const char *s_chk = p - span;
358+
size_t len_chk = span;
359+
size_t j = 0;
360+
#if defined(__AVX2__)
361+
const size_t offset2 = 32;
362+
const __m256i limit2 = _mm256_set1_epi8(0x20);
363+
const __m256i high_bit2 = _mm256_set1_epi8(0x80);
364+
const __m256i limit_shifted2 = _mm256_xor_si256(limit2, high_bit2);
365+
for (; j + 64 <= len_chk; j += 64) {
366+
__m256i chunk1 = _mm256_loadu_si256((const __m256i *)(s_chk + j));
367+
__m256i chunk2 = _mm256_loadu_si256((const __m256i *)(s_chk + j + 32));
368+
__m256i chunk1_shifted = _mm256_xor_si256(chunk1, high_bit2);
369+
__m256i chunk2_shifted = _mm256_xor_si256(chunk2, high_bit2);
370+
__m256i result_mask1 = _mm256_cmpgt_epi8(limit_shifted2, chunk1_shifted);
371+
__m256i result_mask2 = _mm256_cmpgt_epi8(limit_shifted2, chunk2_shifted);
372+
if (_mm256_movemask_epi8(result_mask1) != 0 || _mm256_movemask_epi8(result_mask2) != 0) {
373+
return false;
374+
}
375+
}
376+
for (; j + offset2 <= len_chk; j += offset2) {
377+
__m256i chunk = _mm256_loadu_si256((const __m256i *)(s_chk + j));
378+
__m256i chunk_shifted = _mm256_xor_si256(chunk, high_bit2);
379+
__m256i result_mask = _mm256_cmpgt_epi8(limit_shifted2, chunk_shifted);
380+
if (_mm256_movemask_epi8(result_mask) != 0) {
381+
return false;
382+
}
383+
}
384+
#elif defined(__SSE2__)
385+
const size_t offset2 = 16;
386+
const __m128i limit2 = _mm_set1_epi8(0x20);
387+
const __m128i high_bit2 = _mm_set1_epi8(0x80);
388+
const __m128i limit_shifted2 = _mm_xor_si128(limit2, high_bit2);
389+
for (; j + 64 <= len_chk; j += 64) {
390+
__m128i chunk1 = _mm_loadu_si128((const __m128i *)(s_chk + j));
391+
__m128i chunk2 = _mm_loadu_si128((const __m128i *)(s_chk + j + 16));
392+
__m128i chunk3 = _mm_loadu_si128((const __m128i *)(s_chk + j + 32));
393+
__m128i chunk4 = _mm_loadu_si128((const __m128i *)(s_chk + j + 48));
394+
__m128i chunk1_shifted = _mm_xor_si128(chunk1, high_bit2);
395+
__m128i chunk2_shifted = _mm_xor_si128(chunk2, high_bit2);
396+
__m128i chunk3_shifted = _mm_xor_si128(chunk3, high_bit2);
397+
__m128i chunk4_shifted = _mm_xor_si128(chunk4, high_bit2);
398+
__m128i result_mask1 = _mm_cmplt_epi8(chunk1_shifted, limit_shifted2);
399+
__m128i result_mask2 = _mm_cmplt_epi8(chunk2_shifted, limit_shifted2);
400+
__m128i result_mask3 = _mm_cmplt_epi8(chunk3_shifted, limit_shifted2);
401+
__m128i result_mask4 = _mm_cmplt_epi8(chunk4_shifted, limit_shifted2);
402+
if (_mm_movemask_epi8(result_mask1) != 0 || _mm_movemask_epi8(result_mask2) != 0 ||
403+
_mm_movemask_epi8(result_mask3) != 0 || _mm_movemask_epi8(result_mask4) != 0) {
404+
return false;
405+
}
406+
}
407+
for (; j + offset2 <= len_chk; j += offset2) {
408+
__m128i chunk = _mm_loadu_si128((const __m128i *)(s_chk + j));
409+
__m128i chunk_shifted = _mm_xor_si128(chunk, high_bit2);
410+
__m128i result_mask = _mm_cmplt_epi8(chunk_shifted, limit_shifted2);
411+
if (_mm_movemask_epi8(result_mask) != 0) {
412+
return false;
413+
}
414+
}
415+
#else
416+
const size_t offset2 = 1;
417+
#endif
418+
for (; j < len_chk; j++) {
419+
if ((unsigned char)s_chk[j] < 0x20) {
420+
return false;
421+
}
422+
}
423+
}
437424
#endif
438425
if (*p == '\"') {
439426
v->u.string.len = p - *s - 1;

0 commit comments

Comments
 (0)