@@ -352,12 +352,14 @@ function splitTrailingForwardStickyCluster(text: string): { head: string, tail:
352352 }
353353}
354354
355- function isRepeatedSingleCharRun ( segment : string , ch : string ) : boolean {
356- if ( segment . length === 0 ) return false
357- for ( const part of segment ) {
358- if ( part !== ch ) return false
359- }
360- return true
// Classifies one segmented piece for repeated single-character run tracking.
//
// Returns `text` itself when ALL of the following hold, and `null` otherwise:
//   - `kind` is 'text'
//   - `isWordLike` is false
//   - `text.length === 1`, i.e. exactly one UTF-16 code unit, so a character
//     encoded as a surrogate pair never qualifies
//   - `text` is neither '-' nor '—' (both are explicitly excluded from runs)
//
// buildMergedSegmentation calls this once per piece, stores the result next to
// each newly started merged segment, and appends a later piece to that segment
// only when the stored value equals the piece's own non-null result. Other
// merge branches reset the stored value to `null`, which ends the run.
355+ function getRepeatableSingleCharRunChar (
356+ text : string ,
357+ isWordLike : boolean ,
358+ kind : SegmentBreakKind ,
359+ ) : string | null {
360+ return kind === 'text' && ! isWordLike && text . length === 1 && text !== '-' && text !== '—'
361+ ? text
362+ : null
361363}
362364
363365function endsWithArabicNoSpacePunctuation ( segment : string ) : boolean {
@@ -883,10 +885,14 @@ function buildMergedSegmentation(
883885 const mergedWordLike : boolean [ ] = [ ]
884886 const mergedKinds : SegmentBreakKind [ ] = [ ]
885887 const mergedStarts : number [ ] = [ ]
888+ // Track repeatable single-char punctuation runs structurally so identical
889+ // merges stay O(1) instead of re-scanning the accumulated segment each time.
890+ const mergedSingleCharRunChars : ( string | null ) [ ] = [ ]
886891
887892 for ( const s of wordSegmenter . segment ( normalized ) ) {
888893 for ( const piece of splitSegmentByBreakKind ( s . segment , s . isWordLike ?? false , s . index , whiteSpaceProfile ) ) {
889894 const isText = piece . kind === 'text'
895+ const repeatableSingleCharRunChar = getRepeatableSingleCharRunChar ( piece . text , piece . isWordLike , piece . kind )
890896
891897 // First-pass keeps: no-space script-specific joins and punctuation glue
892898 // that depend on the immediately preceding text run.
@@ -901,6 +907,7 @@ function buildMergedSegmentation(
901907 ) {
902908 mergedTexts [ mergedLen - 1 ] += piece . text
903909 mergedWordLike [ mergedLen - 1 ] = mergedWordLike [ mergedLen - 1 ] ! || piece . isWordLike
910+ mergedSingleCharRunChars [ mergedLen - 1 ] = null
904911 } else if (
905912 isText &&
906913 mergedLen > 0 &&
@@ -910,6 +917,7 @@ function buildMergedSegmentation(
910917 ) {
911918 mergedTexts [ mergedLen - 1 ] += piece . text
912919 mergedWordLike [ mergedLen - 1 ] = mergedWordLike [ mergedLen - 1 ] ! || piece . isWordLike
920+ mergedSingleCharRunChars [ mergedLen - 1 ] = null
913921 } else if (
914922 isText &&
915923 mergedLen > 0 &&
@@ -918,6 +926,7 @@ function buildMergedSegmentation(
918926 ) {
919927 mergedTexts [ mergedLen - 1 ] += piece . text
920928 mergedWordLike [ mergedLen - 1 ] = mergedWordLike [ mergedLen - 1 ] ! || piece . isWordLike
929+ mergedSingleCharRunChars [ mergedLen - 1 ] = null
921930 } else if (
922931 isText &&
923932 mergedLen > 0 &&
@@ -928,15 +937,12 @@ function buildMergedSegmentation(
928937 ) {
929938 mergedTexts [ mergedLen - 1 ] += piece . text
930939 mergedWordLike [ mergedLen - 1 ] = true
940+ mergedSingleCharRunChars [ mergedLen - 1 ] = null
931941 } else if (
932- isText &&
933- ! piece . isWordLike &&
942+ repeatableSingleCharRunChar !== null &&
934943 mergedLen > 0 &&
935944 mergedKinds [ mergedLen - 1 ] === 'text' &&
936- piece . text . length === 1 &&
937- piece . text !== '-' &&
938- piece . text !== '—' &&
939- isRepeatedSingleCharRun ( mergedTexts [ mergedLen - 1 ] ! , piece . text )
945+ mergedSingleCharRunChars [ mergedLen - 1 ] === repeatableSingleCharRunChar
940946 ) {
941947 mergedTexts [ mergedLen - 1 ] += piece . text
942948 } else if (
@@ -950,11 +956,13 @@ function buildMergedSegmentation(
950956 )
951957 ) {
952958 mergedTexts [ mergedLen - 1 ] += piece . text
959+ mergedSingleCharRunChars [ mergedLen - 1 ] = null
953960 } else {
954961 mergedTexts [ mergedLen ] = piece . text
955962 mergedWordLike [ mergedLen ] = piece . isWordLike
956963 mergedKinds [ mergedLen ] = piece . kind
957964 mergedStarts [ mergedLen ] = piece . start
965+ mergedSingleCharRunChars [ mergedLen ] = repeatableSingleCharRunChar
958966 mergedLen ++
959967 }
960968 }
0 commit comments