@@ -17,17 +17,19 @@ const makeSegmenter = cached((locale: string) => ({
1717 * Split a string into sentences, respecting common abbreviations.
1818 */
1919export function * splitBySentence (
20- input : string ,
20+ rawInput : string ,
2121 locale : Intl . LocalesArgument = "en"
2222) : Generator < Intl . SegmentData > {
23- if ( ! input || typeof input !== "string" )
23+ if ( ! rawInput || typeof rawInput !== "string" )
2424 throw new TypeError ( "input must be a string" )
2525
2626 const { abbreviations, segmenter } = makeSegmenter ( locale . toString ( ) )
2727 const rLastWord = / (?< = \s | ^ ) \S + (? = \s + $ ) /
28+ const input = rawInput . replaceAll ( / (?< = \. \s + ) \S / g, ( char ) =>
29+ char . toLocaleUpperCase ( )
30+ )
2831
29- let continuationIndex : number | undefined
30- let continuation = ""
32+ let left = 0
3133 for ( const { segment, index } of segmenter . segment ( input ) ) {
3234 const match = segment . match ( rLastWord )
3335
@@ -37,20 +39,17 @@ export function* splitBySentence(
3739 ( abbreviations . has ( match [ 0 ] . toLocaleLowerCase ( locale ) ) ||
3840 // 2. A closing parenthesis without a period.
3941 match [ 0 ] . endsWith ( ")" ) )
40- ) {
41- continuationIndex = continuationIndex ?? index
42- continuation += segment
42+ )
4343 continue
44- }
4544
45+ const right = index + segment . length
4646 yield {
47- segment : continuation + segment ,
48- index : continuationIndex ?? index ,
49- input,
47+ segment : rawInput . slice ( left , right ) ,
48+ index : left ,
49+ input : rawInput ,
5050 }
5151
52- continuation = ""
53- continuationIndex = undefined
52+ left = right
5453 }
5554}
5655
0 commit comments