@@ -428,14 +428,15 @@ enum NfaState {
428428 InQuotedField = 3 ,
429429 InEscapedQuote = 4 ,
430430 InDoubleEscapedQuote = 5 ,
431- InComment = 6 ,
431+ InEscapeSequence = 6 ,
432+ InComment = 7 ,
432433 // All states below are "final field" states.
433434 // Namely, they indicate that a field has been parsed.
434- EndFieldDelim = 7 ,
435+ EndFieldDelim = 8 ,
435436 // All states below are "final record" states.
436437 // Namely, they indicate that a record has been parsed.
437- EndRecord = 8 ,
438- CRLF = 9 ,
438+ EndRecord = 9 ,
439+ CRLF = 10 ,
439440}
440441
441442/// A list of NFA states that have an explicit representation in the DFA.
@@ -447,6 +448,7 @@ const NFA_STATES: &'static [NfaState] = &[
447448 NfaState :: InQuotedField ,
448449 NfaState :: InEscapedQuote ,
449450 NfaState :: InDoubleEscapedQuote ,
451+ NfaState :: InEscapeSequence ,
450452 NfaState :: InComment ,
451453 NfaState :: EndRecord ,
452454 NfaState :: CRLF ,
@@ -805,9 +807,9 @@ impl Reader {
805807 self . dfa . classes . add ( self . delimiter ) ;
806808 if self . quoting {
807809 self . dfa . classes . add ( self . quote ) ;
808- if let Some ( escape ) = self . escape {
809- self . dfa . classes . add ( escape) ;
810- }
810+ }
811+ if let Some ( escape ) = self . escape {
812+ self . dfa . classes . add ( escape ) ;
811813 }
812814 if let Some ( comment) = self . comment {
813815 self . dfa . classes . add ( comment) ;
@@ -970,7 +972,7 @@ impl Reader {
970972 match state {
971973 End | StartRecord | EndRecord | InComment | CRLF => End ,
972974 StartField | EndFieldDelim | EndFieldTerm | InField
973- | InQuotedField | InEscapedQuote | InDoubleEscapedQuote
975+ | InQuotedField | InEscapedQuote | InDoubleEscapedQuote | InEscapeSequence
974976 | InRecordTerm => EndRecord ,
975977 }
976978 }
@@ -1007,6 +1009,8 @@ impl Reader {
10071009 ( EndFieldDelim , NfaInputAction :: Discard )
10081010 } else if self . term . equals ( c) {
10091011 ( EndFieldTerm , NfaInputAction :: Epsilon )
1012+ } else if !self . quoting && self . escape == Some ( c) {
1013+ ( InEscapeSequence , NfaInputAction :: Discard )
10101014 } else {
10111015 ( InField , NfaInputAction :: CopyToOutput )
10121016 }
@@ -1018,6 +1022,8 @@ impl Reader {
10181022 ( EndFieldDelim , NfaInputAction :: Discard )
10191023 } else if self . term . equals ( c) {
10201024 ( EndFieldTerm , NfaInputAction :: Epsilon )
1025+ } else if !self . quoting && self . escape == Some ( c) {
1026+ ( InEscapeSequence , NfaInputAction :: Discard )
10211027 } else {
10221028 ( InField , NfaInputAction :: CopyToOutput )
10231029 }
@@ -1043,6 +1049,7 @@ impl Reader {
10431049 ( InField , NfaInputAction :: CopyToOutput )
10441050 }
10451051 }
1052+ InEscapeSequence => ( InField , NfaInputAction :: CopyToOutput ) ,
10461053 InComment => {
10471054 if b'\n' == c {
10481055 ( StartRecord , NfaInputAction :: Discard )
@@ -1087,7 +1094,7 @@ impl Reader {
10871094/// be reached by epsilon transitions will never have explicit usage in the
10881095/// DFA.
10891096const TRANS_CLASSES : usize = 7 ;
1090- const DFA_STATES : usize = 10 ;
1097+ const DFA_STATES : usize = 11 ;
10911098const TRANS_SIZE : usize = TRANS_CLASSES * DFA_STATES ;
10921099
10931100/// The number of possible transition classes. (See the comment on `TRANS_SIZE`
@@ -1119,6 +1126,8 @@ struct Dfa {
11191126 in_field : DfaState ,
11201127 /// The DFA state corresponding to being inside a quoted field.
11211128 in_quoted : DfaState ,
1129+ /// The DFA state corresponding to being in an escape sequence.
1130+ in_escape_sequence : DfaState ,
11221131 /// The minimum DFA state that indicates a field has been parsed. All DFA
11231132 /// states greater than this are also final-field states.
11241133 final_field : DfaState ,
@@ -1135,6 +1144,7 @@ impl Dfa {
11351144 classes : DfaClasses :: new ( ) ,
11361145 in_field : DfaState ( 0 ) ,
11371146 in_quoted : DfaState ( 0 ) ,
1147+ in_escape_sequence : DfaState ( 0 ) ,
11381148 final_field : DfaState ( 0 ) ,
11391149 final_record : DfaState ( 0 ) ,
11401150 }
@@ -1170,6 +1180,7 @@ impl Dfa {
11701180 fn finish ( & mut self ) {
11711181 self . in_field = self . new_state ( NfaState :: InField ) ;
11721182 self . in_quoted = self . new_state ( NfaState :: InQuotedField ) ;
1183+ self . in_escape_sequence = self . new_state ( NfaState :: InEscapeSequence ) ;
11731184 self . final_field = self . new_state ( NfaState :: EndFieldDelim ) ;
11741185 self . final_record = self . new_state ( NfaState :: EndRecord ) ;
11751186 }
@@ -1665,6 +1676,15 @@ mod tests {
16651676 }
16661677 ) ;
16671678
1679+ parses_to ! (
1680+ escape_sequence,
1681+ "a\\ ,b\\ \\ c,\\ ,fo\" o\\ ,,bar" ,
1682+ csv![ [ "a,b\\ c" , ",fo\" o," , "bar" ] ] ,
1683+ |b: & mut ReaderBuilder | {
1684+ b. quoting( false ) . escape( Some ( b'\\' ) ) ;
1685+ }
1686+ ) ;
1687+
16681688 parses_to ! (
16691689 delimiter_tabs,
16701690 "a\t b" ,
@@ -1863,6 +1883,25 @@ mod tests {
18631883 assert_read ! ( rdr, & [ ] , out, 0 , 0 , End ) ;
18641884 }
18651885
1886+ // Test that escape sequences are read correctly in a stream, including one split across two reads.
1887+ #[ test]
1888+ fn stream_escape_sequence ( ) {
1889+ use crate :: ReadFieldResult :: * ;
1890+
1891+ let out = & mut [ 0 ; 10 ] ;
1892+ let mut builder = ReaderBuilder :: new ( ) ;
1893+ let mut rdr = builder. quoting ( false ) . escape ( Some ( b'\\' ) ) . build ( ) ;
1894+
1895+ assert_read ! ( rdr, b( "\\ ,f\\ \\ o\\ " ) , out, 7 , 4 , InputEmpty ) ;
1896+ assert_eq ! ( & out[ ..4 ] , b( ",f\\ o" ) ) ;
1897+
1898+ assert_read ! ( rdr, b( ",o\\ ," ) , & mut out[ 4 ..] , 4 , 3 , InputEmpty ) ;
1899+ assert_eq ! ( & out[ ..7 ] , b( ",f\\ o,o," ) ) ;
1900+
1901+ assert_read ! ( rdr, & [ ] , out, 0 , 0 , Field { record_end: true } ) ;
1902+ assert_read ! ( rdr, & [ ] , out, 0 , 0 , End ) ;
1903+ }
1904+
18661905 // Test that empty output buffers don't wreak havoc.
18671906 #[ test]
18681907 fn stream_empty_output ( ) {
0 commit comments