@@ -268,54 +268,110 @@ def _json_extract_value_array_sql(
268268 return self .sql (exp .cast (json_extract , to = exp .DataType .build (data_type )))
269269
270270
271- def _initcap_sql (self : DuckDB .Generator , expression : exp .Initcap ) -> str :
272- def build_capitalize_sql (
273- value_to_split : str , delimiters_sql : str , convert_delim_to_regex : bool = True
274- ) -> str :
275- # empty string delimiter --> treat value as one word, no need to split
276- if delimiters_sql == "''" :
277- return f"UPPER(LEFT({ value_to_split } , 1)) || LOWER(SUBSTR({ value_to_split } , 2))"
278-
279- delim_regex_sql = delimiters_sql
280- split_regex_sql = delimiters_sql
281- if convert_delim_to_regex :
282- delim_regex_sql = f"CONCAT('[', { delimiters_sql } , ']')"
283- split_regex_sql = f"CONCAT('([', { delimiters_sql } , ']+|[^', { delimiters_sql } , ']+)')"
284-
285- # REGEXP_EXTRACT_ALL produces a list of string segments, alternating between delimiter and non-delimiter segments.
286- # We do not know whether the first segment is a delimiter or not, so we check the first character of the string
287- # with REGEXP_MATCHES. If the first char is a delimiter, we capitalize even list indexes, otherwise capitalize odd.
288- return self .func (
289- "ARRAY_TO_STRING" ,
290- exp .case ()
291- .when (
292- f"REGEXP_MATCHES(LEFT({ value_to_split } , 1), { delim_regex_sql } )" ,
293- self .func (
294- "LIST_TRANSFORM" ,
295- self .func ("REGEXP_EXTRACT_ALL" , value_to_split , split_regex_sql ),
296- "(seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTR(seg, 2)) ELSE seg END" ,
297- ),
298- )
299- .else_ (
300- self .func (
301- "LIST_TRANSFORM" ,
302- self .func ("REGEXP_EXTRACT_ALL" , value_to_split , split_regex_sql ),
303- "(seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTR(seg, 2)) ELSE seg END" ,
304- ),
271+ def _escape_regex_metachars (
272+ self : DuckDB .Generator , delimiters : t .Optional [exp .Expression ], delimiters_sql : str
273+ ) -> str :
274+ if not delimiters :
275+ return delimiters_sql
276+
277+ REGEX_LITERAL_ESCAPES = {
278+ "\\ " : "\\ \\ " , # literals need two slashes inside []
279+ "-" : "\\ -" ,
280+ "^" : "\\ ^" ,
281+ "[" : "\\ [" ,
282+ "]" : "\\ ]" ,
283+ }
284+
285+ if isinstance (delimiters , exp .Literal ) and delimiters .is_string :
286+ literal_value = delimiters .this
287+ escaped_literal = "" .join (REGEX_LITERAL_ESCAPES .get (ch , ch ) for ch in literal_value )
288+ return self .sql (exp .Literal .string (escaped_literal ))
289+
290+ REGEX_ESCAPE_REPLACEMENTS = (
291+ ("\\ " , "\\ \\ " ),
292+ ("-" , r"\-" ),
293+ ("^" , r"\^" ),
294+ ("[" , r"\[" ),
295+ ("]" , r"\]" ),
296+ )
297+
298+ escaped_sql = delimiters_sql
299+ for raw , escaped in REGEX_ESCAPE_REPLACEMENTS :
300+ escaped_sql = self .func (
301+ "REPLACE" ,
302+ escaped_sql ,
303+ self .sql (exp .Literal .string (raw )),
304+ self .sql (exp .Literal .string (escaped )),
305+ )
306+
307+ return escaped_sql
308+
309+
310+ def _build_capitalization_sql (
311+ self : DuckDB .Generator ,
312+ value_to_split : str ,
313+ raw_delimiters_sql : str ,
314+ escaped_delimiters_sql : t .Optional [str ] = None ,
315+ convert_delim_to_regex : bool = True ,
316+ ) -> str :
317+ # empty string delimiter --> treat value as one word, no need to split
318+ if raw_delimiters_sql == "''" :
319+ return f"UPPER(LEFT({ value_to_split } , 1)) || LOWER(SUBSTR({ value_to_split } , 2))"
320+
321+ regex_ready_sql = escaped_delimiters_sql or raw_delimiters_sql
322+ delim_regex_sql = regex_ready_sql
323+ split_regex_sql = regex_ready_sql
324+ if convert_delim_to_regex :
325+ delim_regex_sql = f"CONCAT('[', { regex_ready_sql } , ']')"
326+ split_regex_sql = f"CONCAT('([', { regex_ready_sql } , ']+|[^', { regex_ready_sql } , ']+)')"
327+
328+ # REGEXP_EXTRACT_ALL produces a list of string segments, alternating between delimiter and non-delimiter segments.
329+ # We do not know whether the first segment is a delimiter or not, so we check the first character of the string
330+ # with REGEXP_MATCHES. If the first char is a delimiter, we capitalize even list indexes, otherwise capitalize odd.
331+ return self .func (
332+ "ARRAY_TO_STRING" ,
333+ exp .case ()
334+ .when (
335+ f"REGEXP_MATCHES(LEFT({ value_to_split } , 1), { delim_regex_sql } )" ,
336+ self .func (
337+ "LIST_TRANSFORM" ,
338+ self .func ("REGEXP_EXTRACT_ALL" , value_to_split , split_regex_sql ),
339+ "(seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTR(seg, 2)) ELSE seg END" ,
305340 ),
306- "''" ,
307341 )
342+ .else_ (
343+ self .func (
344+ "LIST_TRANSFORM" ,
345+ self .func ("REGEXP_EXTRACT_ALL" , value_to_split , split_regex_sql ),
346+ "(seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTR(seg, 2)) ELSE seg END" ,
347+ ),
348+ ),
349+ "''" ,
350+ )
308351
352+
def _initcap_sql(self: DuckDB.Generator, expression: exp.Initcap) -> str:
    """
    Generate the SQL for an `exp.Initcap` expression, honoring its delimiters argument.

    The result is a CASE expression that yields NULL when the input value is NULL
    (and, for non-literal delimiters, when the delimiters are NULL) and the
    capitalized string otherwise.
    """
    value_sql = self.sql(expression, "this")
    delimiters = expression.args.get("expression")
    delimiters_sql = self.sql(delimiters)
    # NOTE(review): if the delimiters arg is absent, delimiters_sql is presumably empty and
    # the SQL built below looks malformed -- confirm that callers always supply delimiters.
    delimiters_are_null = isinstance(delimiters, exp.Null)

    # A NULL delimiter is passed through as-is; anything else gets its regex metachars escaped
    if delimiters_are_null:
        escaped_delimiters_sql = delimiters_sql
    else:
        escaped_delimiters_sql = _escape_regex_metachars(self, delimiters, delimiters_sql)

    if delimiters and isinstance(delimiters, exp.Literal) and delimiters.is_string:
        # String literal delimiters can never be NULL, so only the value is NULL-checked
        capitalized_sql = _build_capitalization_sql(
            self, value_sql, delimiters_sql, escaped_delimiters_sql
        )
        return f"CASE WHEN {value_sql} IS NULL THEN NULL ELSE {capitalized_sql} END"

    # The delimiters are an arbitrary SQL expression or NULL
    capitalized_sql = _build_capitalization_sql(
        self,
        value_sql,
        delimiters_sql,
        escaped_delimiters_sql,
        convert_delim_to_regex=not delimiters_are_null,
    )
    return f"CASE WHEN {value_sql} IS NULL OR {delimiters_sql} IS NULL THEN NULL ELSE {capitalized_sql} END"
321377
0 commit comments