
Commit 40b1988

Handle hive special control chars
1 parent 2073fdd commit 40b1988

5 files changed (+86, −95 lines)

sqlglot/dialects/duckdb.py
Lines changed: 50 additions & 23 deletions

@@ -58,6 +58,15 @@
     "]": r"\]",
 }
 
+# Whitespace control characters that DuckDB must process with `CHR({val})` calls
+WS_CONTROL_CHARS_TO_DUCK = {
+    "\u000b": 11,
+    "\u001c": 28,
+    "\u001d": 29,
+    "\u001e": 30,
+    "\u001f": 31,
+}
+
 
 # BigQuery -> DuckDB conversion for the DATE function
 def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:

@@ -299,16 +308,49 @@ def _anyvalue_sql(self: DuckDB.Generator, expression: exp.AnyValue) -> str:
         return self.function_fallback_sql(expression)
 
 
+def _literal_sql_with_ws_chr(self: DuckDB.Generator, literal: str) -> str:
+    # DuckDB does not support \uXXXX escapes, so rebuild literals with CHR() for special whitespace controls.
+    if not any(ch in WS_CONTROL_CHARS_TO_DUCK for ch in literal):
+        return self.sql(exp.Literal.string(literal))
+
+    sql_segments: t.List[str] = []
+    literal_chars: t.List[str] = []
+
+    for ch in literal:
+        duckdb_char_code = WS_CONTROL_CHARS_TO_DUCK.get(ch)
+        if not duckdb_char_code:
+            literal_chars.append(ch)
+            continue
+
+        if literal_chars:
+            sql_segments.append(self.sql(exp.Literal.string("".join(literal_chars))))
+            literal_chars.clear()
+
+        sql_segments.append(self.func("CHR", exp.Literal.number(str(duckdb_char_code))))
+
+    if literal_chars:
+        sql_segments.append(self.sql(exp.Literal.string("".join(literal_chars))))
+
+    sql = " || ".join(sql_segments)
+    return sql if len(sql_segments) == 1 else f"({sql})"
+
+
 def _escape_regex_metachars(
     self: DuckDB.Generator, delimiters: t.Optional[exp.Expression], delimiters_sql: str
 ) -> str:
+    r"""
+    Escapes regex metacharacters in delimiter strings for use in character classes [].
+
+    Handles: \ - ^ [ ] which have special meaning in regex character classes.
+    For literals: escapes at transpile time. For dynamic expressions: generates REPLACE() calls.
+    """
     if not delimiters:
         return delimiters_sql
 
     if delimiters.is_string:
         literal_value = delimiters.this
         escaped_literal = "".join(REGEX_ESCAPE_REPLACEMENTS.get(ch, ch) for ch in literal_value)
-        return self.sql(exp.Literal.string(escaped_literal))
+        return _literal_sql_with_ws_chr(self, escaped_literal)
 
     escaped_sql = delimiters_sql
     for raw, escaped in REGEX_ESCAPE_REPLACEMENTS.items():

@@ -325,20 +367,14 @@ def _escape_regex_metachars(
 def _build_capitalization_sql(
     self: DuckDB.Generator,
     value_to_split: str,
-    raw_delimiters_sql: str,
-    escaped_delimiters_sql: t.Optional[str] = None,
-    convert_delim_to_regex: bool = True,
+    delimiters_sql: str,
 ) -> str:
     # empty string delimiter --> treat value as one word, no need to split
-    if raw_delimiters_sql == "''":
+    if delimiters_sql == "''":
         return f"UPPER(LEFT({value_to_split}, 1)) || LOWER(SUBSTRING({value_to_split}, 2))"
 
-    regex_ready_sql = escaped_delimiters_sql or raw_delimiters_sql
-    delim_regex_sql = regex_ready_sql
-    split_regex_sql = regex_ready_sql
-    if convert_delim_to_regex:
-        delim_regex_sql = f"CONCAT('[', {regex_ready_sql}, ']')"
-        split_regex_sql = f"CONCAT('([', {regex_ready_sql}, ']+|[^', {regex_ready_sql}, ']+)')"
+    delim_regex_sql = f"CONCAT('[', {delimiters_sql}, ']')"
+    split_regex_sql = f"CONCAT('([', {delimiters_sql}, ']+|[^', {delimiters_sql}, ']+)')"
 
     # REGEXP_EXTRACT_ALL produces a list of string segments, alternating between delimiter and non-delimiter segments.
     # We do not know whether the first segment is a delimiter or not, so we check the first character of the string

@@ -369,19 +405,10 @@ def _initcap_sql(self: DuckDB.Generator, expression: exp.Initcap) -> str:
     this_sql = self.sql(expression, "this")
     delimiters = expression.args.get("expression")
     delimiters_sql = self.sql(delimiters)
-    escaped_delimiters_sql = (
-        _escape_regex_metachars(self, delimiters, delimiters_sql)
-        if not isinstance(delimiters, exp.Null)
-        else delimiters_sql
-    )
 
-    return _build_capitalization_sql(
-        self,
-        this_sql,
-        delimiters_sql,
-        escaped_delimiters_sql,
-        convert_delim_to_regex=not isinstance(delimiters, exp.Null),
-    )
+    escaped_delimiters_sql = _escape_regex_metachars(self, delimiters, delimiters_sql)
+
+    return _build_capitalization_sql(self, this_sql, escaped_delimiters_sql)
 
 
 class DuckDB(Dialect):
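
To see the rewrite outside the generator plumbing, here is a minimal standalone sketch of the same segmentation idea. It is an illustration only: it quotes segments by hand instead of going through exp.Literal.string and self.func the way the committed _literal_sql_with_ws_chr does.

WS_CONTROL_CHARS = {"\u000b": 11, "\u001c": 28, "\u001d": 29, "\u001e": 30, "\u001f": 31}

def ws_literal_to_duckdb(text: str) -> str:
    # Split the literal into plain-text runs and CHR(n) calls, then join with ||.
    segments: list[str] = []
    plain: list[str] = []
    for ch in text:
        code = WS_CONTROL_CHARS.get(ch)
        if code is None:
            plain.append(ch)
            continue
        if plain:
            segments.append("'" + "".join(plain).replace("'", "''") + "'")
            plain.clear()
        segments.append(f"CHR({code})")
    if plain:
        segments.append("'" + "".join(plain).replace("'", "''") + "'")
    sql = " || ".join(segments)
    return sql if len(segments) == 1 else f"({sql})"

# ws_literal_to_duckdb("a\u001cb") -> "('a' || CHR(28) || 'b')"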

sqlglot/dialects/hive.py
Lines changed: 4 additions & 0 deletions

@@ -219,6 +219,10 @@ class Hive(Dialect):
 
     EXPRESSION_METADATA = EXPRESSION_METADATA.copy()
 
+    # https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362046#LanguageManualUDF-StringFunctions
+    # https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java#L266-L269
+    INITCAP_DEFAULT_DELIMITER_CHARS = " \t\n\r\f\u000b\u001c\u001d\u001e\u001f"
+
     # Support only the non-ANSI mode (default for Hive, Spark2, Spark)
     COERCES_TO = defaultdict(set, deepcopy(TypeAnnotator.COERCES_TO))
     for target_type in {
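
A quick way to see why this list matters for the DuckDB generator above: printing the codepoints of Hive's default delimiters shows that the last five entries are exactly the control characters in WS_CONTROL_CHARS_TO_DUCK. This is a throwaway inspection snippet, not part of the commit:

for ch in " \t\n\r\f\u000b\u001c\u001d\u001e\u001f":
    print(repr(ch), ord(ch))
# prints: ' ' 32, '\t' 9, '\n' 10, '\r' 13, '\x0c' 12, '\x0b' 11, '\x1c' 28, '\x1d' 29, '\x1e' 30, '\x1f' 31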

sqlglot/dialects/spark2.py
Lines changed: 2 additions & 1 deletion

@@ -121,7 +121,8 @@ class Spark2(Hive):
 
     # https://spark.apache.org/docs/latest/api/sql/index.html#initcap
     # https://docs.databricks.com/aws/en/sql/language-manual/functions/initcap
-    INITCAP_DEFAULT_DELIMITER_CHARS = " \t\n\r\f\v"
+    # https://github.com/apache/spark/blob/master/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java#L859-L905
+    INITCAP_DEFAULT_DELIMITER_CHARS = " "
 
     class Tokenizer(Hive.Tokenizer):
         HEX_STRINGS = [("X'", "'"), ("x'", "'")]
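
With Spark's default reduced to a single space, the DuckDB rewrite of a bare INITCAP becomes the short form captured in the "spark" test expectation below. A quick end-to-end check along those lines (a sketch assuming the public sqlglot API and this commit's behavior):

import sqlglot

# The transpiled DuckDB expression should build its character class from ' ' only.
sql = sqlglot.transpile("SELECT INITCAP(col)", read="spark", write="duckdb")[0]
assert "'[' || ' ' || ']'" in sql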

tests/dialects/test_dialect.py
Lines changed: 28 additions & 48 deletions

@@ -4313,18 +4313,13 @@ def test_initcap(self):
             "spark": Spark2.INITCAP_DEFAULT_DELIMITER_CHARS,
         }
 
-        REGEX_LITERAL_ESCAPES = {
-            "\\": "\\\\",
-            "-": "\\-",
-            "^": "\\^",
-            "[": "\\[",
-            "]": "\\]",
+        duckdb_default_delimiter_sql = {
+            "": "ARRAY_TO_STRING(CASE WHEN REGEXP_MATCHES(LEFT(col, 1), '[' || (' \t\n\r\x0c' || CHR(11) || '!\"#$%&''()*+,\\\\\\-./:;<=>?@\\\\\\[\\\\\\]\\^_`{|}~') || ']') THEN LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || (' \t\n\r\x0c' || CHR(11) || '!\"#$%&''()*+,\\\\\\-./:;<=>?@\\\\\\[\\\\\\]\\^_`{|}~') || ']+|[^' || (' \t\n\r\x0c' || CHR(11) || '!\"#$%&''()*+,\\\\\\-./:;<=>?@\\\\\\[\\\\\\]\\^_`{|}~') || ']+)'), (seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) ELSE LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || (' \t\n\r\x0c' || CHR(11) || '!\"#$%&''()*+,\\\\\\-./:;<=>?@\\\\\\[\\\\\\]\\^_`{|}~') || ']+|[^' || (' \t\n\r\x0c' || CHR(11) || '!\"#$%&''()*+,\\\\\\-./:;<=>?@\\\\\\[\\\\\\]\\^_`{|}~') || ']+)'), (seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) END, '')",
+            "bigquery": "ARRAY_TO_STRING(CASE WHEN REGEXP_MATCHES(LEFT(col, 1), '[' || (' \t\n\r\x0c' || CHR(11) || '\\\\\\[\\\\\\](){}/|<>!?@\"\\^#$&~_,.:;*%+\\\\\\-') || ']') THEN LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || (' \t\n\r\x0c' || CHR(11) || '\\\\\\[\\\\\\](){}/|<>!?@\"\\^#$&~_,.:;*%+\\\\\\-') || ']+|[^' || (' \t\n\r\x0c' || CHR(11) || '\\\\\\[\\\\\\](){}/|<>!?@\"\\^#$&~_,.:;*%+\\\\\\-') || ']+)'), (seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) ELSE LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || (' \t\n\r\x0c' || CHR(11) || '\\\\\\[\\\\\\](){}/|<>!?@\"\\^#$&~_,.:;*%+\\\\\\-') || ']+|[^' || (' \t\n\r\x0c' || CHR(11) || '\\\\\\[\\\\\\](){}/|<>!?@\"\\^#$&~_,.:;*%+\\\\\\-') || ']+)'), (seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) END, '')",
+            "snowflake": "ARRAY_TO_STRING(CASE WHEN REGEXP_MATCHES(LEFT(col, 1), '[' || (' \t\n\r\x0c' || CHR(11) || '!?@\"\\^#$&~_,.:;+\\\\\\-*%/|\\\\\\[\\\\\\](){}<>') || ']') THEN LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || (' \t\n\r\x0c' || CHR(11) || '!?@\"\\^#$&~_,.:;+\\\\\\-*%/|\\\\\\[\\\\\\](){}<>') || ']+|[^' || (' \t\n\r\x0c' || CHR(11) || '!?@\"\\^#$&~_,.:;+\\\\\\-*%/|\\\\\\[\\\\\\](){}<>') || ']+)'), (seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) ELSE LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || (' \t\n\r\x0c' || CHR(11) || '!?@\"\\^#$&~_,.:;+\\\\\\-*%/|\\\\\\[\\\\\\](){}<>') || ']+|[^' || (' \t\n\r\x0c' || CHR(11) || '!?@\"\\^#$&~_,.:;+\\\\\\-*%/|\\\\\\[\\\\\\](){}<>') || ']+)'), (seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) END, '')",
+            "spark": "ARRAY_TO_STRING(CASE WHEN REGEXP_MATCHES(LEFT(col, 1), '[' || ' ' || ']') THEN LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || ' ' || ']+|[^' || ' ' || ']+)'), (seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) ELSE LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || ' ' || ']+|[^' || ' ' || ']+)'), (seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) END, '')",
         }
 
-        def duckdb_regex_literal_sql(delimiters: str) -> str:
-            escaped_literal = "".join(REGEX_LITERAL_ESCAPES.get(ch, ch) for ch in delimiters)
-            return exp.Literal.string(escaped_literal).sql("duckdb")
-
         # None delimiters arg doesn't error
         with self.subTest("Testing INITCAP with None delimiters arg"):
             self.assertEqual(exp.Initcap(this=exp.Literal.string("col")).sql(), "INITCAP('col')")

@@ -4355,17 +4350,10 @@ def duckdb_regex_literal_sql(delimiters: str) -> str:
 
         for dialect, default_delimiters in delimiter_chars.items():
             with self.subTest(f"DuckDB rewrite for {dialect or 'default'} default delimiters"):
-                escaped_literal = duckdb_regex_literal_sql(default_delimiters)
-                expected = (
-                    "ARRAY_TO_STRING("
-                    f"CASE WHEN REGEXP_MATCHES(LEFT(col, 1), '[' || {escaped_literal} || ']') "
-                    f"THEN LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || {escaped_literal} || ']+|[^' || {escaped_literal} || ']+)'), "
-                    f"(seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) "
-                    f"ELSE LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || {escaped_literal} || ']+|[^' || {escaped_literal} || ']+)'), "
-                    f"(seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) "
-                    "END, '')"
+                self.assertEqual(
+                    parse_one("INITCAP(col)", read=dialect).sql("duckdb"),
+                    duckdb_default_delimiter_sql[dialect],
                 )
-                self.assertEqual(parse_one("INITCAP(col)", read=dialect).sql("duckdb"), expected)
 
         # DuckDB generation for BQ/Snowflake calls with custom delimiters arg
         for dialect in ("bigquery", "snowflake"):

@@ -4378,49 +4366,41 @@ def duckdb_regex_literal_sql(delimiters: str) -> str:
 
             query = "INITCAP(col, NULL)"
             with self.subTest(f"DuckDB generation for {query} from {dialect}"):
+                # NULL delimiters generate verbose REPLACE calls but still evaluate to NULL correctly
+                escaped_null = r"REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(NULL, '\', '\\'), '-', '\-'), '^', '\^'), '[', '\['), ']', '\]')"
                 self.assertEqual(
                     parse_one(query, read=dialect).sql("duckdb"),
-                    "ARRAY_TO_STRING("
-                    "CASE WHEN REGEXP_MATCHES(LEFT(col, 1), NULL) "
-                    "THEN LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, NULL), "
-                    "(seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) "
-                    "ELSE LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, NULL), "
-                    "(seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) "
-                    "END, '')",
+                    f"ARRAY_TO_STRING("
+                    f"CASE WHEN REGEXP_MATCHES(LEFT(col, 1), '[' || {escaped_null} || ']') "
+                    f"THEN LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || {escaped_null} || ']+|[^' || {escaped_null} || ']+)'), "
+                    f"(seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) "
+                    f"ELSE LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || {escaped_null} || ']+|[^' || {escaped_null} || ']+)'), "
+                    f"(seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) "
+                    f"END, '')",
                 )
 
-            for custom_delimiter in (" ", "@", " _@", r"\\"):
+            custom_delimiter_expectations = {
+                " ": "ARRAY_TO_STRING(CASE WHEN REGEXP_MATCHES(LEFT(col, 1), '[' || ' ' || ']') THEN LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || ' ' || ']+|[^' || ' ' || ']+)'), (seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) ELSE LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || ' ' || ']+|[^' || ' ' || ']+)'), (seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) END, '')",
+                "@": "ARRAY_TO_STRING(CASE WHEN REGEXP_MATCHES(LEFT(col, 1), '[' || '@' || ']') THEN LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || '@' || ']+|[^' || '@' || ']+)'), (seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) ELSE LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || '@' || ']+|[^' || '@' || ']+)'), (seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) END, '')",
+                " _@": "ARRAY_TO_STRING(CASE WHEN REGEXP_MATCHES(LEFT(col, 1), '[' || ' _@' || ']') THEN LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || ' _@' || ']+|[^' || ' _@' || ']+)'), (seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) ELSE LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || ' _@' || ']+|[^' || ' _@' || ']+)'), (seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) END, '')",
+                r"\\": "ARRAY_TO_STRING(CASE WHEN REGEXP_MATCHES(LEFT(col, 1), '[' || '\\\\\\\\' || ']') THEN LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || '\\\\\\\\' || ']+|[^' || '\\\\\\\\' || ']+)'), (seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) ELSE LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || '\\\\\\\\' || ']+|[^' || '\\\\\\\\' || ']+)'), (seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) END, '')",
+                "\u000b": "ARRAY_TO_STRING(CASE WHEN REGEXP_MATCHES(LEFT(col, 1), '[' || CHR(11) || ']') THEN LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || CHR(11) || ']+|[^' || CHR(11) || ']+)'), (seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) ELSE LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || CHR(11) || ']+|[^' || CHR(11) || ']+)'), (seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) END, '')",
+            }
+            for custom_delimiter, expected_duckdb_sql in custom_delimiter_expectations.items():
                 with self.subTest(
                     f"DuckDB generation for INITCAP(col, {custom_delimiter}) from {dialect}"
                 ):
                     literal_sql = exp.Literal.string(custom_delimiter).sql(dialect)
                     expression = parse_one(f"INITCAP(col, {literal_sql})", read=dialect)
-                    duckdb_sql = expression.sql("duckdb")
-                    escaped_custom_delimiter = duckdb_regex_literal_sql(custom_delimiter)
                     self.assertEqual(
-                        duckdb_sql,
-                        "ARRAY_TO_STRING("
-                        f"CASE WHEN REGEXP_MATCHES(LEFT(col, 1), '[' || {escaped_custom_delimiter} || ']') "
-                        f"THEN LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || {escaped_custom_delimiter} || ']+|[^' || {escaped_custom_delimiter} || ']+)'), "
-                        f"(seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) "
-                        f"ELSE LIST_TRANSFORM(REGEXP_EXTRACT_ALL(col, '([' || {escaped_custom_delimiter} || ']+|[^' || {escaped_custom_delimiter} || ']+)'), "
-                        f"(seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END) "
-                        "END, '')",
+                        expression.sql("duckdb"),
+                        expected_duckdb_sql,
                     )
 
-            def escape_expression_sql(sql: str) -> str:
-                escaped_sql = sql
-                for raw, escaped in REGEX_LITERAL_ESCAPES.items():
-                    raw_sql = exp.Literal.string(raw).sql()
-                    escaped_literal_sql = exp.Literal.string(escaped).sql()
-                    escaped_sql = f"REPLACE({escaped_sql}, {raw_sql}, {escaped_literal_sql})"
-
-                return escaped_sql
-
             with self.subTest(
                 f"DuckDB generation for INITCAP subquery as custom delimiter arg from {dialect}"
             ):
-                escaped_subquery = escape_expression_sql("(SELECT delimiter FROM settings LIMIT 1)")
+                escaped_subquery = "REPLACE(REPLACE(REPLACE(REPLACE(REPLACE((SELECT delimiter FROM settings LIMIT 1), '\\', '\\\\'), '-', '\\-'), '^', '\\^'), '[', '\\['), ']', '\\]')"
                 self.assertEqual(
                     parse_one(
                         "INITCAP(col, (SELECT delimiter FROM settings LIMIT 1))", read=dialect