From 36c1c0c75a712496f5daa2b141994afc71557ed9 Mon Sep 17 00:00:00 2001 From: Arturo Herrera Aguilar Date: Thu, 23 Oct 2025 12:39:33 -0600 Subject: [PATCH 1/7] Add support for scalar string and binary functions --- CHANGELOG.md | 11 + docs/source/snowpark/functions.rst | 10 + .../snowpark/_functions/scalar_functions.py | 773 ++++++++++++++++++ 3 files changed, 794 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 43254fdc7b..c9fa1a8c0c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,17 @@ - `st_geometryfromwkt` - `try_to_geography` - `try_to_geometry` + - String and binary functions: + - `hex_decode_string` + - `jarowinkler_similarity` + - `parse_url` + - `regexp_instr` + - `regexp_like` + - `regexp_substr` + - `regexp_substr_all` + - `rtrimmed_length` + - `space` + - `split_part` - Added a parameter to enable and disable automatic column name aliasing for `interval_day_time_from_parts` and `interval_year_month_from_parts` functions. #### Bug Fixes diff --git a/docs/source/snowpark/functions.rst b/docs/source/snowpark/functions.rst index 7f54ad61a4..195b4f58f3 100644 --- a/docs/source/snowpark/functions.rst +++ b/docs/source/snowpark/functions.rst @@ -241,6 +241,7 @@ Functions haversine hex hex_decode_binary + hex_decode_string hex_encode hour h3_cell_to_boundary @@ -306,6 +307,7 @@ Functions is_timestamp_ntz is_timestamp_tz is_varchar + jarowinkler_similarity json_extract_path_text kurtosis lag @@ -375,6 +377,7 @@ Functions pandas_udf pandas_udtf parse_json + parse_url parse_xml percent_rank percentile_approx @@ -395,8 +398,12 @@ Functions regr_avgy regr_count regr_intercept + regexp_instr + regexp_like regr_r2 regr_slope + regexp_substr + regexp_substr_all regr_sxx regr_sxy regr_syy @@ -408,6 +415,7 @@ Functions row_number rpad rtrim + rtrimmed_length second seq1 seq2 @@ -425,7 +433,9 @@ Functions snowflake_cortex_summarize sort_array soundex + space split + split_part sproc sql_expr sqrt diff --git 
a/src/snowflake/snowpark/_functions/scalar_functions.py b/src/snowflake/snowpark/_functions/scalar_functions.py index 8ff4eb8cc9..da551ec82f 100644 --- a/src/snowflake/snowpark/_functions/scalar_functions.py +++ b/src/snowflake/snowpark/_functions/scalar_functions.py @@ -3971,3 +3971,776 @@ def try_to_geometry( return builtin("try_to_geometry", _emit_ast=_emit_ast)(c, allow_invalid_col) else: return builtin("try_to_geometry", _emit_ast=_emit_ast)(c) + + +@publicapi +def hex_decode_string(input_expr: ColumnOrName, _emit_ast: bool = True) -> Column: + """ + Decodes a hexadecimal-encoded string into its original string representation. + + Args: + input_expr (ColumnOrName): The column or string containing the hexadecimal-encoded string to decode. + + Returns: + Column: The decoded string. + + Examples:: + >>> from snowflake.snowpark.functions import col + >>> df = session.create_dataframe([["536E6F77666C616B65"], ["48454C4C4F"]], schema=["hex_string"]) + >>> df.select(hex_decode_string(col("hex_string")).alias("decoded")).collect() + [Row(DECODED='Snowflake'), Row(DECODED='HELLO')] + """ + c = _to_col_if_str(input_expr, "hex_decode_string") + return builtin("hex_decode_string", _emit_ast=_emit_ast)(c) + + +@publicapi +def jarowinkler_similarity( + string_expr1: ColumnOrName, string_expr2: ColumnOrName, _emit_ast: bool = True +) -> Column: + """ + Computes the Jaro-Winkler similarity between two strings. The Jaro-Winkler similarity + is a string metric measuring an edit distance between two sequences. It is a variant + of the Jaro distance metric designed to give more favorable ratings to strings with + common prefixes. + + Args: + string_expr1 (ColumnOrName): The first string expression to compare. + string_expr2 (ColumnOrName): The second string expression to compare. + + Returns: + Column: The Jaro-Winkler similarity score as an integer between 0 and 100. + + Examples:: + >>> df = session.create_dataframe([ + ... ("Snowflake", "Oracle"), + ... 
("Ich weiß nicht", "Ich wei? nicht"), + ... ("Gute nacht", "Ich weis nicht"), + ... ("święta", "swieta"), + ... ("", ""), + ... ("test", "test") + ... ], schema=["s", "t"]) + >>> df.select(jarowinkler_similarity(df["s"], df["t"]).alias("similarity")).collect() + [Row(SIMILARITY=61), Row(SIMILARITY=97), Row(SIMILARITY=56), Row(SIMILARITY=77), Row(SIMILARITY=0), Row(SIMILARITY=100)] + """ + c1 = _to_col_if_str(string_expr1, "jarowinkler_similarity") + c2 = _to_col_if_str(string_expr2, "jarowinkler_similarity") + return builtin("jarowinkler_similarity", _emit_ast=_emit_ast)(c1, c2) + + +@publicapi +def parse_url( + string_expr: ColumnOrName, permissive: ColumnOrName = None, _emit_ast: bool = True +) -> Column: + """ + Parses a URL string and returns a JSON object containing the URL components. + + Args: + string_expr (ColumnOrName): The URL string to parse. + permissive (ColumnOrName, optional): If 1, parsing errors are ignored and None is returned. If 0 or omitted, parsing errors raise an exception. + + Returns: + Column: A JSON object containing the parsed URL components. + + Examples:: + >>> from snowflake.snowpark.functions import col, lit + >>> df = session.create_dataframe([ + ... ['https://www.snowflake.com/'], + ... ['http://USER:PASS@EXAMPLE.INT:4345/HELLO.PHP?USER=1'], + ... ['mailto:abc@xyz.com'], + ... [None] + ... 
], schema=["url"]) + >>> df.select(parse_url(col("url"))).collect() + [Row(PARSE_URL("URL")='{\\n "fragment": null,\\n "host": "www.snowflake.com",\\n "parameters": null,\\n "path": "",\\n "port": null,\\n "query": null,\\n "scheme": "https"\\n}'), Row(PARSE_URL("URL")='{\\n "fragment": null,\\n "host": "USER:PASS@EXAMPLE.INT",\\n "parameters": {\\n "USER": "1"\\n },\\n "path": "HELLO.PHP",\\n "port": "4345",\\n "query": "USER=1",\\n "scheme": "http"\\n}'), Row(PARSE_URL("URL")='{\\n "fragment": null,\\n "host": null,\\n "parameters": null,\\n "path": "abc@xyz.com",\\n "port": null,\\n "query": null,\\n "scheme": "mailto"\\n}'), Row(PARSE_URL("URL")=None)] + + >>> df2 = session.create_dataframe([ + ... ['example.int/hello.php?user=12#nofragment'] + ... ], schema=["invalid_url"]) + >>> df2.select(parse_url(col("invalid_url"), lit(1))).collect() + [Row(PARSE_URL("INVALID_URL", 1)='{\\n "error": "scheme not specified"\\n}')] + """ + c = _to_col_if_str(string_expr, "parse_url") + if permissive is not None: + p = _to_col_if_str(permissive, "parse_url") + return builtin("parse_url", _emit_ast=_emit_ast)(c, p) + else: + return builtin("parse_url", _emit_ast=_emit_ast)(c) + + +@publicapi +def regexp_instr( + subject: ColumnOrName, + pattern: ColumnOrName, + position: ColumnOrName = None, + occurrence: ColumnOrName = None, + option: ColumnOrName = None, + regexp_parameters: ColumnOrName = None, + group_num: ColumnOrName = None, + _emit_ast: bool = True, +) -> Column: + """ + Returns the position of the specified occurrence of the regular expression pattern in the string subject. If no match is found, returns 0. + + Args: + subject (ColumnOrName): The string to search in. + pattern (ColumnOrName): The regular expression pattern to search for. + position (ColumnOrName, optional): The position in the string to start the search. Default is 1. + occurrence (ColumnOrName, optional): The occurrence of the pattern to find. Default is 1. 
+        option (ColumnOrName, optional): Specifies whether to return the position of the first character of the match (0) or the position of the first character following the match (1). Default is 0.
+        regexp_parameters (ColumnOrName, optional): String of one or more characters that specifies the parameters for the regular expression. Default is 'c' (case-sensitive).
+            Supported values:
+
+            +-----------+-----------------------------------------------+
+            | Parameter | Description                                   |
+            +===========+===============================================+
+            | c         | Case-sensitive matching                       |
+            +-----------+-----------------------------------------------+
+            | i         | Case-insensitive matching                     |
+            +-----------+-----------------------------------------------+
+            | m         | Multi-line mode                               |
+            +-----------+-----------------------------------------------+
+            | e         | Extract submatches                            |
+            +-----------+-----------------------------------------------+
+            | s         | Single-line mode POSIX wildcard character     |
+            |           | . matches \\n                                 |
+            +-----------+-----------------------------------------------+
+
+        group_num (ColumnOrName, optional): Specifies which capture group to return the position for. Default is None, which returns the position of the entire match.
+ + Returns: + Column: The position of the match, or 0 if no match is found + + Examples:: + # Basic usage - only subject and pattern + >>> from snowflake.snowpark.functions import col, lit + >>> df = session.create_dataframe([["nevermore1, nevermore2, nevermore3.", "nevermore\\d"]], schema=["subject", "pattern"]) + >>> df.select(regexp_instr(col("subject"), col("pattern")).alias("basic_match")).collect() + [Row(BASIC_MATCH=1)] + + # With position parameter + >>> df2 = session.create_dataframe([["Hello world", "world", 7]], schema=["subject", "pattern", "position"]) + >>> df2.select(regexp_instr(col("subject"), col("pattern"), col("position")).alias("position_match")).collect() + [Row(POSITION_MATCH=7)] + + # With position and occurrence parameters + >>> df3 = session.create_dataframe([["nevermore1, nevermore2, nevermore3.", "nevermore\\d", 1, 2]], schema=["subject", "pattern", "position", "occurrence"]) + >>> df3.select(regexp_instr(col("subject"), col("pattern"), col("position"), col("occurrence")).alias("second_occurrence")).collect() + [Row(SECOND_OCCURRENCE=13)] + + # With position, occurrence, and option parameters + >>> df4 = session.create_dataframe([["Hello world", "world", 1, 1, 1]], schema=["subject", "pattern", "position", "occurrence", "option"]) + >>> df4.select(regexp_instr(col("subject"), col("pattern"), col("position"), col("occurrence"), col("option")).alias("after_match")).collect() + [Row(AFTER_MATCH=12)] + + # With position, occurrence, option, and regexp_parameters + >>> df5 = session.create_dataframe([["Hello world", "hello", 1, 1, 0, "i"]], schema=["subject", "pattern", "position", "occurrence", "option", "regexp_parameters"]) + >>> df5.select(regexp_instr(col("subject"), col("pattern"), col("position"), col("occurrence"), col("option"), col("regexp_parameters")).alias("case_insensitive")).collect() + [Row(CASE_INSENSITIVE=1)] + + # With all parameters including group_num + >>> df6 = session.create_dataframe([["Hello (World) (Test)", 
"(\\w+)", 1, 1, 0, "c", 1]], schema=["subject", "pattern", "position", "occurrence", "option", "regexp_parameters", "group_num"]) + >>> df6.select(regexp_instr(col("subject"), col("pattern"), col("position"), col("occurrence"), col("option"), col("regexp_parameters"), col("group_num")).alias("first_group")).collect() + [Row(FIRST_GROUP=1)] + + # Skipping position - with occurrence only + >>> df7 = session.create_dataframe([["nevermore1, nevermore2, nevermore3.", "nevermore\\d", "2"]], schema=["subject", "pattern", "occurrence"]) + >>> df7.select(regexp_instr(col("subject"), col("pattern"), occurrence=col("occurrence")).alias("skip_position")).collect() + [Row(SKIP_POSITION=13)] + + # Skipping position and occurrence - with option only + >>> df8 = session.create_dataframe([["Hello world", "world", 1]], schema=["subject", "pattern", "option"]) + >>> df8.select(regexp_instr(col("subject"), col("pattern"), option=col("option")).alias("skip_position_occurrence")).collect() + [Row(SKIP_POSITION_OCCURRENCE=12)] + + # Skipping position, occurrence, and option - with regexp_parameters only + >>> df9 = session.create_dataframe([["Hello World", "hello", "i"]], schema=["subject", "pattern", "regexp_parameters"]) + >>> df9.select(regexp_instr(col("subject"), col("pattern"), regexp_parameters=col("regexp_parameters")).alias("skip_to_regexp_params")).collect() + [Row(SKIP_TO_REGEXP_PARAMS=1)] + + # Skipping position, occurrence, option, and regexp_parameters - with group_num only + >>> df10 = session.create_dataframe([["Hello (world) (Test)", "(\\w+)", 1]], schema=["subject", "pattern", "group_num"]) + >>> df10.select(regexp_instr(col("subject"), col("pattern"), group_num=col("group_num")).alias("skip_to_group_num")).collect() + [Row(SKIP_TO_GROUP_NUM=1)] + + # Skipping position and occurrence - with option and regexp_parameters + >>> df11 = session.create_dataframe([["Hello World", "Hello", 1, "i"]], schema=["subject", "pattern", "option", "regexp_parameters"]) + >>> 
df11.select(regexp_instr(col("subject"), col("pattern"), option=col("option"), regexp_parameters=col("regexp_parameters")).alias("skip_position_occurrence_with_params")).collect() + [Row(SKIP_POSITION_OCCURRENCE_WITH_PARAMS=6)] + + # Skipping position, occurrence, and option - with regexp_parameters and group_num + >>> df12 = session.create_dataframe([["Hello (World) (Test)", "(\\w+)", "c", 1]], schema=["subject", "pattern", "regexp_parameters", "group_num"]) + >>> df12.select(regexp_instr(col("subject"), col("pattern"), regexp_parameters=col("regexp_parameters"), group_num=col("group_num")).alias("skip_to_params_and_group")).collect() + [Row(SKIP_TO_PARAMS_AND_GROUP=1)] + """ + if position is None: + position = lit(1) + if occurrence is None: + occurrence = lit(1) + if option is None: + option = lit(0) + if regexp_parameters is None: + regexp_parameters = lit("c") + args = [ + _to_col_if_str(subject, "regexp_instr"), + _to_col_if_str(pattern, "regexp_instr"), + _to_col_if_str(position, "regexp_instr"), + _to_col_if_str(occurrence, "regexp_instr"), + _to_col_if_str(option, "regexp_instr"), + _to_col_if_str(regexp_parameters, "regexp_instr"), + ] + + if group_num is not None: + args.append(_to_col_if_str(group_num, "regexp_instr")) + + return builtin("regexp_instr", _emit_ast=_emit_ast)(*args) + + +@publicapi +def regexp_like( + subject: ColumnOrName, + pattern: ColumnOrName, + parameters: ColumnOrName = None, + _emit_ast: bool = True, +) -> Column: + """ + Returns true if the subject matches the specified pattern. Both inputs must be text expressions. + + Args: + subject (ColumnOrName): A string expression to be matched against the pattern. + pattern (ColumnOrName): A string literal that will be used as a regular expression pattern. + parameters (ColumnOrName, optional): A string literal that specifies the parameters for the regular expression, defaults: c. 
+ Supported Parameters: + c: Case-sensitive matching + i: Case-insensitive matching + m: Multi-line mode + e: Extract submatches + s: Single-line mode POSIX wildcard character . matches \ + + Returns: + Column: A boolean value indicating whether the subject matches the pattern. + + Examples:: + >>> from snowflake.snowpark.functions import col, lit + >>> df = session.create_dataframe([ + ... ('Sacramento',), + ... ('San Francisco',), + ... ('San Jose',), + ... ('New York',), + ... (None,) + ... ], schema=['city']) + >>> df.where(regexp_like(col('city'), lit('San.*'))).collect() + [Row(CITY='San Francisco'), Row(CITY='San Jose')] + + >>> df.where(regexp_like(col('city'), lit('SAN.*'), lit('i'))).collect() + [Row(CITY='San Francisco'), Row(CITY='San Jose')] + """ + subject_col = _to_col_if_str(subject, "regexp_like") + pattern_col = _to_col_if_str(pattern, "regexp_like") + + if parameters is None: + return builtin("regexp_like", _emit_ast=_emit_ast)(subject_col, pattern_col) + else: + parameters_col = _to_col_if_str(parameters, "regexp_like") + return builtin("regexp_like", _emit_ast=_emit_ast)( + subject_col, pattern_col, parameters_col + ) + + +@publicapi +def regexp_substr( + subject: ColumnOrName, + pattern: ColumnOrName, + position: ColumnOrName = None, + occurrence: ColumnOrName = None, + regex_parameters: ColumnOrName = None, + group_num: ColumnOrName = None, + _emit_ast: bool = True, +) -> Column: + """ + Returns the portion of the subject that matches the regular expression pattern. + + Args: + subject (ColumnOrName): The string to search for matches. + pattern (ColumnOrName): The regular expression pattern to match. + position (ColumnOrName, optional): The position in the string to start searching from (1-based). Defaults to 1. + occurrence (ColumnOrName, optional): Which occurrence of the pattern to return. Defaults to 1. + regex_parameters (ColumnOrName, optional): String of one or more characters that specifies the parameters for the regular expression. 
Default is 'c' (case-sensitive). + Supported values: + + +-----+-----------------------------------------------+ + | Parameter | Description | + +=====+===============================================+ + | c | Case-sensitive matching | + +-----+-----------------------------------------------+ + | i | Case-insensitive matching | + +-----+-----------------------------------------------+ + | m | Multi-line mode | + +-----+-----------------------------------------------+ + | e | Extract submatches | + +-----+-----------------------------------------------+ + | s | Single-line mode POSIX wildcard character | + | | . matches \\n | + +-----+-----------------------------------------------+ + + group_num (ColumnOrName, optional): The group number in the regular expression to extract. Defaults to None, which extracts the entire match. + + Returns: + Column: The substring that matches the pattern, or None if no match is found. + + Examples:: + # Basic usage - only subject and pattern + >>> from snowflake.snowpark.functions import col, lit + >>> df = session.create_dataframe([["nevermore1, nevermore2, nevermore3.", "nevermore\\d"]], schema=["subject", "pattern"]) + >>> df.select(regexp_substr(col("subject"), col("pattern")).alias("basic_match")).collect() + [Row(BASIC_MATCH='nevermore1')] + + # With position parameter + >>> df2 = session.create_dataframe([["Hello world", "world", 7]], schema=["subject", "pattern", "position"]) + >>> df2.select(regexp_substr(col("subject"), col("pattern"), col("position")).alias("position_match")).collect() + [Row(POSITION_MATCH='world')] + + # With position and occurrence parameters + >>> df3 = session.create_dataframe([["nevermore1, nevermore2, nevermore3.", "nevermore\\d", 1, 2]], schema=["subject", "pattern", "position", "occurrence"]) + >>> df3.select(regexp_substr(col("subject"), col("pattern"), col("position"), col("occurrence")).alias("second_occurrence")).collect() + [Row(SECOND_OCCURRENCE='nevermore2')] + + # With position, occurrence, 
and regex_parameters + >>> df5 = session.create_dataframe([["Hello world", "hello", 1, 1, "i"]], schema=["subject", "pattern", "position", "occurrence", "regex_parameters"]) + >>> df5.select(regexp_substr(col("subject"), col("pattern"), col("position"), col("occurrence"), col("regex_parameters")).alias("case_insensitive")).collect() + [Row(CASE_INSENSITIVE='Hello')] + + # With all parameters including group_num + >>> df6 = session.create_dataframe([["Hello (World) (Test)", "(\\w+)", 1, 1, "c", 1]], schema=["subject", "pattern", "position", "occurrence", "regex_parameters", "group_num"]) + >>> df6.select(regexp_substr(col("subject"), col("pattern"), col("position"), col("occurrence"), col("regex_parameters"), col("group_num")).alias("first_group")).collect() + [Row(FIRST_GROUP='Hello')] + + # Skipping position - with occurrence only + >>> df7 = session.create_dataframe([["nevermore1, nevermore2, nevermore3.", "nevermore\\d", "2"]], schema=["subject", "pattern", "occurrence"]) + >>> df7.select(regexp_substr(col("subject"), col("pattern"), occurrence=col("occurrence")).alias("skip_position")).collect() + [Row(SKIP_POSITION='nevermore2')] + + # Skipping position, occurrence - with regex_parameters only + >>> df9 = session.create_dataframe([["Hello World", "hello", "i"]], schema=["subject", "pattern", "regex_parameters"]) + >>> df9.select(regexp_substr(col("subject"), col("pattern"), regex_parameters=col("regex_parameters")).alias("skip_to_regexp_params")).collect() + [Row(SKIP_TO_REGEXP_PARAMS='Hello')] + + # Skipping position, occurrence, and regex_parameters - with group_num only + >>> df10 = session.create_dataframe([["Hello (world) (Test)", "(\\w+)", 1]], schema=["subject", "pattern", "group_num"]) + >>> df10.select(regexp_substr(col("subject"), col("pattern"), group_num=col("group_num")).alias("skip_to_group_num")).collect() + [Row(SKIP_TO_GROUP_NUM='Hello')] + + # Skipping position, occurrence - with regex_parameters and group_num + >>> df12 = 
session.create_dataframe([["Hello (World) (Test)", "(\\w+)", "c", 1]], schema=["subject", "pattern", "regex_parameters", "group_num"]) + >>> df12.select(regexp_substr(col("subject"), col("pattern"), regex_parameters=col("regex_parameters"), group_num=col("group_num")).alias("skip_to_params_and_group")).collect() + [Row(SKIP_TO_PARAMS_AND_GROUP='Hello')] + """ + + if position is None: + position = lit(1) + if occurrence is None: + occurrence = lit(1) + if regex_parameters is None: + regex_parameters = lit("c") + + args = [ + _to_col_if_str(subject, "regexp_substr"), + _to_col_if_str(pattern, "regexp_substr"), + _to_col_if_str(position, "regexp_substr"), + _to_col_if_str(occurrence, "regexp_substr"), + _to_col_if_str(regex_parameters, "regexp_substr"), + ] + + if group_num is not None: + args.append(_to_col_if_str(group_num, "regexp_substr")) + + return builtin("regexp_substr", _emit_ast=_emit_ast)(*args) + + +@publicapi +def regexp_substr_all( + subject: ColumnOrName, + pattern: ColumnOrName, + position: ColumnOrName = None, + occurrence: ColumnOrName = None, + regex_parameters: ColumnOrName = None, + group_num: ColumnOrName = None, + _emit_ast: bool = True, +) -> Column: + """ + Returns all substrings that match a regular expression within a string. + + Args: + subject (ColumnOrName): The string to search for matches. + pattern (ColumnOrName): The regular expression pattern to match. + position (ColumnOrName, optional): The position in the string to start searching from (1-based). Defaults to 1. + occurrence (ColumnOrName, optional): Which occurrence of the pattern to return. + regex_parameters (ColumnOrName, optional): String of one or more characters that specifies the parameters for the regular expression. Default is 'c' (case-sensitive). 
+ Supported values: + + +-----+-----------------------------------------------+ + | Parameter | Description | + +=====+===============================================+ + | c | Case-sensitive matching | + +-----+-----------------------------------------------+ + | i | Case-insensitive matching | + +-----+-----------------------------------------------+ + | m | Multi-line mode | + +-----+-----------------------------------------------+ + | e | Extract submatches | + +-----+-----------------------------------------------+ + | s | Single-line mode POSIX wildcard character | + | | . matches \\n | + +-----+-----------------------------------------------+ + group_num (ColumnOrName, optional): The group number in the regular expression to extract. Defaults to None, which extracts the entire match. + + Returns: + Column: An array containing all matching substrings. + + Examples:: + >>> from snowflake.snowpark.functions import col, lit + >>> df = session.create_dataframe([['a1_a2a3_a4A5a6']], schema=["subject"]) + >>> df.select(regexp_substr_all(col("subject"), lit('a[[:digit:]]')).alias("result")).collect() + [Row(RESULT='[\\n "a1",\\n "a2",\\n "a3",\\n "a4",\\n "a6"\\n]')] + + >>> df.select(regexp_substr_all(col("subject"), lit('a[[:digit:]]'), lit(2)).alias("result")).collect() + [Row(RESULT='[\\n "a2",\\n "a3",\\n "a4",\\n "a6"\\n]')] + + >>> df.select(regexp_substr_all(col("subject"), lit('a[[:digit:]]'), lit(1), lit(3)).alias("result")).collect() + [Row(RESULT='[\\n "a3",\\n "a4",\\n "a6"\\n]')] + + >>> df.select(regexp_substr_all(col("subject"), lit('a[[:digit:]]'), lit(1), lit(1), lit('i')).alias("result")).collect() + [Row(RESULT='[\\n "a1",\\n "a2",\\n "a3",\\n "a4",\\n "A5",\\n "a6"\\n]')] + + >>> df.select(regexp_substr_all(col("subject"), lit('(a)([[:digit:]])'), lit(1), lit(1), lit('ie'), lit(1)).alias("result")).collect() + [Row(RESULT='[\\n "a",\\n "a",\\n "a",\\n "a",\\n "A",\\n "a"\\n]')] + + >>> df.select(regexp_substr_all(col("subject"), 
lit('b')).alias("result")).collect() + [Row(RESULT='[]')] + """ + if position is None: + position = lit(1) + if occurrence is None: + occurrence = lit(1) + if regex_parameters is None: + regex_parameters = lit("c") + + args = [ + _to_col_if_str(subject, "regexp_substr_all"), + _to_col_if_str(pattern, "regexp_substr_all"), + _to_col_if_str(position, "regexp_substr_all"), + _to_col_if_str(occurrence, "regexp_substr_all"), + _to_col_if_str(regex_parameters, "regexp_substr_all"), + ] + + if group_num is not None: + args.append(_to_col_if_str(group_num, "regexp_substr_all")) + + return builtin("regexp_substr_all", _emit_ast=_emit_ast)(*args) + + +@publicapi +def rtrimmed_length(string_expr: ColumnOrName, _emit_ast: bool = True) -> Column: + """ + Returns the length of the input string after removing trailing whitespace characters. + + Args: + string_expr (ColumnOrName): The string expression to calculate the right-trimmed length for. + + Returns: + Column: The length of the string after removing trailing whitespace. + + Examples:: + >>> df = session.create_dataframe([" ABCD ", "hello world ", " test", "no_spaces", ""], schema=["a"]) + >>> df.select(rtrimmed_length(df["a"]).alias("result")).collect() + [Row(RESULT=5), Row(RESULT=11), Row(RESULT=7), Row(RESULT=9), Row(RESULT=0)] + """ + c = _to_col_if_str(string_expr, "rtrimmed_length") + return builtin("rtrimmed_length", _emit_ast=_emit_ast)(c) + + +@publicapi +def space(n: ColumnOrName, _emit_ast: bool = True) -> Column: + """ + Returns a string consisting of n space characters. + + Args: + n (ColumnOrName): The number of space characters to return. + + Returns: + Column: A string consisting of the specified number of space characters. 
+ + Examples:: + >>> from snowflake.snowpark.functions import col + >>> df = session.create_dataframe([[3], [5], [0]], schema=["n"]) + >>> df.select(space(col("n")).alias("result")).collect() + [Row(RESULT=' '), Row(RESULT=' '), Row(RESULT='')] + """ + c = _to_col_if_str(n, "space") + return builtin("space", _emit_ast=_emit_ast)(c) + + +@publicapi +def split_part( + string: ColumnOrName, + delimiter: ColumnOrName, + part_number: ColumnOrName, + _emit_ast: bool = True, +) -> Column: + """ + Splits a given string at a specified character and returns the requested part. + + Args: + string (ColumnOrName): The string to be split. + delimiter (ColumnOrName): The delimiter to split the string on. + part_number (ColumnOrName): The part number to return (1-based indexing). Negative numbers count from the end. + + Returns: + Column: The specified part of the split string. + + Examples:: + >>> from snowflake.snowpark.functions import col + >>> df = session.create_dataframe([ + ... ("11.22.33", ".", 1), + ... ("11.22.33", ".", 2), + ... ("11.22.33", ".", 3), + ... ("11.22.33", ".", -1), + ... ("127.0.0.1", ".", 1), + ... ("127.0.0.1", ".", -1), + ... ("|a|b|c|", "|", 1), + ... ("|a|b|c|", "|", 2), + ... ("aaa--bbb-BBB--ccc", "--", 2) + ... 
], schema=["string_col", "delimiter_col", "part_number_col"]) + >>> result = df.select(split_part(col("string_col"), col("delimiter_col"), col("part_number_col")).alias("result")) + >>> result.collect() + [Row(RESULT='11'), Row(RESULT='22'), Row(RESULT='33'), Row(RESULT='33'), Row(RESULT='127'), Row(RESULT='1'), Row(RESULT=''), Row(RESULT='a'), Row(RESULT='bbb-BBB')] + """ + string_col = _to_col_if_str(string, "split_part") + delimiter_col = _to_col_if_str(delimiter, "split_part") + part_number_col = _to_col_if_str(part_number, "split_part") + return builtin("split_part", _emit_ast=_emit_ast)( + string_col, delimiter_col, part_number_col + ) + + +@publicapi +def strtok( + string: ColumnOrName, + delimiter: ColumnOrName = None, + part_nr: ColumnOrName = None, + _emit_ast: bool = True, +) -> Column: + """ + Tokenizes a string with the given set of delimiters and returns the requested part. + + Args: + string (ColumnOrName): The string to be tokenized. + delimiter (ColumnOrName, optional): A set of delimiters. Each character in the delimiter string is treated as a delimiter. If not specified, defaults to a single space character. + part_nr (ColumnOrName, optional): The requested part number (1-based). If not specified, returns the entire string. + + Returns: + Column: The requested part of the tokenized string. 
+ + Examples:: + >>> from snowflake.snowpark.functions import col, lit + >>> df = session.create_dataframe([["a.b.c"]], schema=["string_col"]) + >>> df.select(strtok(col("string_col")).alias("result")).collect() + [Row(RESULT='a.b.c')] + >>> df.select(strtok(col("string_col"), lit(".")).alias("result")).collect() + [Row(RESULT='a')] + >>> df.select(strtok(col("string_col"), lit("."), lit(2)).alias("result")).collect() + [Row(RESULT='b')] + >>> df2 = session.create_dataframe([["user@snowflake.com"]], schema=["string_col"]) + >>> df2.select(strtok(col("string_col"), lit("@."), lit(1)).alias("result")).collect() + [Row(RESULT='user')] + >>> df2.select(strtok(col("string_col"), lit("@."), lit(3)).alias("result")).collect() + [Row(RESULT='com')] + """ + string_col = _to_col_if_str(string, "strtok") + + if delimiter is None and part_nr is None: + return builtin("strtok", _emit_ast=_emit_ast)(string_col) + elif part_nr is None: + delimiter_col = _to_col_if_str(delimiter, "strtok") + return builtin("strtok", _emit_ast=_emit_ast)(string_col, delimiter_col) + else: + delimiter_col = ( + _to_col_if_str(delimiter, "strtok") if delimiter is not None else lit(" ") + ) + part_nr_col = _to_col_if_str(part_nr, "strtok") + return builtin("strtok", _emit_ast=_emit_ast)( + string_col, delimiter_col, part_nr_col + ) + + +@publicapi +def try_base64_decode_binary( + input_expr: ColumnOrName, alphabet: ColumnOrName = None, _emit_ast: bool = True +) -> Column: + """ + Decodes a base64-encoded string to binary data. Returns NULL if the input is not valid base64. + + Args: + input_expr (ColumnOrName): The base64-encoded string to decode. + alphabet (ColumnOrName, optional): The base64 alphabet to use for decoding. If not specified, uses the standard base64 alphabet. + + Returns: + Column: A column containing the decoded binary data, or None if the input is invalid. 
+ + Examples:: + >>> from snowflake.snowpark.functions import base64_encode + >>> df = session.create_dataframe(["HELP", "TEST"], schema=["input"]) + >>> df.select(try_base64_decode_binary(base64_encode(df["input"]))).collect() + [Row(TRY_BASE64_DECODE_BINARY(BASE64_ENCODE("INPUT"))=bytearray(b'HELP')), Row(TRY_BASE64_DECODE_BINARY(BASE64_ENCODE("INPUT"))=bytearray(b'TEST'))] + + >>> df2 = session.create_dataframe(["SEVMUA==", "VEVTVA=="], schema=["encoded"]) + >>> df2.select(try_base64_decode_binary(df2["encoded"])).collect() + [Row(TRY_BASE64_DECODE_BINARY("ENCODED")=bytearray(b'HELP')), Row(TRY_BASE64_DECODE_BINARY("ENCODED")=bytearray(b'TEST'))] + + >>> df3 = session.create_dataframe(["invalid_base64!"], schema=["bad_input"]) + >>> df3.select(try_base64_decode_binary(df3["bad_input"])).collect() + [Row(TRY_BASE64_DECODE_BINARY("BAD_INPUT")=None)] + """ + input_col = _to_col_if_str(input_expr, "try_base64_decode_binary") + + if alphabet is not None: + alphabet_col = _to_col_if_str(alphabet, "try_base64_decode_binary") + return builtin("try_base64_decode_binary", _emit_ast=_emit_ast)( + input_col, alphabet_col + ) + else: + return builtin("try_base64_decode_binary", _emit_ast=_emit_ast)(input_col) + + +@publicapi +def try_base64_decode_string( + input_expr: ColumnOrName, alphabet: ColumnOrName = None, _emit_ast: bool = True +) -> Column: + """ + Decodes a base64-encoded string and returns the result. If the input is not a valid base64-encoded string, returns NULL instead of raising an error. + + Args: + input_expr (ColumnOrName): A base64-encoded string to decode. + alphabet (ColumnOrName, optional): The base64 alphabet to use for decoding. If not specified, uses the standard base64 alphabet. + + Returns: + Column: The decoded string, or NULL if the input is not valid base64. 
+ + Examples:: + >>> df = session.create_dataframe([["SEVMTE8="]], schema=["encoded"]) + >>> df.select(try_base64_decode_string(df["encoded"]).alias('result')).collect() + [Row(RESULT='HELLO')] + + >>> df = session.create_dataframe([["invalid_base64"]], schema=["encoded"]) + >>> df.select(try_base64_decode_string(df["encoded"]).alias('result')).collect() + [Row(RESULT=None)] + + >>> df = session.create_dataframe([["SEVMTE8="]], schema=["encoded"]) + >>> df.select(try_base64_decode_string(df["encoded"], lit('$')).alias('result')).collect() + [Row(RESULT='HELLO')] + """ + c = _to_col_if_str(input_expr, "try_base64_decode_string") + if alphabet is not None: + alphabet_col = _to_col_if_str(alphabet, "try_base64_decode_string") + return builtin("try_base64_decode_string", _emit_ast=_emit_ast)(c, alphabet_col) + else: + return builtin("try_base64_decode_string", _emit_ast=_emit_ast)(c) + + +@publicapi +def try_hex_decode_binary(input_expr: ColumnOrName, _emit_ast: bool = True) -> Column: + """ + Decodes a hex-encoded string to binary data. Returns None if the input is not a valid hex string. + + Args: + input_expr (ColumnOrName): A hex-encoded string to decode to binary data. + + Returns: + Column: The decoded binary data as bytearray, or None if input is invalid. + + Examples:: + >>> from snowflake.snowpark.functions import col + >>> df = session.create_dataframe([["41426162"], ["48656C6C6F"], ["576F726C64"]], schema=["hex_string"]) + >>> df.select(try_hex_decode_binary(col("hex_string")).alias("decoded_binary")).collect() + [Row(DECODED_BINARY=bytearray(b'ABab')), Row(DECODED_BINARY=bytearray(b'Hello')), Row(DECODED_BINARY=bytearray(b'World'))] + """ + c = _to_col_if_str(input_expr, "try_hex_decode_binary") + return builtin("try_hex_decode_binary", _emit_ast=_emit_ast)(c) + + +@publicapi +def try_hex_decode_string(input_expr: ColumnOrName, _emit_ast: bool = True) -> Column: + """ + Decodes a hex-encoded string to its original string value. 
Returns None if the input is not a valid hex string. + + Args: + input_expr (ColumnOrName): The hex-encoded string to decode. + + Returns: + Column: The decoded string, or None if the input is not valid hex. + + Examples:: + >>> df = session.create_dataframe([["41614262"], ["127"], ["invalid_hex"]], schema=["hex_input"]) + >>> df.select(try_hex_decode_string(df["hex_input"]).alias("decoded")).collect() + [Row(DECODED='AaBb'), Row(DECODED=None), Row(DECODED=None)] + """ + c = _to_col_if_str(input_expr, "try_hex_decode_string") + return builtin("try_hex_decode_string", _emit_ast=_emit_ast)(c) + + +@publicapi +def unicode(input_str: ColumnOrName, _emit_ast: bool = True) -> Column: + """ + Returns the Unicode code point of the first character in a string. + + Args: + input_str (ColumnOrName): The input string column or string value to get the Unicode code point from. + + Returns: + Column: The Unicode code point of the first character. Returns 0 for empty strings. + + Examples:: + >>> from snowflake.snowpark.functions import col + >>> df = session.create_dataframe([['a'], ['❄'], ['cde'], ['']], schema=["input_str"]) + >>> df.select(unicode(col("input_str")).alias("unicode_result")).collect() + [Row(UNICODE_RESULT=97), Row(UNICODE_RESULT=10052), Row(UNICODE_RESULT=99), Row(UNICODE_RESULT=0)] + """ + c = _to_col_if_str(input_str, "unicode") + return builtin("unicode", _emit_ast=_emit_ast)(c) + + +@publicapi +def uuid_string( + uuid: ColumnOrName = None, name: ColumnOrName = None, _emit_ast: bool = True +) -> Column: + """ + Returns a universally unique identifier (UUID) as a string. + + Args: + uuid (ColumnOrName, optional): The namespace UUID as a string. If provided, generates a UUID based on this namespace. + name (ColumnOrName, optional): The name to use for UUID generation. Used in combination with uuid parameter. + + Returns: + Column: A Column object representing a UUID string. 
+ + Examples:: + >>> df = session.create_dataframe([["test"]], schema=["a"]) + >>> df.select(uuid_string().alias("random_uuid")).collect() # doctest: +SKIP + [Row(RANDOM_UUID='...')] + + >>> df.select(uuid_string("fe971b24-9572-4005-b22f-351e9c09274d", "foo").alias("named_uuid")).collect() # doctest: +SKIP + [Row(NAMED_UUID='...')] + + >>> df.select(uuid_string("fe971b24-9572-4005-b22f-351e9c09274d").alias("uuid_with_namespace")).collect() # doctest: +SKIP + [Row(UUID_WITH_NAMESPACE='...')] + + >>> df.select(uuid_string(name="foo").alias("uuid_with_name")).collect() # doctest: +SKIP + [Row(UUID_WITH_NAME='...')] + """ + if uuid is None and name is None: + return builtin("uuid_string", _emit_ast=_emit_ast)() + elif uuid is not None and name is not None: + return builtin("uuid_string", _emit_ast=_emit_ast)(uuid, name) + elif uuid is not None: + return builtin("uuid_string", _emit_ast=_emit_ast)(uuid) + else: + builtin("uuid_string", _emit_ast=_emit_ast)(name) From e24b5ac69b3902e3a565669c219e15f6187a68bd Mon Sep 17 00:00:00 2001 From: Arturo Herrera Aguilar Date: Fri, 24 Oct 2025 08:55:56 -0600 Subject: [PATCH 2/7] Add missing return --- src/snowflake/snowpark/_functions/scalar_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/snowflake/snowpark/_functions/scalar_functions.py b/src/snowflake/snowpark/_functions/scalar_functions.py index da551ec82f..5d00ba6a41 100644 --- a/src/snowflake/snowpark/_functions/scalar_functions.py +++ b/src/snowflake/snowpark/_functions/scalar_functions.py @@ -4743,4 +4743,4 @@ def uuid_string( elif uuid is not None: return builtin("uuid_string", _emit_ast=_emit_ast)(uuid) else: - builtin("uuid_string", _emit_ast=_emit_ast)(name) + return builtin("uuid_string", _emit_ast=_emit_ast)(name) From 3c1df1758a30aa2978e72e289ef3865059163a9b Mon Sep 17 00:00:00 2001 From: Arturo Herrera Aguilar Date: Tue, 28 Oct 2025 10:59:39 -0600 Subject: [PATCH 3/7] Update docstrings --- 
.../snowpark/_functions/scalar_functions.py | 60 +++++-------------- 1 file changed, 15 insertions(+), 45 deletions(-) diff --git a/src/snowflake/snowpark/_functions/scalar_functions.py b/src/snowflake/snowpark/_functions/scalar_functions.py index 5d00ba6a41..01e8c24faf 100644 --- a/src/snowflake/snowpark/_functions/scalar_functions.py +++ b/src/snowflake/snowpark/_functions/scalar_functions.py @@ -4089,21 +4089,11 @@ def regexp_instr( option (ColumnOrName, optional): Specifies whether to return the position of the first character of the match (0) or the position of the first character following the match (1). Default is 0. regexp_parameters (ColumnOrName, optional): String of one or more characters that specifies the parameters for the regular expression. Default is 'c' (case-sensitive). Supported values: - - +-----+-----------------------------------------------+ - | Parameter | Description | - +=====+===============================================+ - | c | Case-sensitive matching | - +-----+-----------------------------------------------+ - | i | Case-insensitive matching | - +-----+-----------------------------------------------+ - | m | Multi-line mode | - +-----+-----------------------------------------------+ - | e | Extract submatches | - +-----+-----------------------------------------------+ - | s | Single-line mode POSIX wildcard character | - | | . matches \\n | - +-----+-----------------------------------------------+ + - `c`: Case-sensitive matching + - `i`: Case-insensitive matching + - `m`: Multi-line mode + - `e`: Extract submatches + - `s`: Single-line mode (POSIX wildcard character `.` matches `\n`) group_num (ColumnOrName, optional): Specifies which capture group to return the position for. Default is None, which returns the position of the entire match. @@ -4266,21 +4256,11 @@ def regexp_substr( occurrence (ColumnOrName, optional): Which occurrence of the pattern to return. Defaults to 1. 
regex_parameters (ColumnOrName, optional): String of one or more characters that specifies the parameters for the regular expression. Default is 'c' (case-sensitive). Supported values: - - +-----+-----------------------------------------------+ - | Parameter | Description | - +=====+===============================================+ - | c | Case-sensitive matching | - +-----+-----------------------------------------------+ - | i | Case-insensitive matching | - +-----+-----------------------------------------------+ - | m | Multi-line mode | - +-----+-----------------------------------------------+ - | e | Extract submatches | - +-----+-----------------------------------------------+ - | s | Single-line mode POSIX wildcard character | - | | . matches \\n | - +-----+-----------------------------------------------+ + - `c`: Case-sensitive matching + - `i`: Case-insensitive matching + - `m`: Multi-line mode + - `e`: Extract submatches + - `s`: Single-line mode (POSIX wildcard character `.` matches `\n`) group_num (ColumnOrName, optional): The group number in the regular expression to extract. Defaults to None, which extracts the entire match. @@ -4376,21 +4356,11 @@ def regexp_substr_all( occurrence (ColumnOrName, optional): Which occurrence of the pattern to return. regex_parameters (ColumnOrName, optional): String of one or more characters that specifies the parameters for the regular expression. Default is 'c' (case-sensitive). 
Supported values: - - +-----+-----------------------------------------------+ - | Parameter | Description | - +=====+===============================================+ - | c | Case-sensitive matching | - +-----+-----------------------------------------------+ - | i | Case-insensitive matching | - +-----+-----------------------------------------------+ - | m | Multi-line mode | - +-----+-----------------------------------------------+ - | e | Extract submatches | - +-----+-----------------------------------------------+ - | s | Single-line mode POSIX wildcard character | - | | . matches \\n | - +-----+-----------------------------------------------+ + - `c`: Case-sensitive matching + - `i`: Case-insensitive matching + - `m`: Multi-line mode + - `e`: Extract submatches + - `s`: Single-line mode (POSIX wildcard character `.` matches `\n`) group_num (ColumnOrName, optional): The group number in the regular expression to extract. Defaults to None, which extracts the entire match. Returns: From e8ee8274a0ba5106a9f9525dc160031b27af9108 Mon Sep 17 00:00:00 2001 From: Arturo Herrera Aguilar Date: Tue, 28 Oct 2025 13:30:41 -0600 Subject: [PATCH 4/7] Update docstrings --- .../snowpark/_functions/scalar_functions.py | 51 +++++++++---------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/src/snowflake/snowpark/_functions/scalar_functions.py b/src/snowflake/snowpark/_functions/scalar_functions.py index 01e8c24faf..66dc92eafc 100644 --- a/src/snowflake/snowpark/_functions/scalar_functions.py +++ b/src/snowflake/snowpark/_functions/scalar_functions.py @@ -4088,13 +4088,12 @@ def regexp_instr( occurrence (ColumnOrName, optional): The occurrence of the pattern to find. Default is 1. option (ColumnOrName, optional): Specifies whether to return the position of the first character of the match (0) or the position of the first character following the match (1). Default is 0. 
regexp_parameters (ColumnOrName, optional): String of one or more characters that specifies the parameters for the regular expression. Default is 'c' (case-sensitive). - Supported values: - - `c`: Case-sensitive matching - - `i`: Case-insensitive matching - - `m`: Multi-line mode - - `e`: Extract submatches - - `s`: Single-line mode (POSIX wildcard character `.` matches `\n`) - + Supported values: + - `c`: Case-sensitive matching + - `i`: Case-insensitive matching + - `m`: Multi-line mode + - `e`: Extract submatches + - `s`: Single-line mode (POSIX wildcard character `.` matches `\\n`) group_num (ColumnOrName, optional): Specifies which capture group to return the position for. Default is None, which returns the position of the entire match. Returns: @@ -4199,13 +4198,12 @@ def regexp_like( subject (ColumnOrName): A string expression to be matched against the pattern. pattern (ColumnOrName): A string literal that will be used as a regular expression pattern. parameters (ColumnOrName, optional): A string literal that specifies the parameters for the regular expression, defaults: c. - Supported Parameters: - c: Case-sensitive matching - i: Case-insensitive matching - m: Multi-line mode - e: Extract submatches - s: Single-line mode POSIX wildcard character . matches \ - + Supported values: + - `c`: Case-sensitive matching + - `i`: Case-insensitive matching + - `m`: Multi-line mode + - `e`: Extract submatches + - `s`: Single-line mode (POSIX wildcard character `.` matches `\\n`) Returns: Column: A boolean value indicating whether the subject matches the pattern. @@ -4255,13 +4253,12 @@ def regexp_substr( position (ColumnOrName, optional): The position in the string to start searching from (1-based). Defaults to 1. occurrence (ColumnOrName, optional): Which occurrence of the pattern to return. Defaults to 1. regex_parameters (ColumnOrName, optional): String of one or more characters that specifies the parameters for the regular expression. 
Default is 'c' (case-sensitive). - Supported values: - - `c`: Case-sensitive matching - - `i`: Case-insensitive matching - - `m`: Multi-line mode - - `e`: Extract submatches - - `s`: Single-line mode (POSIX wildcard character `.` matches `\n`) - + Supported values: + - `c`: Case-sensitive matching + - `i`: Case-insensitive matching + - `m`: Multi-line mode + - `e`: Extract submatches + - `s`: Single-line mode (POSIX wildcard character `.` matches `\\n`) group_num (ColumnOrName, optional): The group number in the regular expression to extract. Defaults to None, which extracts the entire match. Returns: @@ -4355,12 +4352,12 @@ def regexp_substr_all( position (ColumnOrName, optional): The position in the string to start searching from (1-based). Defaults to 1. occurrence (ColumnOrName, optional): Which occurrence of the pattern to return. regex_parameters (ColumnOrName, optional): String of one or more characters that specifies the parameters for the regular expression. Default is 'c' (case-sensitive). - Supported values: - - `c`: Case-sensitive matching - - `i`: Case-insensitive matching - - `m`: Multi-line mode - - `e`: Extract submatches - - `s`: Single-line mode (POSIX wildcard character `.` matches `\n`) + Supported values: + - `c`: Case-sensitive matching + - `i`: Case-insensitive matching + - `m`: Multi-line mode + - `e`: Extract submatches + - `s`: Single-line mode (POSIX wildcard character `.` matches `\\n`) group_num (ColumnOrName, optional): The group number in the regular expression to extract. Defaults to None, which extracts the entire match. Returns: From 0f008aa82b91a034b3a2b647df9022e9695e00c0 Mon Sep 17 00:00:00 2001 From: Arturo Herrera Aguilar Date: Thu, 30 Oct 2025 11:54:39 -0600 Subject: [PATCH 5/7] Update docstring test. 
--- src/snowflake/snowpark/_functions/scalar_functions.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/snowflake/snowpark/_functions/scalar_functions.py b/src/snowflake/snowpark/_functions/scalar_functions.py index 66dc92eafc..d66fc9301b 100644 --- a/src/snowflake/snowpark/_functions/scalar_functions.py +++ b/src/snowflake/snowpark/_functions/scalar_functions.py @@ -4551,7 +4551,7 @@ def try_base64_decode_binary( alphabet (ColumnOrName, optional): The base64 alphabet to use for decoding. If not specified, uses the standard base64 alphabet. Returns: - Column: A column containing the decoded binary data, or None if the input is invalid. + Column: The decoded binary data, or None if the input is invalid. Examples:: >>> from snowflake.snowpark.functions import base64_encode @@ -4566,6 +4566,10 @@ def try_base64_decode_binary( >>> df3 = session.create_dataframe(["invalid_base64!"], schema=["bad_input"]) >>> df3.select(try_base64_decode_binary(df3["bad_input"])).collect() [Row(TRY_BASE64_DECODE_BINARY("BAD_INPUT")=None)] + + >>> df4 = session.create_dataframe(["SEVMTE8="], schema=["encoded"]) + >>> df4.select(try_base64_decode_binary(df4["encoded"]), lit("+/=")).collect() + [Row(TRY_BASE64_DECODE_BINARY("ENCODED")=bytearray(b'HELLO'), '+/='='+/=')] """ input_col = _to_col_if_str(input_expr, "try_base64_decode_binary") From cbdea7d2ed927ba24436479b318e1aec0cdab282 Mon Sep 17 00:00:00 2001 From: Arturo Herrera Aguilar Date: Thu, 30 Oct 2025 12:26:30 -0600 Subject: [PATCH 6/7] Update docstring test. 
--- src/snowflake/snowpark/_functions/scalar_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/snowflake/snowpark/_functions/scalar_functions.py b/src/snowflake/snowpark/_functions/scalar_functions.py index d66fc9301b..503b72ac0b 100644 --- a/src/snowflake/snowpark/_functions/scalar_functions.py +++ b/src/snowflake/snowpark/_functions/scalar_functions.py @@ -4568,8 +4568,8 @@ def try_base64_decode_binary( [Row(TRY_BASE64_DECODE_BINARY("BAD_INPUT")=None)] >>> df4 = session.create_dataframe(["SEVMTE8="], schema=["encoded"]) - >>> df4.select(try_base64_decode_binary(df4["encoded"]), lit("+/=")).collect() - [Row(TRY_BASE64_DECODE_BINARY("ENCODED")=bytearray(b'HELLO'), '+/='='+/=')] + >>> df4.select(try_base64_decode_binary(df4["encoded"], lit("+/="))).collect() + [Row(TRY_BASE64_DECODE_BINARY("ENCODED", '+/=')=bytearray(b'HELLO'))] """ input_col = _to_col_if_str(input_expr, "try_base64_decode_binary") From b07283fa67cd5698593c4467736c9b804ff32fe9 Mon Sep 17 00:00:00 2001 From: Arturo Herrera Aguilar Date: Fri, 31 Oct 2025 16:22:02 -0600 Subject: [PATCH 7/7] Remove functions from other PR --- .../snowpark/_functions/scalar_functions.py | 178 ------------------ 1 file changed, 178 deletions(-) diff --git a/src/snowflake/snowpark/_functions/scalar_functions.py b/src/snowflake/snowpark/_functions/scalar_functions.py index 503b72ac0b..9619e092cb 100644 --- a/src/snowflake/snowpark/_functions/scalar_functions.py +++ b/src/snowflake/snowpark/_functions/scalar_functions.py @@ -4537,181 +4537,3 @@ def strtok( return builtin("strtok", _emit_ast=_emit_ast)( string_col, delimiter_col, part_nr_col ) - - -@publicapi -def try_base64_decode_binary( - input_expr: ColumnOrName, alphabet: ColumnOrName = None, _emit_ast: bool = True -) -> Column: - """ - Decodes a base64-encoded string to binary data. Returns NULL if the input is not valid base64. - - Args: - input_expr (ColumnOrName): The base64-encoded string to decode. 
- alphabet (ColumnOrName, optional): The base64 alphabet to use for decoding. If not specified, uses the standard base64 alphabet. - - Returns: - Column: The decoded binary data, or None if the input is invalid. - - Examples:: - >>> from snowflake.snowpark.functions import base64_encode - >>> df = session.create_dataframe(["HELP", "TEST"], schema=["input"]) - >>> df.select(try_base64_decode_binary(base64_encode(df["input"]))).collect() - [Row(TRY_BASE64_DECODE_BINARY(BASE64_ENCODE("INPUT"))=bytearray(b'HELP')), Row(TRY_BASE64_DECODE_BINARY(BASE64_ENCODE("INPUT"))=bytearray(b'TEST'))] - - >>> df2 = session.create_dataframe(["SEVMUA==", "VEVTVA=="], schema=["encoded"]) - >>> df2.select(try_base64_decode_binary(df2["encoded"])).collect() - [Row(TRY_BASE64_DECODE_BINARY("ENCODED")=bytearray(b'HELP')), Row(TRY_BASE64_DECODE_BINARY("ENCODED")=bytearray(b'TEST'))] - - >>> df3 = session.create_dataframe(["invalid_base64!"], schema=["bad_input"]) - >>> df3.select(try_base64_decode_binary(df3["bad_input"])).collect() - [Row(TRY_BASE64_DECODE_BINARY("BAD_INPUT")=None)] - - >>> df4 = session.create_dataframe(["SEVMTE8="], schema=["encoded"]) - >>> df4.select(try_base64_decode_binary(df4["encoded"], lit("+/="))).collect() - [Row(TRY_BASE64_DECODE_BINARY("ENCODED", '+/=')=bytearray(b'HELLO'))] - """ - input_col = _to_col_if_str(input_expr, "try_base64_decode_binary") - - if alphabet is not None: - alphabet_col = _to_col_if_str(alphabet, "try_base64_decode_binary") - return builtin("try_base64_decode_binary", _emit_ast=_emit_ast)( - input_col, alphabet_col - ) - else: - return builtin("try_base64_decode_binary", _emit_ast=_emit_ast)(input_col) - - -@publicapi -def try_base64_decode_string( - input_expr: ColumnOrName, alphabet: ColumnOrName = None, _emit_ast: bool = True -) -> Column: - """ - Decodes a base64-encoded string and returns the result. If the input is not a valid base64-encoded string, returns NULL instead of raising an error. 
- - Args: - input_expr (ColumnOrName): A base64-encoded string to decode. - alphabet (ColumnOrName, optional): The base64 alphabet to use for decoding. If not specified, uses the standard base64 alphabet. - - Returns: - Column: The decoded string, or NULL if the input is not valid base64. - - Examples:: - >>> df = session.create_dataframe([["SEVMTE8="]], schema=["encoded"]) - >>> df.select(try_base64_decode_string(df["encoded"]).alias('result')).collect() - [Row(RESULT='HELLO')] - - >>> df = session.create_dataframe([["invalid_base64"]], schema=["encoded"]) - >>> df.select(try_base64_decode_string(df["encoded"]).alias('result')).collect() - [Row(RESULT=None)] - - >>> df = session.create_dataframe([["SEVMTE8="]], schema=["encoded"]) - >>> df.select(try_base64_decode_string(df["encoded"], lit('$')).alias('result')).collect() - [Row(RESULT='HELLO')] - """ - c = _to_col_if_str(input_expr, "try_base64_decode_string") - if alphabet is not None: - alphabet_col = _to_col_if_str(alphabet, "try_base64_decode_string") - return builtin("try_base64_decode_string", _emit_ast=_emit_ast)(c, alphabet_col) - else: - return builtin("try_base64_decode_string", _emit_ast=_emit_ast)(c) - - -@publicapi -def try_hex_decode_binary(input_expr: ColumnOrName, _emit_ast: bool = True) -> Column: - """ - Decodes a hex-encoded string to binary data. Returns None if the input is not a valid hex string. - - Args: - input_expr (ColumnOrName): A hex-encoded string to decode to binary data. - - Returns: - Column: The decoded binary data as bytearray, or None if input is invalid. 
- - Examples:: - >>> from snowflake.snowpark.functions import col - >>> df = session.create_dataframe([["41426162"], ["48656C6C6F"], ["576F726C64"]], schema=["hex_string"]) - >>> df.select(try_hex_decode_binary(col("hex_string")).alias("decoded_binary")).collect() - [Row(DECODED_BINARY=bytearray(b'ABab')), Row(DECODED_BINARY=bytearray(b'Hello')), Row(DECODED_BINARY=bytearray(b'World'))] - """ - c = _to_col_if_str(input_expr, "try_hex_decode_binary") - return builtin("try_hex_decode_binary", _emit_ast=_emit_ast)(c) - - -@publicapi -def try_hex_decode_string(input_expr: ColumnOrName, _emit_ast: bool = True) -> Column: - """ - Decodes a hex-encoded string to its original string value. Returns None if the input is not a valid hex string. - - Args: - input_expr (ColumnOrName): The hex-encoded string to decode. - - Returns: - Column: The decoded string, or None if the input is not valid hex. - - Examples:: - >>> df = session.create_dataframe([["41614262"], ["127"], ["invalid_hex"]], schema=["hex_input"]) - >>> df.select(try_hex_decode_string(df["hex_input"]).alias("decoded")).collect() - [Row(DECODED='AaBb'), Row(DECODED=None), Row(DECODED=None)] - """ - c = _to_col_if_str(input_expr, "try_hex_decode_string") - return builtin("try_hex_decode_string", _emit_ast=_emit_ast)(c) - - -@publicapi -def unicode(input_str: ColumnOrName, _emit_ast: bool = True) -> Column: - """ - Returns the Unicode code point of the first character in a string. - - Args: - input_str (ColumnOrName): The input string column or string value to get the Unicode code point from. - - Returns: - Column: The Unicode code point of the first character. Returns 0 for empty strings. 
- - Examples:: - >>> from snowflake.snowpark.functions import col - >>> df = session.create_dataframe([['a'], ['❄'], ['cde'], ['']], schema=["input_str"]) - >>> df.select(unicode(col("input_str")).alias("unicode_result")).collect() - [Row(UNICODE_RESULT=97), Row(UNICODE_RESULT=10052), Row(UNICODE_RESULT=99), Row(UNICODE_RESULT=0)] - """ - c = _to_col_if_str(input_str, "unicode") - return builtin("unicode", _emit_ast=_emit_ast)(c) - - -@publicapi -def uuid_string( - uuid: ColumnOrName = None, name: ColumnOrName = None, _emit_ast: bool = True -) -> Column: - """ - Returns a universally unique identifier (UUID) as a string. - - Args: - uuid (ColumnOrName, optional): The namespace UUID as a string. If provided, generates a UUID based on this namespace. - name (ColumnOrName, optional): The name to use for UUID generation. Used in combination with uuid parameter. - - Returns: - Column: A Column object representing a UUID string. - - Examples:: - >>> df = session.create_dataframe([["test"]], schema=["a"]) - >>> df.select(uuid_string().alias("random_uuid")).collect() # doctest: +SKIP - [Row(RANDOM_UUID='...')] - - >>> df.select(uuid_string("fe971b24-9572-4005-b22f-351e9c09274d", "foo").alias("named_uuid")).collect() # doctest: +SKIP - [Row(NAMED_UUID='...')] - - >>> df.select(uuid_string("fe971b24-9572-4005-b22f-351e9c09274d").alias("uuid_with_namespace")).collect() # doctest: +SKIP - [Row(UUID_WITH_NAMESPACE='...')] - - >>> df.select(uuid_string(name="foo").alias("uuid_with_name")).collect() # doctest: +SKIP - [Row(UUID_WITH_NAME='...')] - """ - if uuid is None and name is None: - return builtin("uuid_string", _emit_ast=_emit_ast)() - elif uuid is not None and name is not None: - return builtin("uuid_string", _emit_ast=_emit_ast)(uuid, name) - elif uuid is not None: - return builtin("uuid_string", _emit_ast=_emit_ast)(uuid) - else: - return builtin("uuid_string", _emit_ast=_emit_ast)(name)