Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -759,6 +759,13 @@ pub trait Dialect: Debug + Any {
Token::DoubleColon | Token::ExclamationMark | Token::LBracket | Token::CaretAt => {
Ok(p!(DoubleColon))
}
Token::Colon => match parser.peek_nth_token(1).token {
// When colon is followed by a string or a number, it's usually in MAP syntax.
Token::SingleQuotedString(_) | Token::Number(_, _) => Ok(self.prec_unknown()),
// In other cases, it's used in semi-structured data traversal like in variant or JSON
// string columns. See `JsonAccess`.
_ => Ok(p!(Colon)),
},
Token::Arrow
| Token::LongArrow
| Token::HashArrow
Expand Down Expand Up @@ -812,6 +819,7 @@ pub trait Dialect: Debug + Any {
Precedence::Ampersand => 23,
Precedence::Caret => 22,
Precedence::Pipe => 21,
Precedence::Colon => 21,
Precedence::Between => 20,
Precedence::Eq => 20,
Precedence::Like => 19,
Expand Down Expand Up @@ -1269,6 +1277,8 @@ pub enum Precedence {
Caret,
/// Bitwise `OR` / pipe operator (`|`).
Pipe,
/// `:` operator for json/variant access.
Colon,
/// `BETWEEN` operator.
Between,
/// Equality operator (`=`).
Expand Down
9 changes: 9 additions & 0 deletions src/dialect/mssql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,15 @@ impl Dialect for MsSqlDialect {
None
}
}

fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
let token = parser.peek_token();
match token.token {
// lowest prec to prevent it from turning into a binary op
Token::Colon => Some(Ok(self.prec_unknown())),
_ => None,
}
}
}

impl MsSqlDialect {
Expand Down
3 changes: 3 additions & 0 deletions src/dialect/postgresql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ impl Dialect for PostgreSqlDialect {
| Token::ShiftRight
| Token::ShiftLeft
| Token::CustomBinaryOperator(_) => Some(Ok(PG_OTHER_PREC)),
// lowest prec to prevent it from turning into a binary op
Token::Colon => Some(Ok(self.prec_unknown())),
_ => None,
}
}
Expand All @@ -159,6 +161,7 @@ impl Dialect for PostgreSqlDialect {
Precedence::Ampersand => PG_OTHER_PREC,
Precedence::Caret => CARET_PREC,
Precedence::Pipe => PG_OTHER_PREC,
Precedence::Colon => PG_OTHER_PREC,
Precedence::Between => BETWEEN_LIKE_PREC,
Precedence::Eq => EQ_PREC,
Precedence::Like => BETWEEN_LIKE_PREC,
Expand Down
8 changes: 5 additions & 3 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3921,7 +3921,7 @@ impl<'a> Parser<'a> {
expr: Box::new(expr),
})
} else if Token::LBracket == *tok && self.dialect.supports_partiql()
|| (dialect_of!(self is SnowflakeDialect | GenericDialect) && Token::Colon == *tok)
|| (Token::Colon == *tok)
{
self.prev_token();
self.parse_json_access(expr)
Expand Down Expand Up @@ -3957,7 +3957,8 @@ impl<'a> Parser<'a> {
let lower_bound = if self.consume_token(&Token::Colon) {
None
} else {
Some(self.parse_expr()?)
// parse expr until we hit a colon (or any token with lower precedence)
Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for the changes to the subscript behavior, we don't seem to have any new tests to accompany them?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These changes actually came from a failing test, but I will add some more tests to explicitly look for this behavior.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added some parse_array_subscript tests. I have verified that these catch the fixes made by these changes in subscript behavior.

};

// check for end
Expand Down Expand Up @@ -3985,7 +3986,8 @@ impl<'a> Parser<'a> {
stride: None,
});
} else {
Some(self.parse_expr()?)
// parse expr until we hit a colon (or any token with lower precedence)
Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?)
};

// check for end
Expand Down
145 changes: 145 additions & 0 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18067,3 +18067,148 @@ fn test_binary_kw_as_cast() {
all_dialects_where(|d| d.supports_binary_kw_as_cast())
.one_statement_parses_to("SELECT BINARY 1+1", "SELECT CAST(1 + 1 AS BINARY)");
}

#[test]
fn parse_semi_structured_data_traversal() {
let dialects = TestedDialects::new(vec![
Box::new(GenericDialect {}),
Box::new(SnowflakeDialect {}),
Box::new(DatabricksDialect {}),
]);

// most basic case
let sql = "SELECT a:b FROM t";
let select = dialects.verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "b".to_owned(),
quoted: false
}]
},
}),
select.projection[0]
);

// identifier can be quoted
let sql = r#"SELECT a:"my long object key name" FROM t"#;
let select = dialects.verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "my long object key name".to_owned(),
quoted: true
}]
},
}),
select.projection[0]
);

dialects.verified_stmt("SELECT a:b::INT FROM t");

// unquoted keywords are permitted in the object key
let sql = "SELECT a:select, a:from FROM t";
let select = dialects.verified_only_select(sql);
assert_eq!(
vec![
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "select".to_owned(),
quoted: false
}]
},
}),
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "from".to_owned(),
quoted: false
}]
},
})
],
select.projection
);

// multiple levels can be traversed
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a:foo."bar".baz"#;
let select = dialects.verified_only_select(sql);
assert_eq!(
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![
JsonPathElem::Dot {
key: "foo".to_owned(),
quoted: false,
},
JsonPathElem::Dot {
key: "bar".to_owned(),
quoted: true,
},
JsonPathElem::Dot {
key: "baz".to_owned(),
quoted: false,
}
]
},
})],
select.projection
);

// dot and bracket notation can be mixed (starting with : case)
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a:foo[0].bar"#;
let select = dialects.verified_only_select(sql);
assert_eq!(
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![
JsonPathElem::Dot {
key: "foo".to_owned(),
quoted: false,
},
JsonPathElem::Bracket {
key: Expr::value(number("0")),
},
JsonPathElem::Dot {
key: "bar".to_owned(),
quoted: false,
}
]
},
})],
select.projection
);
}

#[test]
fn parse_array_subscript() {
let dialects = all_dialects_except(|d| {
d.is::<MsSqlDialect>()
|| d.is::<SnowflakeDialect>()
|| d.is::<SQLiteDialect>()
|| d.is::<RedshiftSqlDialect>()
});

dialects.verified_stmt("SELECT arr[1]");
dialects.verified_stmt("SELECT arr[:]");
dialects.verified_stmt("SELECT arr[1:2]");
dialects.verified_stmt("SELECT arr[1:2:4]");
dialects.verified_stmt("SELECT arr[1:array_length(arr)]");
dialects.verified_stmt("SELECT arr[array_length(arr) - 1:array_length(arr)]");
dialects
.verified_stmt("SELECT arr[array_length(arr) - 2:array_length(arr) - 1:array_length(arr)]");

dialects.verified_stmt("SELECT arr[1][2]");
dialects.verified_stmt("SELECT arr[:][:]");
}
115 changes: 2 additions & 113 deletions tests/sqlparser_snowflake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1265,37 +1265,8 @@ fn parse_lateral_flatten() {
// https://docs.snowflake.com/en/user-guide/querying-semistructured
#[test]
fn parse_semi_structured_data_traversal() {
// most basic case
let sql = "SELECT a:b FROM t";
let select = snowflake().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "b".to_owned(),
quoted: false
}]
},
}),
select.projection[0]
);

// identifier can be quoted
let sql = r#"SELECT a:"my long object key name" FROM t"#;
let select = snowflake().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "my long object key name".to_owned(),
quoted: true
}]
},
}),
select.projection[0]
);
// see `tests/sqlparser_common.rs` -> `parse_semi_structured_data_traversal` for more test
// cases. This test only has Snowflake-specific syntax like array access.

// expressions are allowed in bracket notation
let sql = r#"SELECT a[2 + 2] FROM t"#;
Expand All @@ -1316,88 +1287,6 @@ fn parse_semi_structured_data_traversal() {
select.projection[0]
);

snowflake().verified_stmt("SELECT a:b::INT FROM t");

// unquoted keywords are permitted in the object key
let sql = "SELECT a:select, a:from FROM t";
let select = snowflake().verified_only_select(sql);
assert_eq!(
vec![
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "select".to_owned(),
quoted: false
}]
},
}),
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "from".to_owned(),
quoted: false
}]
},
})
],
select.projection
);

// multiple levels can be traversed
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a:foo."bar".baz"#;
let select = snowflake().verified_only_select(sql);
assert_eq!(
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![
JsonPathElem::Dot {
key: "foo".to_owned(),
quoted: false,
},
JsonPathElem::Dot {
key: "bar".to_owned(),
quoted: true,
},
JsonPathElem::Dot {
key: "baz".to_owned(),
quoted: false,
}
]
},
})],
select.projection
);

// dot and bracket notation can be mixed (starting with : case)
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a:foo[0].bar"#;
let select = snowflake().verified_only_select(sql);
assert_eq!(
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![
JsonPathElem::Dot {
key: "foo".to_owned(),
quoted: false,
},
JsonPathElem::Bracket {
key: Expr::value(number("0")),
},
JsonPathElem::Dot {
key: "bar".to_owned(),
quoted: false,
}
]
},
})],
select.projection
);

// dot and bracket notation can be mixed (starting with bracket case)
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a[0].foo.bar"#;
Expand Down