From 2ce4c22931cfae586f0698ab7ff9468eaa520212 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 2 Jun 2026 00:04:07 +0200 Subject: [PATCH 01/16] feat(snowflake): capture table-level policy applications in CREATE TABLE Snowflake CREATE TABLE may attach security/governance policies at the table level: ROW ACCESS, AGGREGATION, JOIN, and (dynamic tables) STORAGE LIFECYCLE. These were parsed and discarded; this captures them in a new TablePolicy {kind, with, policy_name, columns} on CreateTable so column-level lineage can surface which policy guards a table and over which columns. Each kind selects its column-list keyword: ROW ACCESS / STORAGE LIFECYCLE use ON (cols), AGGREGATION uses ENTITY KEY (cols), JOIN uses ALLOWED JOIN KEYS (cols); all column lists are optional (GET_DDL omits ON when the caller lacks privilege). The optional WITH prefix round-trips. Grammar per Snowflake docs: https://docs.snowflake.com/en/sql-reference/sql/create-table https://docs.snowflake.com/en/sql-reference/sql/create-aggregation-policy https://docs.snowflake.com/en/sql-reference/sql/create-join-policy https://docs.snowflake.com/en/sql-reference/sql/create-dynamic-table Fixes 3 corpus test failures (Snowflake). --- src/ast/ddl.rs | 67 ++++++++++++++ src/ast/helpers/stmt_create_table.rs | 11 +++ src/ast/mod.rs | 13 ++- src/parser/mod.rs | 133 ++++++++++++++++++--------- tests/sqlparser_snowflake.rs | 95 +++++++++++++++---- 5 files changed, 253 insertions(+), 66 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 4734c8591d..9c222720e5 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -1386,6 +1386,73 @@ impl fmt::Display for ColumnPolicy { } } +/// Table-level security/governance policy applied in a `CREATE TABLE` +/// (or `CREATE DYNAMIC TABLE`) statement. +/// +/// Snowflake: +/// ```sql +/// [ WITH ] ROW ACCESS POLICY ON ( , ... ) +/// [ WITH ] AGGREGATION POLICY [ ENTITY KEY ( , ... ) ] +/// [ WITH ] JOIN POLICY [ ALLOWED JOIN KEYS ( , ... 
) ] +/// [ WITH ] STORAGE LIFECYCLE POLICY ON ( , ... ) +/// ``` +/// [Snowflake CREATE TABLE]: https://docs.snowflake.com/en/sql-reference/sql/create-table +/// [Snowflake CREATE DYNAMIC TABLE]: https://docs.snowflake.com/en/sql-reference/sql/create-dynamic-table +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TablePolicy { + pub kind: TablePolicyKind, + /// Whether the application carried the optional `WITH` prefix. + pub with: bool, + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub policy_name: ObjectName, + /// Columns the policy is scoped to. Empty when the column list is omitted + /// (e.g. Snowflake `GET_DDL` emits `WITH ROW ACCESS POLICY p` with no `ON` + /// when the caller lacks privilege to see the policy). + pub columns: Vec>, +} + +/// The kind of a table-level [`TablePolicy`] application. Each kind selects the +/// keyword used to introduce its column list (`ON`, `ENTITY KEY`, `ALLOWED JOIN +/// KEYS`). +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TablePolicyKind { + /// Snowflake `ROW ACCESS POLICY ON (cols)`. + RowAccess, + /// Snowflake `AGGREGATION POLICY [ENTITY KEY (cols)]`. + Aggregation, + /// Snowflake `JOIN POLICY [ALLOWED JOIN KEYS (cols)]`. + Join, + /// Snowflake dynamic table `STORAGE LIFECYCLE POLICY ON (cols)`. 
+ StorageLifecycle, +} + +impl fmt::Display for TablePolicy { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.with { + write!(f, "WITH ")?; + } + let (command, columns_keyword) = match self.kind { + TablePolicyKind::RowAccess => ("ROW ACCESS POLICY", "ON"), + TablePolicyKind::Aggregation => ("AGGREGATION POLICY", "ENTITY KEY"), + TablePolicyKind::Join => ("JOIN POLICY", "ALLOWED JOIN KEYS"), + TablePolicyKind::StorageLifecycle => ("STORAGE LIFECYCLE POLICY", "ON"), + }; + write!(f, "{command} {}", self.policy_name)?; + if !self.columns.is_empty() { + write!( + f, + " {columns_keyword} ({})", + display_comma_separated(&self.columns) + )?; + } + Ok(()) + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index 233ead1395..d202e78b09 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -9,6 +9,7 @@ use serde::{Deserialize, Serialize}; use crate::ast::{ ColumnDef, DistributionStyle, EngineSpec, Expr, FileFormat, HiveDistributionStyle, HiveFormat, Ident, ObjectName, OnCommit, PartitionBoundSpec, Query, SqlOption, Statement, TableConstraint, + TablePolicy, }; use crate::parser::ParserError; use sqlparser::ast::TableProjection; @@ -96,6 +97,7 @@ pub struct CreateTableBuilder { pub inherits: Option>, pub partition_of: Option, pub partition_bound: Option, + pub table_policies: Vec, } impl CreateTableBuilder { @@ -150,6 +152,7 @@ impl CreateTableBuilder { inherits: None, partition_of: None, partition_bound: None, + table_policies: vec![], } } pub fn or_replace(mut self, or_replace: bool) -> Self { @@ -390,6 +393,11 @@ impl CreateTableBuilder { self } + pub fn table_policies(mut self, table_policies: Vec) -> Self { + self.table_policies = table_policies; + self + } + pub fn build(self) -> 
Statement { Statement::CreateTable { or_replace: self.or_replace, @@ -441,6 +449,7 @@ impl CreateTableBuilder { inherits: self.inherits, partition_of: self.partition_of, partition_bound: self.partition_bound, + table_policies: self.table_policies, } } } @@ -502,6 +511,7 @@ impl TryFrom for CreateTableBuilder { inherits, partition_of, partition_bound, + table_policies, } => Ok(Self { or_replace, temporary, @@ -552,6 +562,7 @@ impl TryFrom for CreateTableBuilder { inherits, partition_of, partition_bound, + table_policies, }), _ => Err(ParserError::ParserError( format!("Expected create table statement, but received: {stmt}").into(), diff --git a/src/ast/mod.rs b/src/ast/mod.rs index f4672af5fb..30fd764fc4 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -39,8 +39,9 @@ pub use self::ddl::{ AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnLocation, ColumnOption, ColumnOptionDef, ColumnPolicy, ColumnPolicyProperty, ConstraintCharacteristics, CreateTableLikeOption, Deduplicate, GeneratedAs, IndexType, KeyOrIndexDisplay, Partition, - ProcedureParam, ReferentialAction, TableConstraint, TableProjection, - UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, ViewSecurity, + ProcedureParam, ReferentialAction, TableConstraint, TablePolicy, TablePolicyKind, + TableProjection, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, + ViewSecurity, }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ @@ -1986,6 +1987,10 @@ pub enum Statement { partition_of: Option, /// PostgreSQL partition bound specification (`FOR VALUES ...` or `DEFAULT`). partition_bound: Option, + /// Table-level security/governance policy applications (Snowflake + /// `ROW ACCESS` / `AGGREGATION` / `JOIN` / `STORAGE LIFECYCLE POLICY`). + /// Preserved so column-level lineage can surface which policies guard a table. + table_policies: Vec, }, /// ```sql /// CREATE VIRTUAL TABLE .. 
USING ()` @@ -3499,6 +3504,7 @@ impl fmt::Display for Statement { dynamic, iceberg, hybrid, + table_policies, } => { // We want to allow the following options // Empty column list, allowed by PostgreSQL: @@ -3737,6 +3743,9 @@ impl fmt::Display for Statement { if *copy_grants { write!(f, " COPY GRANTS")?; } + for policy in table_policies { + write!(f, " {policy}")?; + } if let Some(query) = query { write!(f, " AS {query}")?; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ecf0acea77..2693106d76 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7552,59 +7552,34 @@ impl<'a> Parser<'a> { let mut on_commit: Option = None; let mut strict = false; let mut inherits: Option> = None; + let mut table_policies: Vec = vec![]; loop { - // [WITH] ROW ACCESS POLICY ON (, ...) (Snowflake) - // and [WITH] TAG (...). Handled before WITH (...) options so the - // WITH prefix doesn't force parse_options to consume LParen. + // Table-level security/governance policy applications (Snowflake), + // each with an optional `WITH` prefix. Handled before the WITH (...) + // options so the prefix doesn't force parse_options to consume LParen. + // [WITH] ROW ACCESS POLICY ON (cols) + // [WITH] AGGREGATION POLICY [ENTITY KEY (cols)] + // [WITH] JOIN POLICY [ALLOWED JOIN KEYS (cols)] + // [WITH] STORAGE LIFECYCLE POLICY ON (cols) (dynamic tables) + // [WITH] TAG (...) // https://docs.snowflake.com/en/sql-reference/sql/create-table - if self.parse_keywords(&[Keyword::ROW, Keyword::ACCESS, Keyword::POLICY]) { - let _policy = self.parse_object_name(false)?; - // ON (cols) is optional — Snowflake's GET_DDL omits it when the caller - // lacks privilege to see the policy ("WITH ROW ACCESS POLICY unknown_policy"). 
- if self.parse_keyword(Keyword::ON) { - self.expect_token(&Token::LParen)?; - let _cols = self.parse_comma_separated(|p| p.parse_identifier(false))?; - self.expect_token(&Token::RParen)?; - } + // https://docs.snowflake.com/en/sql-reference/sql/create-dynamic-table + if let Some(policy) = self.maybe_parse_table_policy(false)? { + table_policies.push(policy); continue; } { let with = self.parse_keyword(Keyword::WITH); - if self.parse_keywords(&[Keyword::ROW, Keyword::ACCESS, Keyword::POLICY]) { - let _policy = self.parse_object_name(false)?; - if self.parse_keyword(Keyword::ON) { - self.expect_token(&Token::LParen)?; - let _cols = self.parse_comma_separated(|p| p.parse_identifier(false))?; - self.expect_token(&Token::RParen)?; - } - continue; - } else if with - && matches!(self.peek_token_kind(), Token::Word(w) if w.value.eq_ignore_ascii_case("STORAGE")) - { - // Snowflake Dynamic Table: `WITH STORAGE LIFECYCLE POLICY - // ON (cols)`. Mirrors ROW ACCESS POLICY above — - // consume the clause; the ON columns are the table's own - // outputs, so no extra lineage edge. STORAGE / LIFECYCLE / - // POLICY aren't reserved keywords, so skip by value up to ON. - // https://docs.snowflake.com/en/sql-reference/sql/create-dynamic-table - self.next_token(); // STORAGE - while !matches!(self.peek_token_kind(), Token::EOF) - && !matches!(self.peek_token_kind(), Token::Word(w) if w.keyword == Keyword::ON) - && !matches!(self.peek_token_kind(), Token::Word(w) if w.keyword == Keyword::AS) - { - self.next_token(); - } - if self.parse_keyword(Keyword::ON) { - self.expect_token(&Token::LParen)?; - let _cols = self.parse_comma_separated(|p| p.parse_identifier(false))?; - self.expect_token(&Token::RParen)?; + if with { + if let Some(policy) = self.maybe_parse_table_policy(true)? 
{ + table_policies.push(policy); + continue; + } else if self.parse_optional_tag_clause() { + continue; + } else { + self.prev_token(); } - continue; - } else if with && self.parse_optional_tag_clause() { - continue; - } else if with { - self.prev_token(); } } @@ -8120,6 +8095,7 @@ impl<'a> Parser<'a> { .copy_grants(copy_grants) .location(location) .inherits(inherits) + .table_policies(table_policies) .build()) } @@ -9110,6 +9086,73 @@ impl<'a> Parser<'a> { /// Skip an optional `TAG (qualified_name = 'value', ...)` clause (Snowflake). /// Consumes the TAG keyword and the parenthesized list if present. /// Returns true if a TAG clause was consumed. + /// Try to parse a table-level Snowflake security/governance policy + /// application (`ROW ACCESS` / `AGGREGATION` / `JOIN` / `STORAGE LIFECYCLE + /// POLICY`). `with` records whether the optional `WITH` keyword was already + /// consumed by the caller, so the application round-trips. Returns `None` + /// (without consuming tokens beyond a probe) when the next tokens don't + /// introduce a table policy. + /// + fn maybe_parse_table_policy(&mut self, with: bool) -> Result, ParserError> { + let kind = if self.parse_keywords(&[Keyword::ROW, Keyword::ACCESS, Keyword::POLICY]) { + TablePolicyKind::RowAccess + } else if self.parse_keywords(&[Keyword::AGGREGATION, Keyword::POLICY]) { + TablePolicyKind::Aggregation + } else if self.parse_keywords(&[Keyword::JOIN, Keyword::POLICY]) { + TablePolicyKind::Join + } else if self.peek_word_ci("STORAGE") { + // STORAGE / LIFECYCLE aren't reserved keywords, so match by value. 
+ let idx = self.index; + self.next_token(); // STORAGE + if self.parse_word_ci("LIFECYCLE") && self.parse_keyword(Keyword::POLICY) { + TablePolicyKind::StorageLifecycle + } else { + self.index = idx; + return Ok(None); + } + } else { + return Ok(None); + }; + + let policy_name = self.parse_object_name(false)?; + + // The keyword introducing the (optional) scoped-column list depends on + // the policy kind: ON / ENTITY KEY / ALLOWED JOIN KEYS. + let columns = match kind { + TablePolicyKind::RowAccess | TablePolicyKind::StorageLifecycle => { + if self.parse_keyword(Keyword::ON) { + self.parse_parenthesized_column_list(Mandatory, false)? + } else { + vec![] + } + } + TablePolicyKind::Aggregation => { + if self.parse_word_ci("ENTITY") { + self.expect_keyword(Keyword::KEY)?; + self.parse_parenthesized_column_list(Mandatory, false)? + } else { + vec![] + } + } + TablePolicyKind::Join => { + if self.parse_word_ci("ALLOWED") { + self.expect_keyword(Keyword::JOIN)?; + self.expect_keyword(Keyword::KEYS)?; + self.parse_parenthesized_column_list(Mandatory, false)? + } else { + vec![] + } + } + }; + + Ok(Some(TablePolicy { + kind, + with, + policy_name, + columns, + })) + } + fn parse_optional_tag_clause(&mut self) -> bool { if self.parse_keyword(Keyword::TAG) { if self.consume_token(&Token::LParen) { diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 532e9f82cd..ad53b04302 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2374,39 +2374,81 @@ fn parse_create_view_with_masking_policy() { #[test] fn parse_create_table_with_row_access_policy() { - // Snowflake CREATE TABLE trailing [WITH] ROW ACCESS POLICY ON (cols) - // and [WITH] TAG (...). The WITH prefix is optional per + // Snowflake CREATE TABLE trailing [WITH] ROW ACCESS POLICY ON (cols). 
+ // The WITH prefix is optional per // https://docs.snowflake.com/en/sql-reference/sql/create-table - snowflake().one_statement_parses_to( + // The policy application (name + scoped columns) is preserved in the AST so + // column-level lineage can surface which policy guards the table. + snowflake().verified_stmt( "CREATE TABLE t1 (id VARCHAR, dept VARCHAR) WITH ROW ACCESS POLICY p1 ON (id)", - "CREATE TABLE t1 (id VARCHAR, dept VARCHAR)", - ); - snowflake().one_statement_parses_to( - "CREATE TABLE t1 (id VARCHAR) ROW ACCESS POLICY p1 ON (id)", - "CREATE TABLE t1 (id VARCHAR)", ); + snowflake().verified_stmt("CREATE TABLE t1 (id VARCHAR) ROW ACCESS POLICY p1 ON (id)"); + // TAG (...) is still consumed without an AST node — drop it on round-trip. snowflake().one_statement_parses_to( "CREATE TABLE t1 (id VARCHAR) WITH ROW ACCESS POLICY p1 ON (id) WITH TAG (k = 'v')", - "CREATE TABLE t1 (id VARCHAR)", + "CREATE TABLE t1 (id VARCHAR) WITH ROW ACCESS POLICY p1 ON (id)", ); snowflake().one_statement_parses_to( "CREATE TABLE t1 (id VARCHAR) WITH TAG (k = 'v') WITH ROW ACCESS POLICY p1 ON (id)", - "CREATE TABLE t1 (id VARCHAR)", + "CREATE TABLE t1 (id VARCHAR) WITH ROW ACCESS POLICY p1 ON (id)", ); // Snowflake's GET_DDL omits `ON (cols)` when the caller lacks privilege to // see the policy — it returns `WITH ROW ACCESS POLICY unknown_policy` only. 
- snowflake().one_statement_parses_to( - "CREATE TABLE t1 (id VARCHAR) WITH ROW ACCESS POLICY unknown_policy", - "CREATE TABLE t1 (id VARCHAR)", - ); - snowflake().one_statement_parses_to( - "CREATE TABLE t1 (id VARCHAR) ROW ACCESS POLICY unknown_policy", - "CREATE TABLE t1 (id VARCHAR)", - ); + snowflake().verified_stmt("CREATE TABLE t1 (id VARCHAR) WITH ROW ACCESS POLICY unknown_policy"); + snowflake().verified_stmt("CREATE TABLE t1 (id VARCHAR) ROW ACCESS POLICY unknown_policy"); snowflake().one_statement_parses_to( "CREATE VIEW v1 WITH ROW ACCESS POLICY unknown_policy AS SELECT * FROM t1", "CREATE VIEW v1 AS SELECT * FROM t1", ); + + // Assert the application is exposed in the AST (name + scoped columns). + match snowflake() + .verified_stmt("CREATE TABLE t1 (id VARCHAR) WITH ROW ACCESS POLICY p1 ON (id, dept)") + { + Statement::CreateTable { table_policies, .. } => { + assert_eq!(table_policies.len(), 1); + let policy = &table_policies[0]; + assert_eq!(policy.kind, TablePolicyKind::RowAccess); + assert!(policy.with); + assert_eq!(policy.policy_name.to_string(), "p1"); + let cols: Vec = policy.columns.iter().map(|c| c.to_string()).collect(); + assert_eq!(cols, vec!["id".to_string(), "dept".to_string()]); + } + other => panic!("expected CreateTable, got {other:?}"), + } +} + +#[test] +fn test_snowflake_create_table_aggregation_join_policy() { + // AGGREGATION POLICY uses ENTITY KEY (cols); JOIN POLICY uses ALLOWED JOIN + // KEYS (cols); both column lists are optional. WITH prefix is optional. 
+ // https://docs.snowflake.com/en/sql-reference/sql/create-aggregation-policy + // https://docs.snowflake.com/en/sql-reference/sql/create-join-policy + snowflake().verified_stmt( + "CREATE TABLE t (id VARCHAR, grp VARCHAR) WITH AGGREGATION POLICY agg_pol ENTITY KEY (id)", + ); + snowflake().verified_stmt("CREATE TABLE t (id VARCHAR) AGGREGATION POLICY agg_pol"); + snowflake().verified_stmt( + "CREATE TABLE t (a VARCHAR, b VARCHAR) WITH JOIN POLICY jp ALLOWED JOIN KEYS (a, b)", + ); + snowflake().verified_stmt("CREATE TABLE t (a VARCHAR) JOIN POLICY jp"); + + match snowflake().verified_stmt( + "CREATE TABLE t (a VARCHAR, b VARCHAR) WITH JOIN POLICY jp ALLOWED JOIN KEYS (a, b)", + ) { + Statement::CreateTable { table_policies, .. } => { + assert_eq!(table_policies.len(), 1); + assert_eq!(table_policies[0].kind, TablePolicyKind::Join); + assert_eq!(table_policies[0].policy_name.to_string(), "jp"); + let cols: Vec = table_policies[0] + .columns + .iter() + .map(|c| c.to_string()) + .collect(); + assert_eq!(cols, vec!["a".to_string(), "b".to_string()]); + } + other => panic!("expected CreateTable, got {other:?}"), + } } #[test] @@ -3178,11 +3220,26 @@ fn test_snowflake_create_dynamic_table_storage_lifecycle_policy() { WITH STORAGE LIFECYCLE POLICY expire_after_1w ON (order_date) \ AS SELECT order_id, order_date FROM raw_orders"; match snowflake().parse_sql_statements(sql).unwrap().remove(0) { - Statement::CreateTable { dynamic, query, .. } => { + Statement::CreateTable { + dynamic, + query, + table_policies, + .. + } => { assert!(dynamic); let q = query.expect("AS query should be preserved"); assert!(q.to_string().contains("raw_orders")); assert!(q.to_string().contains("order_id")); + // The storage lifecycle policy application is captured. 
+ assert_eq!(table_policies.len(), 1); + assert_eq!(table_policies[0].kind, TablePolicyKind::StorageLifecycle); + assert_eq!(table_policies[0].policy_name.to_string(), "expire_after_1w"); + let cols: Vec = table_policies[0] + .columns + .iter() + .map(|c| c.to_string()) + .collect(); + assert_eq!(cols, vec!["order_date".to_string()]); } other => panic!("expected CreateTable, got {other:?}"), } From c543845071e02de7a1b03f3d0c749978d16a4600 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 2 Jun 2026 00:15:25 +0200 Subject: [PATCH 02/16] feat(snowflake): capture table-level TAG associations in CREATE TABLE Snowflake CREATE TABLE may attach governance tags at the table level via [WITH] TAG (tag_name = 'value', ...). These were consumed and discarded; this captures them in a new Tag {name, value} list (table_tags on CreateTable) so lineage can surface which tags are applied to a table. Tag names may be qualified (db.schema.tag); values are string literals. Both the WITH-prefixed and bare forms parse and normalize to the canonical `WITH TAG (k = 'v', ...)` on round-trip. Removed the redundant pre-loop tag discarder that previously swallowed a leading WITH TAG before the clause loop. Grammar per Snowflake docs: https://docs.snowflake.com/en/sql-reference/sql/create-table --- src/ast/ddl.rs | 24 ++++++++++++ src/ast/helpers/stmt_create_table.rs | 12 +++++- src/ast/mod.rs | 8 +++- src/parser/mod.rs | 46 ++++++++++++++-------- tests/sqlparser_snowflake.rs | 57 ++++++++++++++++++---------- 5 files changed, 109 insertions(+), 38 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 9c222720e5..c53046034f 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -1413,6 +1413,30 @@ pub struct TablePolicy { pub columns: Vec>, } +/// A tag association `<tag_name> = '<tag_value>'` applied to a table, view, or +/// column (Snowflake `[ WITH ] TAG ( <tag_name> = '<tag_value>' [ , ... ] )`). 
+/// +/// [Snowflake]: https://docs.snowflake.com/en/sql-reference/sql/create-table +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Tag { + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub name: ObjectName, + pub value: String, +} + +impl fmt::Display for Tag { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{} = '{}'", + self.name, + escape_single_quote_string(&self.value) + ) + } +} + /// The kind of a table-level [`TablePolicy`] application. Each kind selects the /// keyword used to introduce its column list (`ON`, `ENTITY KEY`, `ALLOWED JOIN /// KEYS`). diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index d202e78b09..a9cdaa36f8 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; use crate::ast::{ ColumnDef, DistributionStyle, EngineSpec, Expr, FileFormat, HiveDistributionStyle, HiveFormat, Ident, ObjectName, OnCommit, PartitionBoundSpec, Query, SqlOption, Statement, TableConstraint, - TablePolicy, + TablePolicy, Tag, }; use crate::parser::ParserError; use sqlparser::ast::TableProjection; @@ -98,6 +98,7 @@ pub struct CreateTableBuilder { pub partition_of: Option, pub partition_bound: Option, pub table_policies: Vec, + pub table_tags: Vec, } impl CreateTableBuilder { @@ -153,6 +154,7 @@ impl CreateTableBuilder { partition_of: None, partition_bound: None, table_policies: vec![], + table_tags: vec![], } } pub fn or_replace(mut self, or_replace: bool) -> Self { @@ -398,6 +400,11 @@ impl CreateTableBuilder { self } + pub fn table_tags(mut self, table_tags: Vec) -> Self { + self.table_tags = table_tags; + self + } + pub fn build(self) -> Statement { Statement::CreateTable { or_replace: self.or_replace, @@ -450,6 +457,7 @@ impl 
CreateTableBuilder { partition_of: self.partition_of, partition_bound: self.partition_bound, table_policies: self.table_policies, + table_tags: self.table_tags, } } } @@ -512,6 +520,7 @@ impl TryFrom for CreateTableBuilder { partition_of, partition_bound, table_policies, + table_tags, } => Ok(Self { or_replace, temporary, @@ -563,6 +572,7 @@ impl TryFrom for CreateTableBuilder { partition_of, partition_bound, table_policies, + table_tags, }), _ => Err(ParserError::ParserError( format!("Expected create table statement, but received: {stmt}").into(), diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 30fd764fc4..774915871e 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -40,7 +40,7 @@ pub use self::ddl::{ ColumnOption, ColumnOptionDef, ColumnPolicy, ColumnPolicyProperty, ConstraintCharacteristics, CreateTableLikeOption, Deduplicate, GeneratedAs, IndexType, KeyOrIndexDisplay, Partition, ProcedureParam, ReferentialAction, TableConstraint, TablePolicy, TablePolicyKind, - TableProjection, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, + TableProjection, Tag, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, ViewSecurity, }; pub use self::operator::{BinaryOperator, UnaryOperator}; @@ -1991,6 +1991,8 @@ pub enum Statement { /// `ROW ACCESS` / `AGGREGATION` / `JOIN` / `STORAGE LIFECYCLE POLICY`). /// Preserved so column-level lineage can surface which policies guard a table. table_policies: Vec, + /// Table-level tag associations (Snowflake `[WITH] TAG (k = 'v', ...)`). + table_tags: Vec, }, /// ```sql /// CREATE VIRTUAL TABLE .. 
USING ()` @@ -3505,6 +3507,7 @@ impl fmt::Display for Statement { iceberg, hybrid, table_policies, + table_tags, } => { // We want to allow the following options // Empty column list, allowed by PostgreSQL: @@ -3746,6 +3749,9 @@ impl fmt::Display for Statement { for policy in table_policies { write!(f, " {policy}")?; } + if !table_tags.is_empty() { + write!(f, " WITH TAG ({})", display_comma_separated(table_tags))?; + } if let Some(query) = query { write!(f, " AS {query}")?; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2693106d76..48492b57ce 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7517,19 +7517,6 @@ impl<'a> Parser<'a> { let hive_formats = self.parse_hive_formats()?; // PostgreSQL supports `WITH ( options )`, before `AS` - let _with_tag = if self.parse_keywords(&[Keyword::WITH, Keyword::TAG]) { - self.expect_token(&Token::LParen)?; - if !self.consume_token(&Token::RParen) { - let tags = self.parse_comma_separated(Parser::parse_sql_option)?; - self.expect_token(&Token::RParen)?; - Some(tags) - } else { - Some(vec![]) - } - } else { - None - }; - // Parse CREATE TABLE clauses that can appear in any order after column definitions. // Different dialects (Snowflake, ClickHouse, BigQuery, Databricks, Redshift) allow // these clauses in varying orders, so we use a loop to flexibly accept them. @@ -7553,6 +7540,7 @@ impl<'a> Parser<'a> { let mut strict = false; let mut inherits: Option> = None; let mut table_policies: Vec = vec![]; + let mut table_tags: Vec = vec![]; loop { // Table-level security/governance policy applications (Snowflake), @@ -7575,7 +7563,8 @@ impl<'a> Parser<'a> { if let Some(policy) = self.maybe_parse_table_policy(true)? { table_policies.push(policy); continue; - } else if self.parse_optional_tag_clause() { + } else if let Some(tags) = self.maybe_parse_tags()? { + table_tags.extend(tags); continue; } else { self.prev_token(); @@ -7737,8 +7726,9 @@ impl<'a> Parser<'a> { continue; } - // TAG (...) 
(Snowflake) - if self.parse_optional_tag_clause() { + // TAG (...) (Snowflake), bare (no WITH prefix) + if let Some(tags) = self.maybe_parse_tags()? { + table_tags.extend(tags); continue; } @@ -8096,6 +8086,7 @@ impl<'a> Parser<'a> { .location(location) .inherits(inherits) .table_policies(table_policies) + .table_tags(table_tags) .build()) } @@ -9153,6 +9144,29 @@ impl<'a> Parser<'a> { })) } + /// Parse a Snowflake `TAG ( <tag_name> = '<tag_value>' [ , ... ] )` clause into + /// real AST nodes (tag name + string value). The optional `WITH` prefix is + /// consumed by the caller. Returns `None` (without consuming tokens) when + /// the next token isn't `TAG`. + /// + fn maybe_parse_tags(&mut self) -> Result>, ParserError> { + if !self.parse_keyword(Keyword::TAG) { + return Ok(None); + } + self.expect_token(&Token::LParen)?; + let tags = self.parse_comma_separated(|p| { + let name = p.parse_object_name(false)?; + p.expect_token(&Token::Eq)?; + let value = p.parse_literal_string()?; + Ok(Tag { name, value }) + })?; + self.expect_token(&Token::RParen)?; + Ok(Some(tags)) + } + + /// Consume and discard a `TAG (...)` clause via balanced-paren skipping. + /// Used at the view / view-column sites that don't yet capture tags in the + /// AST; the CREATE TABLE sites use [`Self::maybe_parse_tags`] instead. fn parse_optional_tag_clause(&mut self) -> bool { if self.parse_keyword(Keyword::TAG) { if self.consume_token(&Token::LParen) { diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index ad53b04302..bfa6814087 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2124,35 +2124,52 @@ fn test_column_with_masking() { #[test] fn test_table_with_tag() { + // Table-level TAG associations are now preserved in the AST (canonical + // form uses spaces around `=`). 
https://docs.snowflake.com/en/sql-reference/sql/create-table + // Simple tag name snowflake().one_statement_parses_to( "CREATE OR REPLACE TABLE TBL (ID VARCHAR(16777216)) WITH TAG (UNKNOWN_TAG='#UNKNOWN_VALUE')", - "CREATE OR REPLACE TABLE TBL (ID VARCHAR(16777216))" + "CREATE OR REPLACE TABLE TBL (ID VARCHAR(16777216)) WITH TAG (UNKNOWN_TAG = '#UNKNOWN_VALUE')" ); - // Schema-qualified tag name - snowflake().one_statement_parses_to( - "CREATE OR REPLACE TABLE TBL (ID VARCHAR(16777216)) WITH TAG (TAG_SCHEMA.DOMAIN_MAPPING='marketing')", - "CREATE OR REPLACE TABLE TBL (ID VARCHAR(16777216))" + // Schema-qualified and fully-qualified tag names round-trip. + snowflake().verified_stmt( + "CREATE OR REPLACE TABLE TBL (ID VARCHAR(16777216)) WITH TAG (TAG_SCHEMA.DOMAIN_MAPPING = 'marketing')", + ); + snowflake().verified_stmt( + "CREATE OR REPLACE TABLE TBL (ID VARCHAR(16777216)) WITH TAG (PROD.TAG_SCHEMA.DOMAIN_MAPPING = 'marketing')", ); - // Fully-qualified tag name (database.schema.tag) - snowflake().one_statement_parses_to( - "CREATE OR REPLACE TABLE TBL (ID VARCHAR(16777216)) WITH TAG (PROD.TAG_SCHEMA.DOMAIN_MAPPING='marketing')", - "CREATE OR REPLACE TABLE TBL (ID VARCHAR(16777216))" + // Multiple tags with different qualification levels. + snowflake().verified_stmt( + "CREATE OR REPLACE TABLE TBL (ID VARCHAR(16777216)) WITH TAG (SIMPLE_TAG = 'value1', SCHEMA.TAG_NAME = 'value2', DB.SCHEMA.TAG_NAME = 'value3')", ); - // Multiple tags with different qualification levels - snowflake().one_statement_parses_to( - "CREATE OR REPLACE TABLE TBL (ID VARCHAR(16777216)) WITH TAG (SIMPLE_TAG='value1', SCHEMA.TAG_NAME='value2', DB.SCHEMA.TAG_NAME='value3')", - "CREATE OR REPLACE TABLE TBL (ID VARCHAR(16777216))" + // Real-world example from the issue (anonymized). 
+ snowflake().verified_stmt( + "CREATE OR REPLACE TABLE SCHEMA.DERIVED_TABLE (USER_ID VARCHAR(36), REPORTING_DATE TIMESTAMP_NTZ(9)) WITH TAG (STAGE.TAG_SCHEMA.DOMAIN_MAPPING = 'analytics')", ); - // Real-world example from the issue (anonymized) + // Bare TAG (no WITH prefix) is accepted and normalized to `WITH TAG`. snowflake().one_statement_parses_to( - "CREATE OR REPLACE TABLE SCHEMA.DERIVED_TABLE (USER_ID VARCHAR(36), REPORTING_DATE TIMESTAMP_NTZ(9)) WITH TAG (STAGE.TAG_SCHEMA.DOMAIN_MAPPING='analytics')", - "CREATE OR REPLACE TABLE SCHEMA.DERIVED_TABLE (USER_ID VARCHAR(36), REPORTING_DATE TIMESTAMP_NTZ(9))" + "CREATE TABLE t (id VARCHAR) TAG (k = 'v')", + "CREATE TABLE t (id VARCHAR) WITH TAG (k = 'v')", ); + + // The associations are exposed in the AST (tag name + value). + match snowflake().verified_stmt( + "CREATE OR REPLACE TABLE TBL (ID VARCHAR(16777216)) WITH TAG (SCHEMA.DOMAIN = 'marketing', COST_CENTER = 'eng')", + ) { + Statement::CreateTable { table_tags, .. } => { + assert_eq!(table_tags.len(), 2); + assert_eq!(table_tags[0].name.to_string(), "SCHEMA.DOMAIN"); + assert_eq!(table_tags[0].value, "marketing"); + assert_eq!(table_tags[1].name.to_string(), "COST_CENTER"); + assert_eq!(table_tags[1].value, "eng"); + } + other => panic!("expected CreateTable, got {other:?}"), + } } #[test] @@ -2383,14 +2400,14 @@ fn parse_create_table_with_row_access_policy() { "CREATE TABLE t1 (id VARCHAR, dept VARCHAR) WITH ROW ACCESS POLICY p1 ON (id)", ); snowflake().verified_stmt("CREATE TABLE t1 (id VARCHAR) ROW ACCESS POLICY p1 ON (id)"); - // TAG (...) is still consumed without an AST node — drop it on round-trip. - snowflake().one_statement_parses_to( + // ROW ACCESS POLICY and TAG together both round-trip (policies render + // before tags, so a tag-first input is reordered to the canonical form). 
+ snowflake().verified_stmt( "CREATE TABLE t1 (id VARCHAR) WITH ROW ACCESS POLICY p1 ON (id) WITH TAG (k = 'v')", - "CREATE TABLE t1 (id VARCHAR) WITH ROW ACCESS POLICY p1 ON (id)", ); snowflake().one_statement_parses_to( "CREATE TABLE t1 (id VARCHAR) WITH TAG (k = 'v') WITH ROW ACCESS POLICY p1 ON (id)", - "CREATE TABLE t1 (id VARCHAR) WITH ROW ACCESS POLICY p1 ON (id)", + "CREATE TABLE t1 (id VARCHAR) WITH ROW ACCESS POLICY p1 ON (id) WITH TAG (k = 'v')", ); // Snowflake's GET_DDL omits `ON (cols)` when the caller lacks privilege to // see the policy — it returns `WITH ROW ACCESS POLICY unknown_policy` only. From 16b7df3ccc38e41c9b5cb754a201658894011ce3 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 2 Jun 2026 00:19:14 +0200 Subject: [PATCH 03/16] feat(snowflake): capture column-level TAG associations in CREATE TABLE Column definitions in Snowflake CREATE TABLE may carry [WITH] TAG (k = 'v', ...) governance tags. These were consumed and discarded; this captures them in a new `tags: Vec` field on ColumnDef so lineage can surface per-column tagging. Both the WITH-prefixed and bare forms parse and normalize to the canonical `WITH TAG (...)` rendered after the column options/policy. All existing ColumnDef constructions across the test suite gain the new (empty) field. 
Grammar per Snowflake docs: https://docs.snowflake.com/en/sql-reference/sql/create-table --- src/ast/ddl.rs | 6 ++++++ src/parser/mod.rs | 12 ++++++++---- tests/sqlparser_bigquery.rs | 2 ++ tests/sqlparser_clickhouse.rs | 13 +++++++++++++ tests/sqlparser_common.rs | 15 +++++++++++++++ tests/sqlparser_mysql.rs | 20 ++++++++++++++++++++ tests/sqlparser_postgres.rs | 18 ++++++++++++++++++ tests/sqlparser_snowflake.rs | 34 +++++++++++++++++++++++----------- tests/sqlparser_sqlite.rs | 3 +++ 9 files changed, 108 insertions(+), 15 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index c53046034f..a460fc3291 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -881,6 +881,8 @@ pub struct ColumnDef { pub mask: Option, pub column_location: Option, pub column_policy: Option, + /// Column-level tag associations (Snowflake `[WITH] TAG (k = 'v', ...)`). + pub tags: Vec, } impl fmt::Display for ColumnDef { @@ -914,6 +916,10 @@ impl fmt::Display for ColumnDef { write!(f, "{column_policy}")?; } + if !self.tags.is_empty() { + write!(f, " WITH TAG ({})", display_comma_separated(&self.tags))?; + } + if let Some(column_location) = &self.column_location { write!(f, " {column_location}")?; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 48492b57ce..945607df30 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8325,13 +8325,16 @@ impl<'a> Parser<'a> { None }; - // Skip optional [WITH] TAG (...) clause on columns (Snowflake) + // Optional [WITH] TAG (...) clause on columns (Snowflake), captured. + let mut tags = vec![]; if self.parse_keyword(Keyword::WITH) { - if !self.parse_optional_tag_clause() { + if let Some(parsed) = self.maybe_parse_tags()? { + tags = parsed; + } else { self.prev_token(); } - } else { - self.parse_optional_tag_clause(); + } else if let Some(parsed) = self.maybe_parse_tags()? 
{ + tags = parsed; } // COMMENT may appear after MASKING POLICY / TAG (Snowflake) @@ -8372,6 +8375,7 @@ impl<'a> Parser<'a> { mask, column_location, column_policy, + tags, }) } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index f771c5b4f6..725d97733f 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -170,6 +170,7 @@ fn parse_nested_data_types() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("y").empty_span(), @@ -189,6 +190,7 @@ fn parse_nested_data_types() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ] ); diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 370d5f6186..d84cd03483 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -759,6 +759,7 @@ fn parse_create_table_with_variant_default_expressions() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("b").empty_span(), @@ -787,6 +788,7 @@ fn parse_create_table_with_variant_default_expressions() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("c").empty_span(), @@ -801,6 +803,7 @@ fn parse_create_table_with_variant_default_expressions() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("d").empty_span(), @@ -831,6 +834,7 @@ fn parse_create_table_with_variant_default_expressions() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("x").empty_span(), @@ -846,6 +850,7 @@ fn parse_create_table_with_variant_default_expressions() { mask: None, column_location: None, column_policy: None, + tags: vec![], } ] ) @@ -865,6 +870,7 @@ fn column_def(name: Ident, data_type: DataType) -> ColumnDef { mask: None, column_location: None, column_policy: None, + tags: vec![], } } @@ -962,6 +968,7 @@ fn 
parse_create_table_with_nullable() { mask: None, column_location: None, column_policy: None, + tags: vec![], } ] ); @@ -1008,6 +1015,7 @@ fn parse_create_table_with_nested_data_types() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("k").empty_span(), @@ -1036,6 +1044,7 @@ fn parse_create_table_with_nested_data_types() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("l").empty_span(), @@ -1064,6 +1073,7 @@ fn parse_create_table_with_nested_data_types() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("m").empty_span(), @@ -1078,6 +1088,7 @@ fn parse_create_table_with_nested_data_types() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ] ); @@ -1114,6 +1125,7 @@ fn parse_create_view_with_fields_data_types() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("f").empty_span(), @@ -1131,6 +1143,7 @@ fn parse_create_view_with_fields_data_types() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ] ); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 59867e80e1..571d24b478 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2638,6 +2638,7 @@ fn parse_create_table() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("lat").empty_span(), @@ -2652,6 +2653,7 @@ fn parse_create_table() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("lng").empty_span(), @@ -2663,6 +2665,7 @@ fn parse_create_table() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("constrained").empty_span(), @@ -2701,6 +2704,7 @@ fn parse_create_table() { mask: None, column_location: None, column_policy: None, + tags: 
vec![], }, ColumnDef { name: Ident::new("ref").empty_span(), @@ -2724,6 +2728,7 @@ fn parse_create_table() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("ref2").empty_span(), @@ -2744,6 +2749,7 @@ fn parse_create_table() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ] ); @@ -2883,6 +2889,7 @@ fn parse_create_table_with_constraint_characteristics() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("lat").empty_span(), @@ -2897,6 +2904,7 @@ fn parse_create_table_with_constraint_characteristics() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("lng").empty_span(), @@ -2908,6 +2916,7 @@ fn parse_create_table_with_constraint_characteristics() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ] ); @@ -3021,6 +3030,7 @@ fn parse_create_table_hive_array() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("val").empty_span(), @@ -3032,6 +3042,7 @@ fn parse_create_table_hive_array() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ], ) @@ -3443,6 +3454,7 @@ fn parse_create_external_table() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("lat").empty_span(), @@ -3457,6 +3469,7 @@ fn parse_create_external_table() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("lng").empty_span(), @@ -3468,6 +3481,7 @@ fn parse_create_external_table() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ] ); @@ -3529,6 +3543,7 @@ fn parse_create_or_replace_external_table() { mask: None, column_location: None, column_policy: None, + tags: vec![], },] ); assert!(constraints.is_empty()); diff --git a/tests/sqlparser_mysql.rs 
b/tests/sqlparser_mysql.rs index f570646eb0..01220e039b 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -332,6 +332,7 @@ fn parse_create_table_auto_increment() { mask: None, column_location: None, column_policy: None, + tags: vec![], }], columns ); @@ -387,6 +388,7 @@ fn parse_create_table_unique_key() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("bar").empty_span(), @@ -401,6 +403,7 @@ fn parse_create_table_unique_key() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ], columns @@ -469,6 +472,7 @@ fn parse_create_table_set_enum() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("baz").empty_span(), @@ -483,6 +487,7 @@ fn parse_create_table_set_enum() { mask: None, column_location: None, column_policy: None, + tags: vec![], } ], columns @@ -515,6 +520,7 @@ fn parse_create_table_engine_default_charset() { mask: None, column_location: None, column_policy: None, + tags: vec![], },], columns ); @@ -547,6 +553,7 @@ fn parse_create_table_collate() { mask: None, column_location: None, column_policy: None, + tags: vec![], },], columns ); @@ -584,6 +591,7 @@ fn parse_create_table_comment_character_set() { mask: None, column_location: None, column_policy: None, + tags: vec![], },], columns ); @@ -615,6 +623,7 @@ fn parse_quote_identifiers() { mask: None, column_location: None, column_policy: None, + tags: vec![], }], columns ); @@ -992,6 +1001,7 @@ fn parse_create_table_with_minimum_display_width() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("bar_smallint").empty_span(), @@ -1003,6 +1013,7 @@ fn parse_create_table_with_minimum_display_width() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("bar_mediumint").empty_span(), @@ -1014,6 +1025,7 @@ fn 
parse_create_table_with_minimum_display_width() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("bar_int").empty_span(), @@ -1025,6 +1037,7 @@ fn parse_create_table_with_minimum_display_width() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("bar_bigint").empty_span(), @@ -1036,6 +1049,7 @@ fn parse_create_table_with_minimum_display_width() { mask: None, column_location: None, column_policy: None, + tags: vec![], } ], columns @@ -1063,6 +1077,7 @@ fn parse_create_table_unsigned() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("bar_smallint").empty_span(), @@ -1074,6 +1089,7 @@ fn parse_create_table_unsigned() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("bar_mediumint").empty_span(), @@ -1085,6 +1101,7 @@ fn parse_create_table_unsigned() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("bar_int").empty_span(), @@ -1096,6 +1113,7 @@ fn parse_create_table_unsigned() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("bar_bigint").empty_span(), @@ -1107,6 +1125,7 @@ fn parse_create_table_unsigned() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ], columns @@ -1774,6 +1793,7 @@ fn parse_table_colum_option_on_update() { mask: None, column_location: None, column_policy: None, + tags: vec![], }], columns ); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 6335246da3..c33d723f2f 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -342,6 +342,7 @@ fn parse_create_table_with_defaults() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("store_id").empty_span(), @@ -356,6 +357,7 @@ fn 
parse_create_table_with_defaults() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("first_name").empty_span(), @@ -375,6 +377,7 @@ fn parse_create_table_with_defaults() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("last_name").empty_span(), @@ -394,6 +397,7 @@ fn parse_create_table_with_defaults() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("email").empty_span(), @@ -410,6 +414,7 @@ fn parse_create_table_with_defaults() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("address_id").empty_span(), @@ -424,6 +429,7 @@ fn parse_create_table_with_defaults() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("activebool").empty_span(), @@ -444,6 +450,7 @@ fn parse_create_table_with_defaults() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("create_date").empty_span(), @@ -466,6 +473,7 @@ fn parse_create_table_with_defaults() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("last_update").empty_span(), @@ -486,6 +494,7 @@ fn parse_create_table_with_defaults() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("active").empty_span(), @@ -500,6 +509,7 @@ fn parse_create_table_with_defaults() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ] ); @@ -677,6 +687,7 @@ fn parse_alter_table_add_columns() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, }, AlterTableOperation::AddColumn { @@ -692,6 +703,7 @@ fn parse_alter_table_add_columns() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, }, ] @@ -4148,6 +4160,7 @@ fn 
parse_create_table_with_alias() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("int4_col").empty_span(), @@ -4159,6 +4172,7 @@ fn parse_create_table_with_alias() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("int2_col").empty_span(), @@ -4170,6 +4184,7 @@ fn parse_create_table_with_alias() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("float8_col").empty_span(), @@ -4181,6 +4196,7 @@ fn parse_create_table_with_alias() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("float4_col").empty_span(), @@ -4192,6 +4208,7 @@ fn parse_create_table_with_alias() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::new("bool_col").empty_span(), @@ -4203,6 +4220,7 @@ fn parse_create_table_with_alias() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ] ); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index bfa6814087..21c8d7f289 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2175,21 +2175,32 @@ fn test_table_with_tag() { #[test] fn test_column_with_tag() { // Column-level WITH TAG (...) — Snowflake docs allow `[ WITH ] TAG (...)` on columns, - // same shape as the table-level form. - snowflake().one_statement_parses_to( - "CREATE TABLE t (col NUMBER(18, 4) WITH TAG (a.b.c = 'True'))", - "CREATE TABLE t (col NUMBER(18, 4))", - ); + // same shape as the table-level form. The associations are preserved in the AST. + snowflake().verified_stmt("CREATE TABLE t (col NUMBER(18, 4) WITH TAG (a.b.c = 'True'))"); // Multiple columns, mixed with regular options. 
- snowflake().one_statement_parses_to( + snowflake().verified_stmt( "CREATE TABLE t (id INT, amt NUMBER(18, 4) WITH TAG (schema.tag = 'val'), name VARCHAR)", - "CREATE TABLE t (id INT, amt NUMBER(18, 4), name VARCHAR)", ); - // Bare TAG (no WITH) still works (existing behavior). + // Bare TAG (no WITH) is normalized to the canonical `WITH TAG`. snowflake().one_statement_parses_to( "CREATE TABLE t (col NUMBER(18, 4) TAG (a.b = 'v'))", - "CREATE TABLE t (col NUMBER(18, 4))", + "CREATE TABLE t (col NUMBER(18, 4) WITH TAG (a.b = 'v'))", ); + + // The associations are exposed on the ColumnDef (tag name + value). + match snowflake().verified_stmt( + "CREATE TABLE t (amt NUMBER(18, 4) WITH TAG (schema.tag = 'val', cost = 'eng'))", + ) { + Statement::CreateTable { columns, .. } => { + assert_eq!(columns.len(), 1); + assert_eq!(columns[0].tags.len(), 2); + assert_eq!(columns[0].tags[0].name.to_string(), "schema.tag"); + assert_eq!(columns[0].tags[0].value, "val"); + assert_eq!(columns[0].tags[1].name.to_string(), "cost"); + assert_eq!(columns[0].tags[1].value, "eng"); + } + other => panic!("expected CreateTable, got {other:?}"), + } } #[test] @@ -2716,10 +2727,11 @@ fn parse_column_comment_after_masking_policy() { "CREATE TABLE t1 (col1 VARCHAR COMMENT 'description' MASKING POLICY p1)", ); - // Column COMMENT after TAG in CREATE TABLE + // Column COMMENT after TAG in CREATE TABLE — both preserved (COMMENT renders + // with the column options, the tag association after). 
snowflake().one_statement_parses_to( "CREATE TABLE t1 (col1 VARCHAR TAG (t1 = 'v1') COMMENT 'description')", - "CREATE TABLE t1 (col1 VARCHAR COMMENT 'description')", + "CREATE TABLE t1 (col1 VARCHAR COMMENT 'description' WITH TAG (t1 = 'v1'))", ); } diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 68d62ec528..a3080c170f 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -103,6 +103,7 @@ fn parse_create_table_auto_increment() { mask: None, column_location: None, column_policy: None, + tags: vec![], }], columns ); @@ -129,6 +130,7 @@ fn parse_create_sqlite_quote() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ColumnDef { name: Ident::with_quote('[', "INDEX").empty_span(), @@ -140,6 +142,7 @@ fn parse_create_sqlite_quote() { mask: None, column_location: None, column_policy: None, + tags: vec![], }, ], columns From f3f3d9879e0ee3a3148b7ddac8f6c80dd68fec22 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 2 Jun 2026 00:23:04 +0200 Subject: [PATCH 04/16] feat(databricks): capture ROW FILTER and column MASK USING COLUMNS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Databricks attaches row filters and column masks via plain scalar UDFs: CREATE TABLE ... WITH ROW FILTER ON (cols) MASK [USING COLUMNS (|, ...)] ROW FILTER is captured as a new TablePolicyKind::RowFilter (uniform with the Snowflake table policies; ON-list holds the function arguments). Column MASK is upgraded from a bare ObjectName to a ColumnMask {function, using_columns} so the USING COLUMNS arguments (other column names and/or constant literals) are preserved for lineage — previously USING COLUMNS failed to parse. Grammar per Databricks docs: https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-ddl-row-filter https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-ddl-column-mask Fixes 2 corpus test failures (Databricks). 
--- src/ast/ddl.rs | 41 +++++++++++++++++++++++++++++++-- src/ast/mod.rs | 10 ++++---- src/parser/mod.rs | 32 ++++++++++++++++++++++---- tests/sqlparser_databricks.rs | 43 +++++++++++++++++++++++++++++++++++ 4 files changed, 115 insertions(+), 11 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index a460fc3291..c908246a48 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -878,7 +878,7 @@ pub struct ColumnDef { pub codec: Option>, pub options: Vec, pub column_options: Vec, - pub mask: Option, + pub mask: Option, pub column_location: Option, pub column_policy: Option, /// Column-level tag associations (Snowflake `[WITH] TAG (k = 'v', ...)`). @@ -909,7 +909,7 @@ impl fmt::Display for ColumnDef { )?; } if let Some(mask) = &self.mask { - write!(f, " MASK {mask}")?; + write!(f, " {mask}")?; } if let Some(column_policy) = &self.column_policy { @@ -1419,6 +1419,38 @@ pub struct TablePolicy { pub columns: Vec>, } +/// A Databricks column mask applied in a column definition or `ALTER COLUMN`: +/// `MASK [ USING COLUMNS ( | [ , ... ] ) ]`. +/// +/// The masking logic itself lives in the referenced scalar UDF; the optional +/// `USING COLUMNS` list supplies extra arguments (other column names and/or +/// constant literals) to that function. +/// +/// [Databricks]: https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-ddl-column-mask +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct ColumnMask { + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub function: ObjectName, + /// `USING COLUMNS (...)` arguments: other column names and/or literals. 
+ pub using_columns: Vec, +} + +impl fmt::Display for ColumnMask { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "MASK {}", self.function)?; + if !self.using_columns.is_empty() { + write!( + f, + " USING COLUMNS ({})", + display_comma_separated(&self.using_columns) + )?; + } + Ok(()) + } +} + /// A tag association ` = ''` applied to a table, view, or /// column (Snowflake `[ WITH ] TAG ( = '' [ , ... ] )`). /// @@ -1458,6 +1490,10 @@ pub enum TablePolicyKind { Join, /// Snowflake dynamic table `STORAGE LIFECYCLE POLICY ON (cols)`. StorageLifecycle, + /// Databricks `ROW FILTER ON (cols)`. The `` is a scalar + /// UDF returning BOOLEAN; `ON (cols)` supplies its arguments. + /// + RowFilter, } impl fmt::Display for TablePolicy { @@ -1470,6 +1506,7 @@ impl fmt::Display for TablePolicy { TablePolicyKind::Aggregation => ("AGGREGATION POLICY", "ENTITY KEY"), TablePolicyKind::Join => ("JOIN POLICY", "ALLOWED JOIN KEYS"), TablePolicyKind::StorageLifecycle => ("STORAGE LIFECYCLE POLICY", "ON"), + TablePolicyKind::RowFilter => ("ROW FILTER", "ON"), }; write!(f, "{command} {}", self.policy_name)?; if !self.columns.is_empty() { diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 774915871e..601d771a7f 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -37,11 +37,11 @@ pub use self::dcl::{ }; pub use self::ddl::{ AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnLocation, - ColumnOption, ColumnOptionDef, ColumnPolicy, ColumnPolicyProperty, ConstraintCharacteristics, - CreateTableLikeOption, Deduplicate, GeneratedAs, IndexType, KeyOrIndexDisplay, Partition, - ProcedureParam, ReferentialAction, TableConstraint, TablePolicy, TablePolicyKind, - TableProjection, Tag, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, - ViewSecurity, + ColumnMask, ColumnOption, ColumnOptionDef, ColumnPolicy, ColumnPolicyProperty, + ConstraintCharacteristics, CreateTableLikeOption, Deduplicate, GeneratedAs, IndexType, + 
KeyOrIndexDisplay, Partition, ProcedureParam, ReferentialAction, TableConstraint, TablePolicy, + TablePolicyKind, TableProjection, Tag, UserDefinedTypeCompositeAttributeDef, + UserDefinedTypeRepresentation, ViewSecurity, }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 945607df30..506fbfe57c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8303,8 +8303,22 @@ impl<'a> Parser<'a> { let column_options = self.parse_options(Keyword::OPTIONS)?; + // Databricks column mask: `MASK [USING COLUMNS (|, ...)]`. + // https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-ddl-column-mask let mask = if self.parse_keyword(Keyword::MASK) { - Some(self.parse_object_name(false)?) + let function = self.parse_object_name(false)?; + let using_columns = if self.parse_keywords(&[Keyword::USING, Keyword::COLUMNS]) { + self.expect_token(&Token::LParen)?; + let cols = self.parse_comma_separated(|p| p.parse_expr())?; + self.expect_token(&Token::RParen)?; + cols + } else { + vec![] + }; + Some(ColumnMask { + function, + using_columns, + }) } else { None }; @@ -9089,8 +9103,16 @@ impl<'a> Parser<'a> { /// introduce a table policy. /// fn maybe_parse_table_policy(&mut self, with: bool) -> Result, ParserError> { - let kind = if self.parse_keywords(&[Keyword::ROW, Keyword::ACCESS, Keyword::POLICY]) { - TablePolicyKind::RowAccess + let kind = if self.parse_keyword(Keyword::ROW) { + // Snowflake `ROW ACCESS POLICY` vs Databricks `ROW FILTER`. 
+ if self.parse_keywords(&[Keyword::ACCESS, Keyword::POLICY]) { + TablePolicyKind::RowAccess + } else if self.parse_keyword(Keyword::FILTER) { + TablePolicyKind::RowFilter + } else { + self.prev_token(); // put back ROW + return Ok(None); + } } else if self.parse_keywords(&[Keyword::AGGREGATION, Keyword::POLICY]) { TablePolicyKind::Aggregation } else if self.parse_keywords(&[Keyword::JOIN, Keyword::POLICY]) { @@ -9114,7 +9136,9 @@ impl<'a> Parser<'a> { // The keyword introducing the (optional) scoped-column list depends on // the policy kind: ON / ENTITY KEY / ALLOWED JOIN KEYS. let columns = match kind { - TablePolicyKind::RowAccess | TablePolicyKind::StorageLifecycle => { + TablePolicyKind::RowAccess + | TablePolicyKind::StorageLifecycle + | TablePolicyKind::RowFilter => { if self.parse_keyword(Keyword::ON) { self.parse_parenthesized_column_list(Mandatory, false)? } else { diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 34e5233f48..7a7e4adc53 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -154,6 +154,49 @@ fn test_underscore_column_name() { #[test] fn test_create_table_column_mask() { databricks().verified_stmt("CREATE TABLE persons (name STRING, ssn STRING MASK mask_ssn)"); + + // Column mask with USING COLUMNS (other column names and/or literals). + // https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-ddl-column-mask + databricks().verified_stmt( + "CREATE TABLE persons (name STRING, address STRING MASK mask_pii USING COLUMNS (region), region STRING)", + ); + match databricks().verified_stmt( + "CREATE TABLE t (address STRING MASK mask_addr USING COLUMNS (country, '_viewers'))", + ) { + Statement::CreateTable { columns, .. 
} => { + let mask = columns[0].mask.as_ref().expect("mask captured"); + assert_eq!(mask.function.to_string(), "mask_addr"); + assert_eq!(mask.using_columns.len(), 2); + assert_eq!(mask.using_columns[0].to_string(), "country"); + assert_eq!(mask.using_columns[1].to_string(), "'_viewers'"); + } + other => panic!("expected CreateTable, got {other:?}"), + } +} + +#[test] +fn test_create_table_row_filter() { + // Databricks table-level row filter: `WITH ROW FILTER ON (cols)`. + // https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-ddl-row-filter + databricks().verified_stmt( + "CREATE TABLE employees (emp_name STRING, dept STRING) WITH ROW FILTER filter_emps ON (dept)", + ); + match databricks() + .verified_stmt("CREATE TABLE sales (region STRING) WITH ROW FILTER us_filter ON (region)") + { + Statement::CreateTable { table_policies, .. } => { + assert_eq!(table_policies.len(), 1); + assert_eq!(table_policies[0].kind, TablePolicyKind::RowFilter); + assert_eq!(table_policies[0].policy_name.to_string(), "us_filter"); + let cols: Vec = table_policies[0] + .columns + .iter() + .map(|c| c.to_string()) + .collect(); + assert_eq!(cols, vec!["region".to_string()]); + } + other => panic!("expected CreateTable, got {other:?}"), + } } #[test] From 0e17f083f77adf7b9cc097dd4fbcd1b04ab1a745 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 2 Jun 2026 00:32:59 +0200 Subject: [PATCH 05/16] feat(snowflake): parse policy definitions (MASKING/ROW ACCESS/AGGREGATION/PROJECTION/JOIN) Snowflake security/governance policy definitions share one shape: CREATE [OR REPLACE] POLICY [IF NOT EXISTS] AS ( [ , ...] ) RETURNS -> [COMMENT = '...'] [EXEMPT_OTHER_POLICIES = { TRUE | FALSE }] These previously fell through the generic CREATE skip-until-semicolon fallback, discarding the masking/row-access condition. A new Statement::CreatePolicy variant captures kind + name + typed signature + RETURNS type + the `-> body` expression + trailing options. 
Parsing the body as a real Expr keeps any subqueries/table references (e.g. an EXISTS lookup) visible to lineage. Dispatched via maybe_parse before the generic fallback; the `AS` check makes it revert for non-Snowflake shapes (BigQuery's `ROW ACCESS POLICY ... ON <table>`), so those still fall back unchanged pending dedicated handling. Grammar per Snowflake docs: https://docs.snowflake.com/en/sql-reference/sql/create-masking-policy https://docs.snowflake.com/en/sql-reference/sql/create-row-access-policy https://docs.snowflake.com/en/sql-reference/sql/create-aggregation-policy https://docs.snowflake.com/en/sql-reference/sql/create-projection-policy https://docs.snowflake.com/en/sql-reference/sql/create-join-policy --- src/ast/ddl.rs | 46 +++++++++++++++++++++ src/ast/mod.rs | 53 ++++++++++++++++++++++-- src/parser/mod.rs | 80 ++++++++++++++++++++++++++++++++++++ tests/sqlparser_snowflake.rs | 53 ++++++++++++++++++++++++ 4 files changed, 229 insertions(+), 3 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index c908246a48..36b1fd8e06 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -1419,6 +1419,52 @@ pub struct TablePolicy { pub columns: Vec>, } +/// The kind of a Snowflake policy *definition* (`CREATE ... POLICY`). Each +/// selects the keyword sequence and the shape of the policy body.
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum PolicyKind { + /// `CREATE MASKING POLICY` + Masking, + /// `CREATE ROW ACCESS POLICY` + RowAccess, + /// `CREATE AGGREGATION POLICY` + Aggregation, + /// `CREATE PROJECTION POLICY` + Projection, + /// `CREATE JOIN POLICY` + Join, +} + +impl fmt::Display for PolicyKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + PolicyKind::Masking => "MASKING POLICY", + PolicyKind::RowAccess => "ROW ACCESS POLICY", + PolicyKind::Aggregation => "AGGREGATION POLICY", + PolicyKind::Projection => "PROJECTION POLICY", + PolicyKind::Join => "JOIN POLICY", + }) + } +} + +/// A single argument in a Snowflake policy signature (`<name> <type>`), e.g. the +/// `email varchar` in `CREATE MASKING POLICY p AS (email varchar) ...`. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct PolicyArg { + pub name: Ident, + pub data_type: DataType, +} + +impl fmt::Display for PolicyArg { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{} {}", self.name, self.data_type) + } +} + /// A Databricks column mask applied in a column definition or `ALTER COLUMN`: /// `MASK <func_name> [ USING COLUMNS ( <col> | <literal> [ , ... ] ) ]`.
/// diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 601d771a7f..053252fa5a 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -39,9 +39,9 @@ pub use self::ddl::{ AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnLocation, ColumnMask, ColumnOption, ColumnOptionDef, ColumnPolicy, ColumnPolicyProperty, ConstraintCharacteristics, CreateTableLikeOption, Deduplicate, GeneratedAs, IndexType, - KeyOrIndexDisplay, Partition, ProcedureParam, ReferentialAction, TableConstraint, TablePolicy, - TablePolicyKind, TableProjection, Tag, UserDefinedTypeCompositeAttributeDef, - UserDefinedTypeRepresentation, ViewSecurity, + KeyOrIndexDisplay, Partition, PolicyArg, PolicyKind, ProcedureParam, ReferentialAction, + TableConstraint, TablePolicy, TablePolicyKind, TableProjection, Tag, + UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, ViewSecurity, }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ @@ -2025,6 +2025,31 @@ pub enum Statement { /// ```sql /// CREATE ROLE /// ``` + /// ```sql + /// CREATE [OR REPLACE] { MASKING | ROW ACCESS | AGGREGATION | PROJECTION | JOIN } POLICY + /// [IF NOT EXISTS] AS ( [ [, ...]] ) RETURNS -> + /// [COMMENT = '...'] [EXEMPT_OTHER_POLICIES = { TRUE | FALSE }] + /// ``` + /// Snowflake security/governance policy *definition*. The masking/row-access + /// condition lives in `body` (a real `Expr`, so any subqueries/table refs are + /// visible to lineage). + /// + /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/sql/create-masking-policy + CreatePolicy { + or_replace: bool, + if_not_exists: bool, + kind: PolicyKind, + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + name: ObjectName, + /// Signature args `(name type, ...)`; empty for `AS ()`. + args: Vec, + /// `RETURNS `. + returns: DataType, + /// `-> ` condition/return expression. + body: Box, + /// Trailing `COMMENT = '...'`, `EXEMPT_OTHER_POLICIES = { TRUE | FALSE }`. 
+ options: Vec, + }, /// See [postgres](https://www.postgresql.org/docs/current/sql-createrole.html) CreateRole { names: Vec, @@ -3850,6 +3875,28 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::CreatePolicy { + or_replace, + if_not_exists, + kind, + name, + args, + returns, + body, + options, + } => { + write!( + f, + "CREATE {or_replace}{kind} {if_not_exists}{name} AS ({args}) RETURNS {returns} -> {body}", + or_replace = if *or_replace { "OR REPLACE " } else { "" }, + if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, + args = display_comma_separated(args), + )?; + for option in options { + write!(f, " {option}")?; + } + Ok(()) + } Statement::CreateRole { names, if_not_exists, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 506fbfe57c..bfe06585f8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4891,6 +4891,14 @@ impl<'a> Parser<'a> { self.next_token(); // SEMANTIC self.next_token(); // VIEW self.parse_create_semantic_view(or_replace) + } else if let Some(stmt) = self.maybe_parse(|p| p.parse_create_snowflake_policy(or_replace)) + { + // Snowflake security/governance policy definitions + // (CREATE { MASKING | ROW ACCESS | AGGREGATION | PROJECTION | JOIN } + // POLICY ... AS (sig) RETURNS type -> body). maybe_parse reverts and + // falls through for non-Snowflake shapes (e.g. BigQuery's + // `ROW ACCESS POLICY ... ON table`), which the generic fallback handles. + Ok(stmt) } else { // Generic fallback: skip tokens until end of statement // This handles dialect-specific CREATE statements like: @@ -4934,6 +4942,78 @@ impl<'a> Parser<'a> { } } + /// Parse a Snowflake security/governance policy *definition*: + /// `{ MASKING | ROW ACCESS | AGGREGATION | PROJECTION | JOIN } POLICY + /// [IF NOT EXISTS] AS ( [ , ...] ) RETURNS -> + /// [COMMENT = '...'] [EXEMPT_OTHER_POLICIES = { TRUE | FALSE }]`. 
+ /// + /// Errors (so the caller's `maybe_parse` reverts) when the next tokens don't + /// form this shape — in particular the `AS` check rejects BigQuery's + /// `ROW ACCESS POLICY ... ON <table>
` form, leaving it to other handlers. + /// + fn parse_create_snowflake_policy( + &mut self, + or_replace: bool, + ) -> Result { + let kind = if self.parse_keywords(&[Keyword::MASKING, Keyword::POLICY]) { + PolicyKind::Masking + } else if self.parse_keywords(&[Keyword::ROW, Keyword::ACCESS, Keyword::POLICY]) { + PolicyKind::RowAccess + } else if self.parse_keywords(&[Keyword::AGGREGATION, Keyword::POLICY]) { + PolicyKind::Aggregation + } else if self.parse_keywords(&[Keyword::PROJECTION, Keyword::POLICY]) { + PolicyKind::Projection + } else if self.parse_keywords(&[Keyword::JOIN, Keyword::POLICY]) { + PolicyKind::Join + } else { + return self.expected("a policy kind", self.peek_token()); + }; + + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let name = self.parse_object_name(false)?; + + // The Snowflake form is `AS (sig) RETURNS type -> body`. If `AS` is not + // next, this is a different policy shape (BigQuery/Postgres/Redshift) — + // error so the caller falls back. + self.expect_keyword(Keyword::AS)?; + self.expect_token(&Token::LParen)?; + let args = if self.consume_token(&Token::RParen) { + vec![] + } else { + let args = self.parse_comma_separated(|p| { + let name = p.parse_identifier_no_span()?; + let data_type = p.parse_data_type()?; + Ok(PolicyArg { name, data_type }) + })?; + self.expect_token(&Token::RParen)?; + args + }; + + self.expect_keyword(Keyword::RETURNS)?; + let returns = self.parse_data_type()?; + self.expect_token(&Token::Arrow)?; + let body = Box::new(self.parse_expr()?); + + // Trailing `key = value` options (COMMENT, EXEMPT_OTHER_POLICIES). 
+ let mut options = vec![]; + while matches!(self.peek_token_kind(), Token::Word(_)) + && self.peek_nth_token(1).token == Token::Eq + { + options.push(self.parse_sql_option()?); + } + + Ok(Statement::CreatePolicy { + or_replace, + if_not_exists, + kind, + name, + args, + returns, + body, + options, + }) + } + /// Parse a CACHE TABLE statement pub fn parse_cache_table(&mut self) -> Result { let (mut table_flag, mut options, mut has_as, mut query) = (None, vec![], false, None); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 21c8d7f289..8d8d07d916 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2203,6 +2203,59 @@ fn test_column_with_tag() { } } +#[test] +fn test_snowflake_create_policy_definitions() { + // Snowflake policy definitions: signature + RETURNS + `-> body`. + // https://docs.snowflake.com/en/sql-reference/sql/create-masking-policy + // https://docs.snowflake.com/en/sql-reference/sql/create-row-access-policy + // https://docs.snowflake.com/en/sql-reference/sql/create-aggregation-policy + // https://docs.snowflake.com/en/sql-reference/sql/create-projection-policy + // https://docs.snowflake.com/en/sql-reference/sql/create-join-policy + snowflake().verified_stmt( + "CREATE MASKING POLICY email_mask AS (val VARCHAR) RETURNS VARCHAR -> CASE WHEN current_role() = 'ADMIN' THEN val ELSE '***' END", + ); + snowflake().verified_stmt( + "CREATE OR REPLACE MASKING POLICY p AS (val STRING) RETURNS STRING -> val COMMENT = 'c' EXEMPT_OTHER_POLICIES = true", + ); + snowflake().verified_stmt( + "CREATE AGGREGATION POLICY ap AS () RETURNS AGGREGATION_CONSTRAINT -> NO_AGGREGATION_CONSTRAINT()", + ); + snowflake().verified_stmt( + "CREATE PROJECTION POLICY pp AS () RETURNS PROJECTION_CONSTRAINT -> PROJECTION_CONSTRAINT(ALLOW => true)", + ); + snowflake().verified_stmt( + "CREATE JOIN POLICY jp AS () RETURNS JOIN_CONSTRAINT -> JOIN_CONSTRAINT(JOIN_REQUIRED => true)", + ); + // Lowercase input normalizes 
types/keywords to canonical form. + snowflake().one_statement_parses_to( + "create masking policy m as (email varchar) returns varchar -> email", + "CREATE MASKING POLICY m AS (email VARCHAR) RETURNS VARCHAR -> email", + ); + + // The body Expr preserves table references for lineage (EXISTS subquery). + match snowflake().verified_stmt( + "CREATE ROW ACCESS POLICY rap AS (region VARCHAR) RETURNS BOOLEAN -> EXISTS (SELECT 1 FROM mgr_regions WHERE mgr = current_role())", + ) { + Statement::CreatePolicy { + kind, + name, + args, + returns, + body, + .. + } => { + assert_eq!(kind, PolicyKind::RowAccess); + assert_eq!(name.to_string(), "rap"); + assert_eq!(args.len(), 1); + assert_eq!(args[0].name.to_string(), "region"); + assert_eq!(returns.to_string(), "BOOLEAN"); + // The referenced table is reachable in the body expression. + assert!(body.to_string().contains("mgr_regions")); + } + other => panic!("expected CreatePolicy, got {other:?}"), + } +} + #[test] fn test_describe_table() { snowflake().verified_stmt(r#"DESCRIBE TABLE "DW_PROD"."SCH"."TBL""#); From 54f970bf150b0efcc8735da38c8cf5284fee5c3f Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 2 Jun 2026 00:35:53 +0200 Subject: [PATCH 06/16] feat(snowflake): parse CREATE TAG definitions Snowflake tag definitions previously fell through the generic CREATE fallback. A new Statement::CreateTag captures the tag name, the optional ALLOWED_VALUES string list, and trailing key=value options (COMMENT, PROPAGATE, ON_CONFLICT), so tag objects are represented in the AST alongside their applications. 
Grammar per Snowflake docs: https://docs.snowflake.com/en/sql-reference/sql/create-tag --- src/ast/mod.rs | 44 ++++++++++++++++++++++++++++++++++++ src/parser/mod.rs | 35 ++++++++++++++++++++++++++++ tests/sqlparser_snowflake.rs | 26 +++++++++++++++++++++ 3 files changed, 105 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 053252fa5a..037f1a9e76 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2050,6 +2050,23 @@ pub enum Statement { /// Trailing `COMMENT = '...'`, `EXEMPT_OTHER_POLICIES = { TRUE | FALSE }`. options: Vec, }, + /// ```sql + /// CREATE [OR REPLACE] TAG [IF NOT EXISTS] + /// [ ALLOWED_VALUES '' [, '' ...] ] + /// [ = ... ] -- e.g. COMMENT, PROPAGATE, ON_CONFLICT + /// ``` + /// Snowflake tag *definition*. + /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/sql/create-tag + CreateTag { + or_replace: bool, + if_not_exists: bool, + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + name: ObjectName, + /// `ALLOWED_VALUES '' [, ...]` (empty when omitted). + allowed_values: Vec, + /// Trailing `key = value` options (COMMENT, PROPAGATE, ON_CONFLICT). 
+ options: Vec, + }, /// See [postgres](https://www.postgresql.org/docs/current/sql-createrole.html) CreateRole { names: Vec, @@ -3897,6 +3914,33 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::CreateTag { + or_replace, + if_not_exists, + name, + allowed_values, + options, + } => { + write!( + f, + "CREATE {or_replace}TAG {if_not_exists}{name}", + or_replace = if *or_replace { "OR REPLACE " } else { "" }, + if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, + )?; + if !allowed_values.is_empty() { + write!(f, " ALLOWED_VALUES ")?; + for (i, v) in allowed_values.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "'{}'", value::escape_single_quote_string(v))?; + } + } + for option in options { + write!(f, " {option}")?; + } + Ok(()) + } Statement::CreateRole { names, if_not_exists, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index bfe06585f8..245b203d0b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4891,6 +4891,8 @@ impl<'a> Parser<'a> { self.next_token(); // SEMANTIC self.next_token(); // VIEW self.parse_create_semantic_view(or_replace) + } else if self.parse_keyword(Keyword::TAG) { + self.parse_create_tag(or_replace) } else if let Some(stmt) = self.maybe_parse(|p| p.parse_create_snowflake_policy(or_replace)) { // Snowflake security/governance policy definitions @@ -5014,6 +5016,39 @@ impl<'a> Parser<'a> { }) } + /// Parse a Snowflake `CREATE TAG` definition. `TAG` has already been consumed. + /// `CREATE [OR REPLACE] TAG [IF NOT EXISTS] [ALLOWED_VALUES '' [, ...]] + /// [ = ...]`. + /// + fn parse_create_tag(&mut self, or_replace: bool) -> Result { + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let name = self.parse_object_name(false)?; + + // `ALLOWED_VALUES` is a bare comma-separated list of string literals + // (no parens, no `=`); it isn't a reserved keyword, so match by value. 
+ let allowed_values = if self.parse_word_ci("ALLOWED_VALUES") { + self.parse_comma_separated(|p| p.parse_literal_string())? + } else { + vec![] + }; + + // Trailing `key = value` options (COMMENT, PROPAGATE, ON_CONFLICT). + let mut options = vec![]; + while matches!(self.peek_token_kind(), Token::Word(_)) + && self.peek_nth_token(1).token == Token::Eq + { + options.push(self.parse_sql_option()?); + } + + Ok(Statement::CreateTag { + or_replace, + if_not_exists, + name, + allowed_values, + options, + }) + } + /// Parse a CACHE TABLE statement pub fn parse_cache_table(&mut self) -> Result { let (mut table_flag, mut options, mut has_as, mut query) = (None, vec![], false, None); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 8d8d07d916..6fac713a14 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2256,6 +2256,32 @@ fn test_snowflake_create_policy_definitions() { } } +#[test] +fn test_snowflake_create_tag() { + // Snowflake tag definition. + // https://docs.snowflake.com/en/sql-reference/sql/create-tag + snowflake().verified_stmt("CREATE OR REPLACE TAG accounting"); + snowflake().verified_stmt("CREATE TAG cost_center COMMENT = 'cost_center tag'"); + snowflake() + .verified_stmt("CREATE TAG my_tag ALLOWED_VALUES 'blue', 'red' PROPAGATE = ON_DEPENDENCY"); + snowflake().verified_stmt("CREATE TAG IF NOT EXISTS db.sch.t ALLOWED_VALUES 'a'"); + + match snowflake().verified_stmt("CREATE TAG cost_center ALLOWED_VALUES 'finance', 'eng'") { + Statement::CreateTag { + name, + allowed_values, + .. 
+ } => { + assert_eq!(name.to_string(), "cost_center"); + assert_eq!( + allowed_values, + vec!["finance".to_string(), "eng".to_string()] + ); + } + other => panic!("expected CreateTag, got {other:?}"), + } +} + #[test] fn test_describe_table() { snowflake().verified_stmt(r#"DESCRIBE TABLE "DW_PROD"."SCH"."TBL""#); From ab11847256795ebc9c467535fa7958c020eb9a79 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 2 Jun 2026 00:39:04 +0200 Subject: [PATCH 07/16] feat(bigquery): parse CREATE ROW ACCESS POLICY BigQuery row-level security previously fell through the generic CREATE fallback, discarding the target table and the filter predicate. A new Statement::CreateRowAccessPolicy captures the policy name, the `ON
` target, the optional `GRANT TO (...)` principal list, and the `FILTER USING (<expr>)` expression. Parsing the predicate as a real Expr keeps any subquery table references (e.g. a lookup-table IN-subquery) visible to lineage. Dispatched after the Snowflake policy attempt (whose `AS` check reverts for this `... ON <table>
` shape), so Snowflake definitions are unaffected. Grammar per BigQuery docs: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language --- src/ast/mod.rs | 40 +++++++++++++++++++++++++++++++++ src/parser/mod.rs | 44 ++++++++++++++++++++++++++++++++++++- tests/sqlparser_bigquery.rs | 33 ++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 037f1a9e76..ddff6929bf 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2067,6 +2067,26 @@ pub enum Statement { /// Trailing `key = value` options (COMMENT, PROPAGATE, ON_CONFLICT). options: Vec, }, + /// ```sql + /// CREATE [OR REPLACE] ROW ACCESS POLICY [IF NOT EXISTS] ON
+ /// [ GRANT TO ( [, ...] ) ] + /// FILTER USING ( ) + /// ``` + /// BigQuery row-level security policy. The target `table_name` and the + /// `filter_using` predicate (which may itself contain a subquery) are + /// preserved for lineage. + /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language + CreateRowAccessPolicy { + or_replace: bool, + if_not_exists: bool, + name: ObjectName, + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + table_name: ObjectName, + /// `GRANT TO (...)` IAM principals (string literals); empty when omitted. + grant_to: Vec, + /// `FILTER USING ()`. + filter_using: Box, + }, /// See [postgres](https://www.postgresql.org/docs/current/sql-createrole.html) CreateRole { names: Vec, @@ -3941,6 +3961,26 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::CreateRowAccessPolicy { + or_replace, + if_not_exists, + name, + table_name, + grant_to, + filter_using, + } => { + write!( + f, + "CREATE {or_replace}ROW ACCESS POLICY {if_not_exists}{name} ON {table_name}", + or_replace = if *or_replace { "OR REPLACE " } else { "" }, + if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, + )?; + if !grant_to.is_empty() { + write!(f, " GRANT TO ({})", display_comma_separated(grant_to))?; + } + write!(f, " FILTER USING ({filter_using})")?; + Ok(()) + } Statement::CreateRole { names, if_not_exists, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 245b203d0b..533abd7a65 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4899,8 +4899,12 @@ impl<'a> Parser<'a> { // (CREATE { MASKING | ROW ACCESS | AGGREGATION | PROJECTION | JOIN } // POLICY ... AS (sig) RETURNS type -> body). maybe_parse reverts and // falls through for non-Snowflake shapes (e.g. BigQuery's - // `ROW ACCESS POLICY ... ON table`), which the generic fallback handles. + // `ROW ACCESS POLICY ... ON table`), handled next. 
Ok(stmt) + } else if self.parse_keywords(&[Keyword::ROW, Keyword::ACCESS, Keyword::POLICY]) { + // BigQuery row-level security: + // ROW ACCESS POLICY ON
[GRANT TO (...)] FILTER USING (expr) + self.parse_create_row_access_policy(or_replace) } else { // Generic fallback: skip tokens until end of statement // This handles dialect-specific CREATE statements like: @@ -5016,6 +5020,44 @@ impl<'a> Parser<'a> { }) } + /// Parse a BigQuery `CREATE ROW ACCESS POLICY`. The `ROW ACCESS POLICY` + /// keywords have already been consumed. + /// ` ON
[GRANT TO (, ...)] FILTER USING ()`. + /// + fn parse_create_row_access_policy( + &mut self, + or_replace: bool, + ) -> Result { + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::ON)?; + let table_name = self.parse_object_name(false)?; + + let grant_to = if self.parse_keywords(&[Keyword::GRANT, Keyword::TO]) { + self.expect_token(&Token::LParen)?; + let grantees = self.parse_comma_separated(|p| p.parse_expr())?; + self.expect_token(&Token::RParen)?; + grantees + } else { + vec![] + }; + + self.expect_keyword(Keyword::FILTER)?; + self.expect_keyword(Keyword::USING)?; + self.expect_token(&Token::LParen)?; + let filter_using = Box::new(self.parse_expr()?); + self.expect_token(&Token::RParen)?; + + Ok(Statement::CreateRowAccessPolicy { + or_replace, + if_not_exists, + name, + table_name, + grant_to, + filter_using, + }) + } + /// Parse a Snowflake `CREATE TAG` definition. `TAG` has already been consumed. /// `CREATE [OR REPLACE] TAG [IF NOT EXISTS] [ALLOWED_VALUES '' [, ...]] /// [ = ...]`. diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 725d97733f..3d431904c3 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1371,6 +1371,39 @@ fn bigquery() -> TestedDialects { } } +#[test] +fn parse_create_row_access_policy() { + // BigQuery row-level security. + // https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language + bigquery().verified_stmt( + "CREATE ROW ACCESS POLICY us_filter ON project.dataset.my_table GRANT TO ('user:abc@example.com') FILTER USING (region = 'US')", + ); + bigquery().verified_stmt( + "CREATE OR REPLACE ROW ACCESS POLICY f ON ds.t GRANT TO ('domain:example.com', 'group:g@e.com') FILTER USING (region IN ('us-west1', 'us-west2'))", + ); + // GRANT TO is optional. 
+ bigquery().verified_stmt("CREATE ROW ACCESS POLICY f ON ds.t FILTER USING (a > 1)"); + + // FILTER USING may contain a subquery — its table ref must survive for lineage. + match bigquery().verified_stmt( + "CREATE OR REPLACE ROW ACCESS POLICY apac ON project.dataset.my_table GRANT TO ('domain:example.com') FILTER USING (region IN (SELECT region FROM lookup_table WHERE email = SESSION_USER()))", + ) { + Statement::CreateRowAccessPolicy { + name, + table_name, + grant_to, + filter_using, + .. + } => { + assert_eq!(name.to_string(), "apac"); + assert_eq!(table_name.to_string(), "project.dataset.my_table"); + assert_eq!(grant_to.len(), 1); + assert!(filter_using.to_string().contains("lookup_table")); + } + other => panic!("expected CreateRowAccessPolicy, got {other:?}"), + } +} + fn bigquery_unescaped() -> TestedDialects { TestedDialects { dialects: vec![Box::new(BigQueryDialect {})], From 13ea06b818dcf483ef7bff7195afbc08082a0f7c Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 2 Jun 2026 00:51:35 +0200 Subject: [PATCH 08/16] fix(snowflake): support conditional-masking USING (cols) after column list Snowflake conditional masking places the masking policy's USING (col, cond_col, ...) clause *after* the column-list parentheses (per the column-security guide), e.g. `CREATE TABLE t (email STRING MASKING POLICY p) USING (email, visibility)`. This previously errored because the trailing `USING (` collided with the Databricks `USING ` / Snowflake `USING TEMPLATE` handling. For CREATE TABLE the columns are attached to the preceding column's masking policy (canonical form carries USING inline), preserving them for lineage; for CREATE VIEW the clause is consumed (view column policies aren't represented). Grammar per Snowflake docs: https://docs.snowflake.com/en/user-guide/security-column-intro Fixes 3 corpus test failures (Snowflake). 
--- src/parser/mod.rs | 31 +++++++++++++++++++++++++++++- tests/sqlparser_snowflake.rs | 37 ++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 533abd7a65..21f346ff68 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6164,6 +6164,18 @@ impl<'a> Parser<'a> { ) }; + // Snowflake conditional masking: an optional `USING (col, ...)` clause + // after the column list supplies the masking policy's conditional + // columns. View column policies aren't represented, so consume it. + // https://docs.snowflake.com/en/user-guide/security-column-intro + if !columns.is_empty() + && matches!(self.peek_token_kind(), Token::Word(w) if w.keyword == Keyword::USING) + && matches!(self.peek_nth_token(1).token, Token::LParen) + { + self.next_token(); // USING + let _ = self.parse_parenthesized_column_list(Mandatory, false)?; + } + // Snowflake view-level `WITH TAG (...)` before the regular WITH (...) options — // https://docs.snowflake.com/en/sql-reference/sql/create-view if self.parse_keywords(&[Keyword::WITH, Keyword::TAG]) { @@ -7598,7 +7610,7 @@ impl<'a> Parser<'a> { }; // parse optional column list (schema) - let (columns, constraints, projections) = self.parse_columns()?; + let (mut columns, constraints, projections) = self.parse_columns()?; // PostgreSQL: partition bound for `PARTITION OF parent` let partition_bound = if partition_of.is_some() { @@ -7612,6 +7624,23 @@ impl<'a> Parser<'a> { // Snowflake: USING TEMPLATE (query_expr) let expr = self.parse_expr()?; (None, Some(Box::new(expr))) + } else if self.peek_token_is(&Token::LParen) { + // Snowflake conditional masking: a `USING (col, cond_col, ...)` + // clause *after* the column-list parens supplies the masking + // policy's conditional columns. Attach them to the column whose + // MASKING POLICY they qualify (canonical form puts USING inline). 
+ // https://docs.snowflake.com/en/user-guide/security-column-intro + let cols = self.parse_parenthesized_column_list(Mandatory, false)?; + if let Some(ColumnPolicy::MaskingPolicy(property)) = columns + .iter_mut() + .rev() + .find_map(|c| c.column_policy.as_mut()) + { + if property.using_columns.is_none() { + property.using_columns = Some(cols); + } + } + (None, None) } else { // Databricks: USING DELTA (Some(self.parse_object_name(false)?), None) diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 6fac713a14..7f1cacad64 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2256,6 +2256,43 @@ fn test_snowflake_create_policy_definitions() { } } +#[test] +fn test_snowflake_conditional_masking_using_after_columns() { + // Snowflake conditional masking places the masking policy's USING (cols) + // *after* the column-list parens. For CREATE TABLE the columns are attached + // to the column's masking policy (canonical form has USING inline). + // https://docs.snowflake.com/en/user-guide/security-column-intro + snowflake().one_statement_parses_to( + "CREATE OR REPLACE TABLE user_info (email STRING MASKING POLICY email_visibility) USING (email, visibility)", + "CREATE OR REPLACE TABLE user_info (email STRING MASKING POLICY email_visibility USING (email, visibility))", + ); + match snowflake().verified_stmt( + "CREATE OR REPLACE TABLE user_info (email STRING MASKING POLICY email_visibility USING (email, visibility))", + ) { + Statement::CreateTable { columns, .. 
} => match &columns[0].column_policy { + Some(ColumnPolicy::MaskingPolicy(p)) => { + let using: Vec = p + .using_columns + .as_ref() + .unwrap() + .iter() + .map(|c| c.to_string()) + .collect(); + assert_eq!(using, vec!["email".to_string(), "visibility".to_string()]); + } + other => panic!("expected MaskingPolicy, got {other:?}"), + }, + other => panic!("expected CreateTable, got {other:?}"), + } + + // For CREATE VIEW the trailing USING (cols) is consumed (view column + // policies aren't represented), leaving a clean column list. + snowflake().one_statement_parses_to( + "CREATE OR REPLACE VIEW user_info_v (email MASKING POLICY email_visibility) USING (email, visibility) AS SELECT * FROM user_info", + "CREATE OR REPLACE VIEW user_info_v (email) AS SELECT * FROM user_info", + ); +} + #[test] fn test_snowflake_create_tag() { // Snowflake tag definition. From efde648ff3029a80c81c07a270003930d9e309fa Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 2 Jun 2026 00:56:54 +0200 Subject: [PATCH 09/16] feat(snowflake): ALTER SET/UNSET aggregation/join policy + DROP policy/tag Adds the ALTER-time governance operations and policy/tag drops that previously errored or hit the generic fallback: - ALTER TABLE SET { AGGREGATION | JOIN } POLICY [ENTITY KEY (..) | ALLOWED JOIN KEYS (..)] [FORCE] -> AlterTableOperation::SetTablePolicy (reuses TablePolicy). - ALTER TABLE UNSET { AGGREGATION | JOIN | ROW ACCESS } POLICY -> AlterTableOperation::UnsetTablePolicy; UNSET TAG [, ...] -> UnsetTag. - DROP { MASKING | ROW ACCESS | AGGREGATION | PROJECTION | JOIN } POLICY and DROP TAG -> Statement::Drop with new ObjectType::Policy(kind) / ObjectType::Tag. Grammar per Snowflake docs: https://docs.snowflake.com/en/sql-reference/sql/alter-table Fixes 1 corpus test failure (Snowflake). 
--- src/ast/ddl.rs | 34 +++++++++++++++++++++++++++++- src/ast/mod.rs | 30 ++++++++++++++++----------- src/parser/mod.rs | 40 ++++++++++++++++++++++++++++++++++++ tests/sqlparser_snowflake.rs | 32 +++++++++++++++++++++++++++++ 4 files changed, 123 insertions(+), 13 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 36b1fd8e06..f262fefc19 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -165,6 +165,18 @@ pub enum AlterTableOperation { /// Note: this is Snowflake specific DropRowAccessPolicy { policy: ObjectName }, + /// `SET { AGGREGATION | JOIN } POLICY [ENTITY KEY (...) | ALLOWED JOIN KEYS (...)] [FORCE]` + /// + /// Snowflake table-level policy application via ALTER. + /// + SetTablePolicy { policy: TablePolicy, force: bool }, + + /// `UNSET { AGGREGATION | JOIN | ROW ACCESS } POLICY` (Snowflake). + UnsetTablePolicy { kind: TablePolicyKind }, + + /// `UNSET TAG [, ...]` (Snowflake). + UnsetTag { keys: Vec }, + /// `ADD PROJECTION [IF NOT EXISTS] (
` and `DROP ALL ROW ACCESS POLICIES ON <table>
` -> Statement::DropRowAccessPolicy { if_exists, all, name, table_name }. Dispatched (via maybe_parse) ahead of the generic DROP; the required `ON <table>
` makes it revert for the Snowflake `DROP ROW ACCESS POLICY ` form, which still maps to the generic Drop with ObjectType::Policy. Grammar per BigQuery docs: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language --- src/ast/mod.rs | 32 +++++++++++++++++++++++++++ src/parser/mod.rs | 43 +++++++++++++++++++++++++++++++++++++ tests/sqlparser_bigquery.rs | 22 +++++++++++++++++++ 3 files changed, 97 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a4685970d0..a851b3168c 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2087,6 +2087,21 @@ pub enum Statement { /// `FILTER USING ()`. filter_using: Box, }, + /// ```sql + /// DROP ROW ACCESS POLICY [IF EXISTS] ON
+ /// DROP ALL ROW ACCESS POLICIES ON <table>
+ /// ``` + /// BigQuery drop of one (or all) row access policies on a table. + /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language + DropRowAccessPolicy { + if_exists: bool, + /// `DROP ALL ROW ACCESS POLICIES` — drop every policy on the table. + all: bool, + /// Policy name (None for the `ALL` form). + name: Option, + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + table_name: ObjectName, + }, /// See [postgres](https://www.postgresql.org/docs/current/sql-createrole.html) CreateRole { names: Vec, @@ -3981,6 +3996,23 @@ impl fmt::Display for Statement { write!(f, " FILTER USING ({filter_using})")?; Ok(()) } + Statement::DropRowAccessPolicy { + if_exists, + all, + name, + table_name, + } => { + if *all { + write!(f, "DROP ALL ROW ACCESS POLICIES ON {table_name}") + } else { + write!( + f, + "DROP ROW ACCESS POLICY {}{} ON {table_name}", + if *if_exists { "IF EXISTS " } else { "" }, + name.as_ref().expect("policy name required unless ALL"), + ) + } + } Statement::CreateRole { names, if_not_exists, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 16f52ad3c1..fcdcc7c2cf 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6765,6 +6765,15 @@ impl<'a> Parser<'a> { let temporary = dialect_of!(self is MySqlDialect | GenericDialect) && self.parse_keyword(Keyword::TEMPORARY); + // BigQuery row-level security drop: `DROP [ALL] ROW ACCESS POLICY[IES] + // ... ON
`. The required `ON <table>
` makes this revert for the + // Snowflake `DROP ROW ACCESS POLICY ` form (handled below). + if !temporary { + if let Some(stmt) = self.maybe_parse(|p| p.parse_drop_row_access_policy()) { + return Ok(stmt); + } + } + let object_type = if self.parse_keyword(Keyword::TABLE) { ObjectType::Table } else if self.parse_keywords(&[Keyword::MATERIALIZED, Keyword::VIEW]) { @@ -6841,6 +6850,40 @@ impl<'a> Parser<'a> { }) } + /// Parse a BigQuery `DROP [ALL] ROW ACCESS POLICY[IES] ... ON
`. + /// Errors (so the caller's `maybe_parse` reverts) when it isn't this shape, + /// e.g. the Snowflake `DROP ROW ACCESS POLICY ` form that lacks `ON`. + /// + fn parse_drop_row_access_policy(&mut self) -> Result { + let all = self.parse_keyword(Keyword::ALL); + if !self.parse_keywords(&[Keyword::ROW, Keyword::ACCESS]) { + return self.expected("ROW ACCESS after DROP [ALL]", self.peek_token()); + } + // POLICY / POLICIES (plural isn't a reserved keyword). + if all { + if !self.parse_word_ci("POLICIES") { + return self.expected("POLICIES after DROP ALL ROW ACCESS", self.peek_token()); + } + } else if !self.parse_keyword(Keyword::POLICY) { + return self.expected("POLICY after DROP ROW ACCESS", self.peek_token()); + } + let if_exists = !all && self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = if all { + None + } else { + Some(self.parse_object_name(false)?) + }; + // `ON
` is required; its absence reverts to the Snowflake form. + self.expect_keyword(Keyword::ON)?; + let table_name = self.parse_object_name(false)?; + Ok(Statement::DropRowAccessPolicy { + if_exists, + all, + name, + table_name, + }) + } + /// ```sql /// DROP FUNCTION [ IF EXISTS ] name [ ( [ [ argmode ] [ argname ] argtype [, ...] ] ) ] [, ...] /// [ CASCADE | RESTRICT ] diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 3d431904c3..ba6a5c998f 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1404,6 +1404,28 @@ fn parse_create_row_access_policy() { } } +#[test] +fn parse_drop_row_access_policy() { + // https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language + bigquery().verified_stmt("DROP ROW ACCESS POLICY my_policy ON project.dataset.t"); + bigquery().verified_stmt("DROP ROW ACCESS POLICY IF EXISTS p ON ds.t"); + bigquery().verified_stmt("DROP ALL ROW ACCESS POLICIES ON ds.t"); + + match bigquery().verified_stmt("DROP ALL ROW ACCESS POLICIES ON ds.t") { + Statement::DropRowAccessPolicy { + all, + name, + table_name, + .. + } => { + assert!(all); + assert!(name.is_none()); + assert_eq!(table_name.to_string(), "ds.t"); + } + other => panic!("expected DropRowAccessPolicy, got {other:?}"), + } +} + fn bigquery_unescaped() -> TestedDialects { TestedDialects { dialects: vec![Box::new(BigQueryDialect {})], From e3f2cd362b09118d8a63e384deb2015af25750e7 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 2 Jun 2026 01:21:26 +0200 Subject: [PATCH 13/16] feat(postgres): parse CREATE/ALTER/DROP POLICY and ROW LEVEL SECURITY toggles PostgreSQL row-security DDL: - CREATE POLICY ON
[AS {PERMISSIVE|RESTRICTIVE}] [FOR ] [TO ,...] [USING (expr)] [WITH CHECK (expr)] -> Statement::CreatePostgresPolicy. - ALTER POLICY ... (RENAME TO / TO / USING / WITH CHECK) -> AlterPostgresPolicy. - DROP POLICY [IF EXISTS] ON
[CASCADE|RESTRICT] -> DropPostgresPolicy. - ALTER TABLE { ENABLE | DISABLE | FORCE | NO FORCE } ROW LEVEL SECURITY -> AlterTableOperation::RowLevelSecurity. USING / WITH CHECK predicates are parsed as real Expr, so their column/table references (including subqueries) stay visible to lineage. Grammar per PostgreSQL docs: https://www.postgresql.org/docs/current/sql-createpolicy.html https://www.postgresql.org/docs/current/sql-altertable.html --- src/ast/ddl.rs | 55 +++++++++++ src/ast/mod.rs | 128 +++++++++++++++++++++++- src/parser/mod.rs | 192 ++++++++++++++++++++++++++++++++++++ tests/sqlparser_postgres.rs | 40 ++++++++ 4 files changed, 412 insertions(+), 3 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index f262fefc19..5653557ecd 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -177,6 +177,11 @@ pub enum AlterTableOperation { /// `UNSET TAG [, ...]` (Snowflake). UnsetTag { keys: Vec }, + /// PostgreSQL row-level security toggle: + /// `{ ENABLE | DISABLE | FORCE | NO FORCE } ROW LEVEL SECURITY`. + /// + RowLevelSecurity { mode: RowLevelSecurityMode }, + /// `ADD PROJECTION [IF NOT EXISTS] (
+ /// [ AS { PERMISSIVE | RESTRICTIVE } ] + /// [ FOR { ALL | SELECT | INSERT | UPDATE | DELETE } ] + /// [ TO [, ...] ] + /// [ USING ( ) ] [ WITH CHECK ( ) ] + /// ``` + /// PostgreSQL row-security policy. + /// [Postgres]: https://www.postgresql.org/docs/current/sql-createpolicy.html + CreatePostgresPolicy { + name: Ident, + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + table_name: ObjectName, + /// `AS PERMISSIVE` (Some(true)) / `AS RESTRICTIVE` (Some(false)). + permissive: Option, + /// `FOR `. + command: Option, + /// `TO [, ...]` (includes PUBLIC / CURRENT_ROLE / ...). + to_roles: Vec, + using: Option>, + with_check: Option>, + }, + /// ```sql + /// ALTER POLICY ON
RENAME TO + /// ALTER POLICY ON
[ TO [, ...] ] [ USING (...) ] [ WITH CHECK (...) ] + /// ``` + /// [Postgres]: https://www.postgresql.org/docs/current/sql-alterpolicy.html + AlterPostgresPolicy { + name: Ident, + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + table_name: ObjectName, + /// `RENAME TO ` (mutually exclusive with the other fields). + rename_to: Option, + to_roles: Vec, + using: Option>, + with_check: Option>, + }, + /// `DROP POLICY [IF EXISTS] ON
[CASCADE | RESTRICT]` + /// [Postgres]: https://www.postgresql.org/docs/current/sql-droppolicy.html + DropPostgresPolicy { + if_exists: bool, + name: Ident, + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + table_name: ObjectName, + option: Option, + }, /// See [postgres](https://www.postgresql.org/docs/current/sql-createrole.html) CreateRole { names: Vec, @@ -3996,6 +4043,81 @@ impl fmt::Display for Statement { write!(f, " FILTER USING ({filter_using})")?; Ok(()) } + Statement::CreatePostgresPolicy { + name, + table_name, + permissive, + command, + to_roles, + using, + with_check, + } => { + write!(f, "CREATE POLICY {name} ON {table_name}")?; + if let Some(permissive) = permissive { + write!( + f, + " AS {}", + if *permissive { + "PERMISSIVE" + } else { + "RESTRICTIVE" + } + )?; + } + if let Some(command) = command { + write!(f, " FOR {command}")?; + } + if !to_roles.is_empty() { + write!(f, " TO {}", display_comma_separated(to_roles))?; + } + if let Some(using) = using { + write!(f, " USING ({using})")?; + } + if let Some(with_check) = with_check { + write!(f, " WITH CHECK ({with_check})")?; + } + Ok(()) + } + Statement::AlterPostgresPolicy { + name, + table_name, + rename_to, + to_roles, + using, + with_check, + } => { + write!(f, "ALTER POLICY {name} ON {table_name}")?; + if let Some(rename_to) = rename_to { + write!(f, " RENAME TO {rename_to}")?; + } else { + if !to_roles.is_empty() { + write!(f, " TO {}", display_comma_separated(to_roles))?; + } + if let Some(using) = using { + write!(f, " USING ({using})")?; + } + if let Some(with_check) = with_check { + write!(f, " WITH CHECK ({with_check})")?; + } + } + Ok(()) + } + Statement::DropPostgresPolicy { + if_exists, + name, + table_name, + option, + } => { + write!( + f, + "DROP POLICY {}{name} ON {table_name}", + if *if_exists { "IF EXISTS " } else { "" }, + )?; + if let Some(option) = option { + write!(f, " {option}")?; + } + Ok(()) + } Statement::DropRowAccessPolicy { if_exists, all, diff 
--git a/src/parser/mod.rs b/src/parser/mod.rs index fcdcc7c2cf..26191b6ca0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4900,6 +4900,9 @@ impl<'a> Parser<'a> { self.parse_create_semantic_view(or_replace) } else if self.parse_keyword(Keyword::TAG) { self.parse_create_tag(or_replace) + } else if self.parse_keyword(Keyword::POLICY) { + // PostgreSQL row-security policy: `CREATE POLICY ON
...` + self.parse_create_postgres_policy() } else if let Some(stmt) = self.maybe_parse(|p| p.parse_create_snowflake_policy(or_replace)) { // Snowflake security/governance policy definitions @@ -5065,6 +5068,182 @@ impl<'a> Parser<'a> { }) } + /// Parse a PostgreSQL `CREATE POLICY`. `POLICY` has already been consumed. + /// ` ON
[AS {PERMISSIVE|RESTRICTIVE}] [FOR ] [TO ,...] + /// [USING (expr)] [WITH CHECK (expr)]`. + /// + fn parse_create_postgres_policy(&mut self) -> Result { + let name = self.parse_identifier_no_span()?; + self.expect_keyword(Keyword::ON)?; + let table_name = self.parse_object_name(false)?; + let permissive = if self.parse_keyword(Keyword::AS) { + if self.parse_word_ci("PERMISSIVE") { + Some(true) + } else if self.parse_word_ci("RESTRICTIVE") { + Some(false) + } else { + return self.expected("PERMISSIVE or RESTRICTIVE after AS", self.peek_token()); + } + } else { + None + }; + let command = self.parse_optional_policy_command()?; + let to_roles = if self.parse_keyword(Keyword::TO) { + self.parse_comma_separated(|p| p.parse_identifier_no_span())? + } else { + vec![] + }; + let using = self.parse_optional_parenthesized_expr(Keyword::USING)?; + let with_check = if self.parse_keywords(&[Keyword::WITH, Keyword::CHECK]) { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Some(Box::new(expr)) + } else { + None + }; + Ok(Statement::CreatePostgresPolicy { + name, + table_name, + permissive, + command, + to_roles, + using, + with_check, + }) + } + + /// Parse a PostgreSQL `ALTER POLICY ON
...`. `POLICY` already consumed. + /// + fn parse_alter_postgres_policy(&mut self) -> Result { + let name = self.parse_identifier_no_span()?; + self.expect_keyword(Keyword::ON)?; + let table_name = self.parse_object_name(false)?; + if self.parse_keywords(&[Keyword::RENAME, Keyword::TO]) { + let rename_to = self.parse_identifier_no_span()?; + return Ok(Statement::AlterPostgresPolicy { + name, + table_name, + rename_to: Some(rename_to), + to_roles: vec![], + using: None, + with_check: None, + }); + } + let to_roles = if self.parse_keyword(Keyword::TO) { + self.parse_comma_separated(|p| p.parse_identifier_no_span())? + } else { + vec![] + }; + let using = self.parse_optional_parenthesized_expr(Keyword::USING)?; + let with_check = if self.parse_keywords(&[Keyword::WITH, Keyword::CHECK]) { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Some(Box::new(expr)) + } else { + None + }; + Ok(Statement::AlterPostgresPolicy { + name, + table_name, + rename_to: None, + to_roles, + using, + with_check, + }) + } + + /// Parse a PostgreSQL `DROP POLICY [IF EXISTS] ON
[CASCADE|RESTRICT]`. + /// `POLICY` has already been consumed. + /// + fn parse_drop_postgres_policy(&mut self) -> Result { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_identifier_no_span()?; + self.expect_keyword(Keyword::ON)?; + let table_name = self.parse_object_name(false)?; + let option = match self.parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) { + Some(Keyword::CASCADE) => Some(ReferentialAction::Cascade), + Some(Keyword::RESTRICT) => Some(ReferentialAction::Restrict), + _ => None, + }; + Ok(Statement::DropPostgresPolicy { + if_exists, + name, + table_name, + option, + }) + } + + /// PostgreSQL `{ ENABLE | DISABLE | FORCE | NO FORCE } ROW LEVEL SECURITY`. + /// Consumes the whole clause only when `ROW LEVEL SECURITY` follows the mode + /// (so bare `FORCE` / `ENABLE` used by other clauses aren't swallowed). + fn parse_optional_row_level_security( + &mut self, + ) -> Result, ParserError> { + let start = self.index; + // ENABLE / DISABLE aren't reserved keywords; match by value. + let mode = if self.parse_word_ci("ENABLE") { + RowLevelSecurityMode::Enable + } else if self.parse_word_ci("DISABLE") { + RowLevelSecurityMode::Disable + } else if self.parse_keywords(&[Keyword::NO, Keyword::FORCE]) { + RowLevelSecurityMode::NoForce + } else if self.parse_keyword(Keyword::FORCE) { + RowLevelSecurityMode::Force + } else { + return Ok(None); + }; + if self.parse_keywords(&[Keyword::ROW, Keyword::LEVEL, Keyword::SECURITY]) { + Ok(Some(mode)) + } else { + self.index = start; // not a row-level-security toggle; revert + Ok(None) + } + } + + /// `FOR { ALL | SELECT | INSERT | UPDATE | DELETE }` (PostgreSQL policy command). 
+ fn parse_optional_policy_command(&mut self) -> Result, ParserError> { + if !self.parse_keyword(Keyword::FOR) { + return Ok(None); + } + let cmd = match self.parse_one_of_keywords(&[ + Keyword::ALL, + Keyword::SELECT, + Keyword::INSERT, + Keyword::UPDATE, + Keyword::DELETE, + ]) { + Some(Keyword::ALL) => PolicyCommand::All, + Some(Keyword::SELECT) => PolicyCommand::Select, + Some(Keyword::INSERT) => PolicyCommand::Insert, + Some(Keyword::UPDATE) => PolicyCommand::Update, + Some(Keyword::DELETE) => PolicyCommand::Delete, + _ => { + return self.expected( + "ALL, SELECT, INSERT, UPDATE or DELETE after FOR", + self.peek_token(), + ) + } + }; + Ok(Some(cmd)) + } + + /// Parse an optional ` ( )` clause, returning the boxed expr. + fn parse_optional_parenthesized_expr( + &mut self, + keyword: Keyword, + ) -> Result>, ParserError> { + if self.parse_keyword(keyword) { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Ok(Some(Box::new(expr))) + } else { + Ok(None) + } + } + /// Parse a Snowflake `CREATE TAG` definition. `TAG` has already been consumed. /// `CREATE [OR REPLACE] TAG [IF NOT EXISTS] [ALLOWED_VALUES '' [, ...]] /// [ = ...]`. @@ -6774,6 +6953,12 @@ impl<'a> Parser<'a> { } } + // PostgreSQL `DROP POLICY [IF EXISTS] ON
[CASCADE|RESTRICT]`. + // (Snowflake `DROP POLICY` is prefixed, so bare `POLICY` is Postgres.) + if !temporary && self.parse_keyword(Keyword::POLICY) { + return self.parse_drop_postgres_policy(); + } + let object_type = if self.parse_keyword(Keyword::TABLE) { ObjectType::Table } else if self.parse_keywords(&[Keyword::MATERIALIZED, Keyword::VIEW]) { @@ -10008,6 +10193,9 @@ impl<'a> Parser<'a> { let exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?; self.expect_token(&Token::RParen)?; AlterTableOperation::ClusterBy { exprs } + } else if let Some(mode) = self.parse_optional_row_level_security()? { + // PostgreSQL: { ENABLE | DISABLE | FORCE | NO FORCE } ROW LEVEL SECURITY + AlterTableOperation::RowLevelSecurity { mode } } else { return self.expected( "ADD, RENAME, PARTITION, SWAP or DROP after ALTER TABLE", @@ -10043,6 +10231,10 @@ impl<'a> Parser<'a> { operations, }); } + // PostgreSQL `ALTER POLICY ON
...`. + if self.parse_keyword(Keyword::POLICY) { + return self.parse_alter_postgres_policy(); + } let object_type = self.expect_one_of_keywords(&[ Keyword::VIEW, Keyword::TABLE, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index c33d723f2f..e78bd96a6f 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2882,6 +2882,46 @@ fn pg() -> TestedDialects { } } +#[test] +fn parse_create_alter_drop_policy() { + // PostgreSQL row-security policies. + // https://www.postgresql.org/docs/current/sql-createpolicy.html + pg().verified_stmt("CREATE POLICY my_policy ON my_table USING (user_id = current_user)"); + pg().verified_stmt( + "CREATE POLICY p ON t AS RESTRICTIVE FOR SELECT TO PUBLIC, alice USING (a > 1) WITH CHECK (b < 2)", + ); + pg().verified_stmt("ALTER POLICY p ON t RENAME TO p2"); + pg().verified_stmt("ALTER POLICY p ON t TO r USING (x)"); + pg().verified_stmt("DROP POLICY p ON t"); + pg().verified_stmt("DROP POLICY IF EXISTS p ON t CASCADE"); + + // ALTER TABLE row-level-security toggles. + // https://www.postgresql.org/docs/current/sql-altertable.html + pg().verified_stmt("ALTER TABLE t ENABLE ROW LEVEL SECURITY"); + pg().verified_stmt("ALTER TABLE t DISABLE ROW LEVEL SECURITY"); + pg().verified_stmt("ALTER TABLE t FORCE ROW LEVEL SECURITY"); + pg().verified_stmt("ALTER TABLE t NO FORCE ROW LEVEL SECURITY"); + + // The predicate (and its column/table refs) is preserved for lineage. + match pg().verified_stmt( + "CREATE POLICY pol ON accounts FOR SELECT TO mgr USING (dept IN (SELECT dept FROM allowed))", + ) { + Statement::CreatePostgresPolicy { + name, + table_name, + command, + using, + .. 
+ } => { + assert_eq!(name.to_string(), "pol"); + assert_eq!(table_name.to_string(), "accounts"); + assert_eq!(command, Some(PolicyCommand::Select)); + assert!(using.unwrap().to_string().contains("allowed")); + } + other => panic!("expected CreatePostgresPolicy, got {other:?}"), + } +} + fn pg_and_generic() -> TestedDialects { TestedDialects { dialects: vec![Box::new(PostgreSqlDialect {}), Box::new(GenericDialect {})], From 154df25133af1caf7a956c8f4b99245009196d20 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 2 Jun 2026 01:29:11 +0200 Subject: [PATCH 14/16] feat(mssql): parse SECURITY POLICY DDL and column MASKED WITH SQL Server row-level security and dynamic data masking: - CREATE/ALTER/DROP SECURITY POLICY with comma-separated { ADD | ALTER | DROP } { FILTER | BLOCK } PREDICATE () ON
[] actions, plus WITH (STATE/SCHEMABINDING) and NOT FOR REPLICATION -> Statement::{Create,Alter,Drop}SecurityPolicy + SecurityPolicyPredicate. - Column dynamic data masking `MASKED WITH (FUNCTION = '')` -> ColumnOption::MaskedWith. Each predicate keeps its function name and target table for lineage. Grammar per SQL Server docs: https://learn.microsoft.com/en-us/sql/t-sql/statements/create-security-policy-transact-sql https://learn.microsoft.com/en-us/sql/relational-databases/security/dynamic-data-masking --- src/ast/ddl.rs | 105 +++++++++++++++++++++++ src/ast/mod.rs | 89 +++++++++++++++++++- src/parser/mod.rs | 175 +++++++++++++++++++++++++++++++++++++++ tests/sqlparser_mssql.rs | 38 +++++++++ 4 files changed, 404 insertions(+), 3 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 5653557ecd..c3a721d4b3 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -1059,6 +1059,13 @@ pub enum ColumnOption { seed: Option, increment: Option, }, + /// SQL Server dynamic data masking: `MASKED WITH (FUNCTION = '')`. + /// The mask function descriptor is an opaque string literal (e.g. + /// `partial(1,"xxx",1)`, `email()`, `default()`). + /// + MaskedWith { + function: String, + }, } impl fmt::Display for ColumnOption { @@ -1174,6 +1181,13 @@ impl fmt::Display for ColumnOption { } Ok(()) } + MaskedWith { function } => { + write!( + f, + "MASKED WITH (FUNCTION = '{}')", + escape_single_quote_string(function) + ) + } } } } @@ -1489,6 +1503,97 @@ impl fmt::Display for PolicyKind { } } +/// One predicate action inside a SQL Server `CREATE/ALTER SECURITY POLICY`. +/// `{ ADD | ALTER | DROP } { FILTER | BLOCK } PREDICATE [()] ON
+/// []`. +/// +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct SecurityPolicyPredicate { + pub op: SecurityPolicyPredicateOp, + pub kind: SecurityPolicyPredicateKind, + /// The table-valued predicate function (None for `DROP`). + pub function: Option, + /// The function's column/expression arguments (empty for `DROP`). + pub args: Vec, + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub table_name: ObjectName, + pub block_dml: Option, +} + +impl fmt::Display for SecurityPolicyPredicate { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{} {} PREDICATE", self.op, self.kind)?; + if let Some(function) = &self.function { + write!(f, " {function}({})", display_comma_separated(&self.args))?; + } + write!(f, " ON {}", self.table_name)?; + if let Some(block_dml) = &self.block_dml { + write!(f, " {block_dml}")?; + } + Ok(()) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum SecurityPolicyPredicateOp { + Add, + Alter, + Drop, +} + +impl fmt::Display for SecurityPolicyPredicateOp { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + SecurityPolicyPredicateOp::Add => "ADD", + SecurityPolicyPredicateOp::Alter => "ALTER", + SecurityPolicyPredicateOp::Drop => "DROP", + }) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum SecurityPolicyPredicateKind { + Filter, + Block, +} + +impl fmt::Display for SecurityPolicyPredicateKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + 
SecurityPolicyPredicateKind::Filter => "FILTER", + SecurityPolicyPredicateKind::Block => "BLOCK", + }) + } +} + +/// The block-DML qualifier on a SQL Server security-policy BLOCK predicate. +#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum SecurityPolicyBlockDml { + AfterInsert, + AfterUpdate, + BeforeUpdate, + BeforeDelete, +} + +impl fmt::Display for SecurityPolicyBlockDml { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + SecurityPolicyBlockDml::AfterInsert => "AFTER INSERT", + SecurityPolicyBlockDml::AfterUpdate => "AFTER UPDATE", + SecurityPolicyBlockDml::BeforeUpdate => "BEFORE UPDATE", + SecurityPolicyBlockDml::BeforeDelete => "BEFORE DELETE", + }) + } +} + /// PostgreSQL `ALTER TABLE ... { ENABLE | DISABLE | FORCE | NO FORCE } ROW LEVEL SECURITY`. #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] diff --git a/src/ast/mod.rs b/src/ast/mod.rs index c7808cb13f..5192717ac8 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -40,9 +40,10 @@ pub use self::ddl::{ ColumnMask, ColumnOption, ColumnOptionDef, ColumnPolicy, ColumnPolicyProperty, ConstraintCharacteristics, CreateTableLikeOption, Deduplicate, GeneratedAs, IndexType, KeyOrIndexDisplay, Partition, PolicyArg, PolicyCommand, PolicyKind, ProcedureParam, - ReferentialAction, RowLevelSecurityMode, TableConstraint, TablePolicy, TablePolicyKind, - TableProjection, Tag, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, - ViewSecurity, + ReferentialAction, RowLevelSecurityMode, SecurityPolicyBlockDml, SecurityPolicyPredicate, + SecurityPolicyPredicateKind, SecurityPolicyPredicateOp, TableConstraint, TablePolicy, + TablePolicyKind, TableProjection, Tag, UserDefinedTypeCompositeAttributeDef, + UserDefinedTypeRepresentation, 
ViewSecurity, }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ @@ -2149,6 +2150,34 @@ pub enum Statement { table_name: ObjectName, option: Option, }, + /// ```sql + /// CREATE SECURITY POLICY + /// { ADD [FILTER|BLOCK] PREDICATE () ON
[] } [, ...] + /// [ WITH ( STATE = {ON|OFF} [, SCHEMABINDING = {ON|OFF}] ) ] + /// [ NOT FOR REPLICATION ] + /// ``` + /// SQL Server row-level security policy. + /// [T-SQL]: https://learn.microsoft.com/en-us/sql/t-sql/statements/create-security-policy-transact-sql + CreateSecurityPolicy { + name: ObjectName, + predicates: Vec, + /// `WITH (STATE = {ON|OFF})`. + state: Option, + /// `SCHEMABINDING = {ON|OFF}`. + schemabinding: Option, + not_for_replication: bool, + }, + /// `ALTER SECURITY POLICY { ADD|ALTER|DROP ... PREDICATE ... } [, ...] [WITH (STATE=...)]` + /// [T-SQL]: https://learn.microsoft.com/en-us/sql/t-sql/statements/alter-security-policy-transact-sql + AlterSecurityPolicy { + name: ObjectName, + predicates: Vec, + state: Option, + not_for_replication: bool, + }, + /// `DROP SECURITY POLICY [IF EXISTS] ` + /// [T-SQL]: https://learn.microsoft.com/en-us/sql/t-sql/statements/drop-security-policy-transact-sql + DropSecurityPolicy { if_exists: bool, name: ObjectName }, /// See [postgres](https://www.postgresql.org/docs/current/sql-createrole.html) CreateRole { names: Vec, @@ -4043,6 +4072,60 @@ impl fmt::Display for Statement { write!(f, " FILTER USING ({filter_using})")?; Ok(()) } + Statement::CreateSecurityPolicy { + name, + predicates, + state, + schemabinding, + not_for_replication, + } => { + write!(f, "CREATE SECURITY POLICY {name}")?; + if !predicates.is_empty() { + write!(f, " {}", display_comma_separated(predicates))?; + } + let mut with_opts: Vec = vec![]; + if let Some(state) = state { + with_opts.push(format!("STATE = {}", if *state { "ON" } else { "OFF" })); + } + if let Some(sb) = schemabinding { + with_opts.push(format!( + "SCHEMABINDING = {}", + if *sb { "ON" } else { "OFF" } + )); + } + if !with_opts.is_empty() { + write!(f, " WITH ({})", with_opts.join(", "))?; + } + if *not_for_replication { + write!(f, " NOT FOR REPLICATION")?; + } + Ok(()) + } + Statement::AlterSecurityPolicy { + name, + predicates, + state, + not_for_replication, + } 
=> { + write!(f, "ALTER SECURITY POLICY {name}")?; + if !predicates.is_empty() { + write!(f, " {}", display_comma_separated(predicates))?; + } + if let Some(state) = state { + write!(f, " WITH (STATE = {})", if *state { "ON" } else { "OFF" })?; + } + if *not_for_replication { + write!(f, " NOT FOR REPLICATION")?; + } + Ok(()) + } + Statement::DropSecurityPolicy { if_exists, name } => { + write!( + f, + "DROP SECURITY POLICY {}{name}", + if *if_exists { "IF EXISTS " } else { "" } + ) + } Statement::CreatePostgresPolicy { name, table_name, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 26191b6ca0..c04105434b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4900,6 +4900,9 @@ impl<'a> Parser<'a> { self.parse_create_semantic_view(or_replace) } else if self.parse_keyword(Keyword::TAG) { self.parse_create_tag(or_replace) + } else if self.parse_keywords(&[Keyword::SECURITY, Keyword::POLICY]) { + // SQL Server row-level security: `CREATE SECURITY POLICY ADD ...` + self.parse_create_security_policy() } else if self.parse_keyword(Keyword::POLICY) { // PostgreSQL row-security policy: `CREATE POLICY ON
...` self.parse_create_postgres_policy() @@ -5202,6 +5205,157 @@ impl<'a> Parser<'a> { } } + /// Parse a SQL Server `CREATE SECURITY POLICY`. `SECURITY POLICY` consumed. + /// + fn parse_create_security_policy(&mut self) -> Result { + let name = self.parse_object_name(false)?; + let predicates = self.parse_comma_separated(Parser::parse_security_policy_predicate)?; + let (state, schemabinding) = self.parse_security_policy_with_options()?; + let not_for_replication = + self.parse_keywords(&[Keyword::NOT, Keyword::FOR, Keyword::REPLICATION]); + Ok(Statement::CreateSecurityPolicy { + name, + predicates, + state, + schemabinding, + not_for_replication, + }) + } + + /// Parse a SQL Server `ALTER SECURITY POLICY`. `SECURITY POLICY` consumed. + /// + fn parse_alter_security_policy(&mut self) -> Result { + let name = self.parse_object_name(false)?; + // Predicates are optional in ALTER (e.g. `ALTER SECURITY POLICY p WITH (STATE = ON)`). + let predicates = if matches!( + self.peek_token_kind(), + Token::Word(w) if matches!(w.keyword, Keyword::ADD | Keyword::ALTER | Keyword::DROP) + ) { + self.parse_comma_separated(Parser::parse_security_policy_predicate)? + } else { + vec![] + }; + let (state, _schemabinding) = self.parse_security_policy_with_options()?; + let not_for_replication = + self.parse_keywords(&[Keyword::NOT, Keyword::FOR, Keyword::REPLICATION]); + Ok(Statement::AlterSecurityPolicy { + name, + predicates, + state, + not_for_replication, + }) + } + + /// Parse one `{ ADD | ALTER | DROP } { FILTER | BLOCK } PREDICATE [(args)] + /// ON
[]` clause. + fn parse_security_policy_predicate(&mut self) -> Result { + let op = if self.parse_keyword(Keyword::ADD) { + SecurityPolicyPredicateOp::Add + } else if self.parse_keyword(Keyword::ALTER) { + SecurityPolicyPredicateOp::Alter + } else if self.parse_keyword(Keyword::DROP) { + SecurityPolicyPredicateOp::Drop + } else { + return self.expected("ADD, ALTER or DROP", self.peek_token()); + }; + let kind = if self.parse_keyword(Keyword::FILTER) { + SecurityPolicyPredicateKind::Filter + } else if self.parse_keyword(Keyword::BLOCK) { + SecurityPolicyPredicateKind::Block + } else { + return self.expected("FILTER or BLOCK", self.peek_token()); + }; + if !self.parse_word_ci("PREDICATE") { + return self.expected("PREDICATE", self.peek_token()); + } + // DROP has no function/args; ADD/ALTER carry `()`. + let (function, args) = if op == SecurityPolicyPredicateOp::Drop { + (None, vec![]) + } else { + let function = self.parse_object_name(false)?; + self.expect_token(&Token::LParen)?; + let args = self.parse_comma_separated(|p| p.parse_expr())?; + self.expect_token(&Token::RParen)?; + (Some(function), args) + }; + self.expect_keyword(Keyword::ON)?; + let table_name = self.parse_object_name(false)?; + let block_dml = self.parse_optional_security_policy_block_dml(); + Ok(SecurityPolicyPredicate { + op, + kind, + function, + args, + table_name, + block_dml, + }) + } + + /// `AFTER { INSERT | UPDATE } | BEFORE { UPDATE | DELETE }` on a BLOCK predicate. 
+ fn parse_optional_security_policy_block_dml(&mut self) -> Option { + if self.parse_keyword(Keyword::AFTER) { + if self.parse_keyword(Keyword::INSERT) { + Some(SecurityPolicyBlockDml::AfterInsert) + } else if self.parse_keyword(Keyword::UPDATE) { + Some(SecurityPolicyBlockDml::AfterUpdate) + } else { + self.prev_token(); + None + } + } else if self.parse_keyword(Keyword::BEFORE) { + if self.parse_keyword(Keyword::UPDATE) { + Some(SecurityPolicyBlockDml::BeforeUpdate) + } else if self.parse_keyword(Keyword::DELETE) { + Some(SecurityPolicyBlockDml::BeforeDelete) + } else { + self.prev_token(); + None + } + } else { + None + } + } + + /// `WITH ( STATE = {ON|OFF} [, SCHEMABINDING = {ON|OFF}] )`. Returns + /// `(state, schemabinding)`. + fn parse_security_policy_with_options( + &mut self, + ) -> Result<(Option, Option), ParserError> { + if !self.parse_keyword(Keyword::WITH) { + return Ok((None, None)); + } + self.expect_token(&Token::LParen)?; + let mut state = None; + let mut schemabinding = None; + loop { + if self.parse_word_ci("STATE") { + self.expect_token(&Token::Eq)?; + state = Some(self.parse_on_off()?); + } else if self.parse_word_ci("SCHEMABINDING") { + self.expect_token(&Token::Eq)?; + schemabinding = Some(self.parse_on_off()?); + } else { + return self.expected("STATE or SCHEMABINDING", self.peek_token()); + } + if !self.consume_token(&Token::Comma) { + break; + } + } + self.expect_token(&Token::RParen)?; + Ok((state, schemabinding)) + } + + /// Parse `ON` (true) or `OFF` (false); OFF isn't a reserved keyword. + fn parse_on_off(&mut self) -> Result { + if self.parse_keyword(Keyword::ON) { + Ok(true) + } else if self.parse_word_ci("OFF") { + Ok(false) + } else { + self.expected("ON or OFF", self.peek_token()) + } + } + /// `FOR { ALL | SELECT | INSERT | UPDATE | DELETE }` (PostgreSQL policy command). 
fn parse_optional_policy_command(&mut self) -> Result, ParserError> { if !self.parse_keyword(Keyword::FOR) { @@ -6953,6 +7107,13 @@ impl<'a> Parser<'a> { } } + // SQL Server `DROP SECURITY POLICY [IF EXISTS] `. + if !temporary && self.parse_keywords(&[Keyword::SECURITY, Keyword::POLICY]) { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_object_name(false)?; + return Ok(Statement::DropSecurityPolicy { if_exists, name }); + } + // PostgreSQL `DROP POLICY [IF EXISTS] ON
[CASCADE|RESTRICT]`. // (Snowflake `DROP POLICY` is prefixed, so bare `POLICY` is Postgres.) if !temporary && self.parse_keyword(Keyword::POLICY) { @@ -8851,6 +9012,16 @@ impl<'a> Parser<'a> { Token::DollarQuotedString(s) => Ok(Some(ColumnOption::Comment(s.value))), _ => self.expected("string", next_token), } + } else if self.parse_word_ci("MASKED") { + // SQL Server dynamic data masking: `MASKED WITH (FUNCTION = '')`. + // https://learn.microsoft.com/en-us/sql/relational-databases/security/dynamic-data-masking + self.expect_keyword(Keyword::WITH)?; + self.expect_token(&Token::LParen)?; + self.expect_keyword(Keyword::FUNCTION)?; + self.expect_token(&Token::Eq)?; + let function = self.parse_literal_string()?; + self.expect_token(&Token::RParen)?; + Ok(Some(ColumnOption::MaskedWith { function })) } else if self.parse_keyword(Keyword::NULL) { Ok(Some(ColumnOption::Null)) } else if self.parse_keyword(Keyword::DEFAULT) { @@ -10231,6 +10402,10 @@ impl<'a> Parser<'a> { operations, }); } + // SQL Server `ALTER SECURITY POLICY ...`. + if self.parse_keywords(&[Keyword::SECURITY, Keyword::POLICY]) { + return self.parse_alter_security_policy(); + } // PostgreSQL `ALTER POLICY ON
...`. if self.parse_keyword(Keyword::POLICY) { return self.parse_alter_postgres_policy(); diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index bd9ead2ab2..c453699d40 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -639,6 +639,44 @@ fn ms() -> TestedDialects { options: None, } } + +#[test] +fn parse_security_policy_and_masked() { + // Column dynamic data masking. + // https://learn.microsoft.com/en-us/sql/relational-databases/security/dynamic-data-masking + ms().verified_stmt( + "CREATE TABLE t (Email VARCHAR(100) MASKED WITH (FUNCTION = 'email()') NOT NULL)", + ); + ms().verified_stmt( + "CREATE TABLE t (DiscountCode SMALLINT MASKED WITH (FUNCTION = 'random(1, 100)') NULL)", + ); + + // CREATE / ALTER / DROP SECURITY POLICY (row-level security). + // https://learn.microsoft.com/en-us/sql/t-sql/statements/create-security-policy-transact-sql + ms().verified_stmt( + "CREATE SECURITY POLICY rls.SecPol ADD FILTER PREDICATE rls.fn(TenantId) ON dbo.Sales, ADD BLOCK PREDICATE rls.fn(TenantId) ON dbo.Sales AFTER INSERT WITH (STATE = ON, SCHEMABINDING = ON) NOT FOR REPLICATION", + ); + ms().verified_stmt("ALTER SECURITY POLICY pol WITH (STATE = ON)"); + ms().verified_stmt("ALTER SECURITY POLICY pol DROP FILTER PREDICATE ON dbo.Sales"); + ms().verified_stmt("DROP SECURITY POLICY secPolicy"); + ms().verified_stmt("DROP SECURITY POLICY IF EXISTS secPolicy"); + + // Predicate exposes the function + target table for lineage. + match ms().verified_stmt("CREATE SECURITY POLICY p ADD FILTER PREDICATE s.f(t_id) ON dbo.Sales") + { + Statement::CreateSecurityPolicy { + name, predicates, .. 
+ } => { + assert_eq!(name.to_string(), "p"); + assert_eq!(predicates.len(), 1); + assert_eq!(predicates[0].op, SecurityPolicyPredicateOp::Add); + assert_eq!(predicates[0].kind, SecurityPolicyPredicateKind::Filter); + assert_eq!(predicates[0].function.as_ref().unwrap().to_string(), "s.f"); + assert_eq!(predicates[0].table_name.to_string(), "dbo.Sales"); + } + other => panic!("expected CreateSecurityPolicy, got {other:?}"), + } +} fn ms_and_generic() -> TestedDialects { TestedDialects { dialects: vec![Box::new(MsSqlDialect {}), Box::new(GenericDialect {})], From e096c1149102aa150593e1d2c725004cc8ed23f5 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 2 Jun 2026 01:39:14 +0200 Subject: [PATCH 15/16] feat(redshift): parse RLS and MASKING policy DDL Amazon Redshift row-level security and dynamic data masking: - CREATE RLS POLICY [WITH (cols) [[AS] alias]] USING (predicate); CREATE MASKING POLICY [IF NOT EXISTS] WITH (cols) USING (expr,...) -> Statement::CreateRedshiftPolicy. - ATTACH/DETACH { RLS | MASKING } POLICY ... ON
[,...] [(out_cols)] [USING (in_cols)] { TO | FROM } grantee [, ...] [PRIORITY n] -> Statement::AttachRedshiftPolicy (grantees: user / ROLE role / PUBLIC). - DROP RLS POLICY [IF EXISTS] [CASCADE|RESTRICT] -> Statement::DropRedshiftPolicy (MASKING reuses the generic DROP path). - ALTER MASKING POLICY USING (expr,...) -> AlterRedshiftMaskingPolicy. The masking CREATE is dispatched via maybe_parse so other dialects' `MASKING POLICY` shapes (e.g. ClickHouse `... ON ... UPDATE ...`) revert to the fallback. USING predicates/expressions are real Expr nodes, preserving column refs. Grammar per Redshift docs: https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_RLS_POLICY.html https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_MASKING_POLICY.html --- src/ast/ddl.rs | 42 ++++++++ src/ast/mod.rs | 138 +++++++++++++++++++++++- src/parser/mod.rs | 204 +++++++++++++++++++++++++++++++++++- tests/sqlparser_redshift.rs | 52 +++++++++ 4 files changed, 430 insertions(+), 6 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index c3a721d4b3..053694ade3 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -1503,6 +1503,48 @@ impl fmt::Display for PolicyKind { } } +/// Kind of an Amazon Redshift governance policy. +/// +#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum RedshiftPolicyKind { + /// Row-level security policy (`RLS POLICY`). + Rls, + /// Dynamic data-masking policy (`MASKING POLICY`). + Masking, +} + +impl fmt::Display for RedshiftPolicyKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + RedshiftPolicyKind::Rls => "RLS POLICY", + RedshiftPolicyKind::Masking => "MASKING POLICY", + }) + } +} + +/// A grantee target in a Redshift `ATTACH`/`DETACH` policy statement +/// (`{ | ROLE | PUBLIC }`). 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum RedshiftGrantee { + User(Ident), + Role(Ident), + Public, +} + +impl fmt::Display for RedshiftGrantee { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + RedshiftGrantee::User(name) => write!(f, "{name}"), + RedshiftGrantee::Role(name) => write!(f, "ROLE {name}"), + RedshiftGrantee::Public => write!(f, "PUBLIC"), + } + } +} + /// One predicate action inside a SQL Server `CREATE/ALTER SECURITY POLICY`. /// `{ ADD | ALTER | DROP } { FILTER | BLOCK } PREDICATE [()] ON
/// []`. diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 5192717ac8..b067cee0de 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -40,10 +40,10 @@ pub use self::ddl::{ ColumnMask, ColumnOption, ColumnOptionDef, ColumnPolicy, ColumnPolicyProperty, ConstraintCharacteristics, CreateTableLikeOption, Deduplicate, GeneratedAs, IndexType, KeyOrIndexDisplay, Partition, PolicyArg, PolicyCommand, PolicyKind, ProcedureParam, - ReferentialAction, RowLevelSecurityMode, SecurityPolicyBlockDml, SecurityPolicyPredicate, - SecurityPolicyPredicateKind, SecurityPolicyPredicateOp, TableConstraint, TablePolicy, - TablePolicyKind, TableProjection, Tag, UserDefinedTypeCompositeAttributeDef, - UserDefinedTypeRepresentation, ViewSecurity, + RedshiftGrantee, RedshiftPolicyKind, ReferentialAction, RowLevelSecurityMode, + SecurityPolicyBlockDml, SecurityPolicyPredicate, SecurityPolicyPredicateKind, + SecurityPolicyPredicateOp, TableConstraint, TablePolicy, TablePolicyKind, TableProjection, Tag, + UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, ViewSecurity, }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ @@ -2178,6 +2178,59 @@ pub enum Statement { /// `DROP SECURITY POLICY [IF EXISTS] ` /// [T-SQL]: https://learn.microsoft.com/en-us/sql/t-sql/statements/drop-security-policy-transact-sql DropSecurityPolicy { if_exists: bool, name: ObjectName }, + /// ```sql + /// CREATE RLS POLICY [WITH ( [, ...]) [[AS] ]] USING () + /// CREATE MASKING POLICY [IF NOT EXISTS] WITH ( [, ...]) USING ( [, ...]) + /// ``` + /// Amazon Redshift row-level-security / dynamic-data-masking policy. + /// [Redshift RLS]: https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_RLS_POLICY.html + /// [Redshift DDM]: https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_MASKING_POLICY.html + CreateRedshiftPolicy { + kind: RedshiftPolicyKind, + if_not_exists: bool, + name: ObjectName, + /// `WITH (, ...)` input columns. 
+ with_columns: Vec, + /// RLS `[AS] ` for the relation inside the predicate. + alias: Option, + /// `USING ( [, ...])` predicate (RLS) / masking expression(s). + using: Vec, + }, + /// ```sql + /// ATTACH { RLS | MASKING } POLICY ON [TABLE]
[, ...] + /// [ ( [, ...] ) ] [ USING ( [, ...] ) ] + /// { TO | FROM } { | ROLE | PUBLIC } [, ...] [ PRIORITY ] + /// ``` + /// Redshift attach/detach of a policy to a relation. + /// [Redshift]: https://docs.aws.amazon.com/redshift/latest/dg/r_ATTACH_RLS_POLICY.html + AttachRedshiftPolicy { + kind: RedshiftPolicyKind, + /// `false` = `ATTACH ... TO`, `true` = `DETACH ... FROM`. + detach: bool, + name: ObjectName, + tables: Vec, + /// Masking: output column list directly after the relation. + output_columns: Vec, + /// Masking: `USING (, ...)` input columns. + using_columns: Vec, + grantees: Vec, + /// Masking: `PRIORITY `. + priority: Option, + }, + /// ```sql + /// DROP RLS POLICY [IF EXISTS] [CASCADE | RESTRICT] + /// DROP MASKING POLICY + /// ``` + /// [Redshift]: https://docs.aws.amazon.com/redshift/latest/dg/r_DROP_RLS_POLICY.html + DropRedshiftPolicy { + kind: RedshiftPolicyKind, + if_exists: bool, + name: ObjectName, + option: Option, + }, + /// `ALTER MASKING POLICY USING ( [, ...])` (Redshift). 
+ /// [Redshift]: https://docs.aws.amazon.com/redshift/latest/dg/r_ALTER_MASKING_POLICY.html + AlterRedshiftMaskingPolicy { name: ObjectName, using: Vec }, /// See [postgres](https://www.postgresql.org/docs/current/sql-createrole.html) CreateRole { names: Vec, @@ -4126,6 +4179,83 @@ impl fmt::Display for Statement { if *if_exists { "IF EXISTS " } else { "" } ) } + Statement::CreateRedshiftPolicy { + kind, + if_not_exists, + name, + with_columns, + alias, + using, + } => { + write!(f, "CREATE {kind} {name}")?; + if *if_not_exists { + write!(f, " IF NOT EXISTS")?; + } + if !with_columns.is_empty() { + write!(f, " WITH ({}", display_comma_separated(with_columns))?; + write!(f, ")")?; + if let Some(alias) = alias { + write!(f, " AS {alias}")?; + } + } + write!(f, " USING ({})", display_comma_separated(using)) + } + Statement::AttachRedshiftPolicy { + kind, + detach, + name, + tables, + output_columns, + using_columns, + grantees, + priority, + } => { + write!( + f, + "{} {kind} {name} ON {}", + if *detach { "DETACH" } else { "ATTACH" }, + display_comma_separated(tables), + )?; + if !output_columns.is_empty() { + write!(f, " ({})", display_comma_separated(output_columns))?; + } + if !using_columns.is_empty() { + write!(f, " USING ({})", display_comma_separated(using_columns))?; + } + write!( + f, + " {} {}", + if *detach { "FROM" } else { "TO" }, + display_comma_separated(grantees), + )?; + if let Some(priority) = priority { + write!(f, " PRIORITY {priority}")?; + } + Ok(()) + } + Statement::DropRedshiftPolicy { + kind, + if_exists, + name, + option, + } => { + write!( + f, + "DROP {kind} {}{name}", + if *if_exists { "IF EXISTS " } else { "" }, + )?; + if let Some(option) = option { + write!(f, " {option}")?; + } + Ok(()) + } + Statement::AlterRedshiftMaskingPolicy { name, using } => { + write!( + f, + "ALTER MASKING POLICY {name} USING ({})", + display_comma_separated(using) + ) + } Statement::CreatePostgresPolicy { name, table_name, diff --git a/src/parser/mod.rs 
b/src/parser/mod.rs index c04105434b..3cc3303aa0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -739,6 +739,13 @@ impl<'a> Parser<'a> { Keyword::REFRESH => Ok(self.parse_refresh_materialized_view()?), Keyword::UPDATE => Ok(self.parse_update()?), Keyword::ALTER => Ok(self.parse_alter()?), + // Redshift `{ ATTACH | DETACH } { RLS | MASKING } POLICY ...`. + Keyword::ATTACH if self.peek_redshift_policy_kind() => { + Ok(self.parse_redshift_attach_policy(false)?) + } + Keyword::DETACH if self.peek_redshift_policy_kind() => { + Ok(self.parse_redshift_attach_policy(true)?) + } Keyword::EXCHANGE => { self.expect_keyword(Keyword::TABLES)?; let first = self.parse_object_name(false)?; @@ -4903,6 +4910,13 @@ impl<'a> Parser<'a> { } else if self.parse_keywords(&[Keyword::SECURITY, Keyword::POLICY]) { // SQL Server row-level security: `CREATE SECURITY POLICY ADD ...` self.parse_create_security_policy() + } else if self.peek_word_ci("RLS") + && matches!(self.peek_nth_token(1).token, Token::Word(w) if w.keyword == Keyword::POLICY) + { + // Redshift row-level security: `CREATE RLS POLICY [WITH (...)] USING (...)` + self.next_token(); // RLS + self.next_token(); // POLICY + self.parse_create_redshift_policy(RedshiftPolicyKind::Rls) } else if self.parse_keyword(Keyword::POLICY) { // PostgreSQL row-security policy: `CREATE POLICY ON
...` self.parse_create_postgres_policy() @@ -4911,8 +4925,17 @@ impl<'a> Parser<'a> { // Snowflake security/governance policy definitions // (CREATE { MASKING | ROW ACCESS | AGGREGATION | PROJECTION | JOIN } // POLICY ... AS (sig) RETURNS type -> body). maybe_parse reverts and - // falls through for non-Snowflake shapes (e.g. BigQuery's - // `ROW ACCESS POLICY ... ON table`), handled next. + // falls through for non-Snowflake shapes (handled next). + Ok(stmt) + } else if let Some(stmt) = self.maybe_parse(|p| { + // Redshift dynamic data masking (the Snowflake AS-form was tried + // above and reverted): `CREATE MASKING POLICY [IF NOT EXISTS] + // WITH (...) USING (...)`. maybe_parse reverts for other dialects' + // `MASKING POLICY` shapes (e.g. ClickHouse `... ON ... UPDATE ...`), + // which the generic fallback then handles. + p.expect_keywords(&[Keyword::MASKING, Keyword::POLICY])?; + p.parse_create_redshift_policy(RedshiftPolicyKind::Masking) + }) { Ok(stmt) } else if self.parse_keywords(&[Keyword::ROW, Keyword::ACCESS, Keyword::POLICY]) { // BigQuery row-level security: @@ -5205,6 +5228,157 @@ impl<'a> Parser<'a> { } } + /// Parse a Redshift `CREATE { RLS | MASKING } POLICY`. The kind keyword(s) + /// and `POLICY` have already been consumed. + /// + /// + fn parse_create_redshift_policy( + &mut self, + kind: RedshiftPolicyKind, + ) -> Result { + let name = self.parse_object_name(false)?; + // Redshift places `IF NOT EXISTS` after the policy name (masking only). + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let (with_columns, alias) = if self.parse_keyword(Keyword::WITH) { + self.expect_token(&Token::LParen)?; + let cols = self.parse_comma_separated(|p| { + let name = p.parse_identifier_no_span()?; + let data_type = p.parse_data_type()?; + Ok(PolicyArg { name, data_type }) + })?; + self.expect_token(&Token::RParen)?; + // RLS only: optional `[AS] `. 
+ let alias = if kind == RedshiftPolicyKind::Rls { + if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier_no_span()?) + } else if matches!(self.peek_token_kind(), Token::Word(w) if w.keyword == Keyword::USING) + { + None + } else { + Some(self.parse_identifier_no_span()?) + } + } else { + None + }; + (cols, alias) + } else { + (vec![], None) + }; + self.expect_keyword(Keyword::USING)?; + self.expect_token(&Token::LParen)?; + let using = self.parse_comma_separated(|p| p.parse_expr())?; + self.expect_token(&Token::RParen)?; + Ok(Statement::CreateRedshiftPolicy { + kind, + if_not_exists, + name, + with_columns, + alias, + using, + }) + } + + /// Parse a Redshift `{ ATTACH | DETACH } { RLS | MASKING } POLICY ...`. + /// The `ATTACH`/`DETACH` keyword has already been consumed. + /// + fn parse_redshift_attach_policy(&mut self, detach: bool) -> Result { + let kind = self + .parse_redshift_policy_kind() + .ok_or_else(|| ParserError::ParserError("expected RLS or MASKING POLICY".into()))?; + let name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::ON)?; + let _ = self.parse_keyword(Keyword::TABLE); // optional [TABLE] + let tables = self.parse_comma_separated(|p| p.parse_object_name(false))?; + // Masking: output column list directly after the relation, optional + // `USING (input cols)`. + let output_columns = if self.peek_token_is(&Token::LParen) { + self.parse_parenthesized_column_list_no_span()? + } else { + vec![] + }; + let using_columns = if self.parse_keyword(Keyword::USING) { + self.parse_parenthesized_column_list_no_span()? + } else { + vec![] + }; + if detach { + self.expect_keyword(Keyword::FROM)?; + } else { + self.expect_keyword(Keyword::TO)?; + } + let grantees = self.parse_comma_separated(Parser::parse_redshift_grantee)?; + let priority = if self.parse_word_ci("PRIORITY") { + Some(self.parse_literal_uint()?) 
+ } else { + None + }; + Ok(Statement::AttachRedshiftPolicy { + kind, + detach, + name, + tables, + output_columns, + using_columns, + grantees, + priority, + }) + } + + /// Parse a Redshift `ALTER MASKING POLICY USING (...)`. `MASKING + /// POLICY` has already been consumed. + fn parse_alter_redshift_masking_policy(&mut self) -> Result { + let name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::USING)?; + self.expect_token(&Token::LParen)?; + let using = self.parse_comma_separated(|p| p.parse_expr())?; + self.expect_token(&Token::RParen)?; + Ok(Statement::AlterRedshiftMaskingPolicy { name, using }) + } + + /// `{ | ROLE | PUBLIC }` grantee in a Redshift attach/detach. + fn parse_redshift_grantee(&mut self) -> Result { + if self.parse_word_ci("PUBLIC") { + Ok(RedshiftGrantee::Public) + } else if self.parse_keyword(Keyword::ROLE) { + Ok(RedshiftGrantee::Role(self.parse_identifier_no_span()?)) + } else { + Ok(RedshiftGrantee::User(self.parse_identifier_no_span()?)) + } + } + + /// True if the next tokens are a Redshift policy kind (`RLS POLICY` / + /// `MASKING POLICY`). Non-consuming; used to guard ATTACH/DETACH dispatch. + fn peek_redshift_policy_kind(&self) -> bool { + let kind_ok = self.peek_word_ci("RLS") + || matches!(self.peek_token_kind(), Token::Word(w) if w.keyword == Keyword::MASKING); + kind_ok + && matches!(self.peek_nth_token(1).token, Token::Word(w) if w.keyword == Keyword::POLICY) + } + + /// Peek-and-consume a Redshift policy kind: `RLS POLICY` / `MASKING POLICY`. 
+ fn parse_redshift_policy_kind(&mut self) -> Option { + if self.peek_word_ci("RLS") + && matches!(self.peek_nth_token(1).token, Token::Word(w) if w.keyword == Keyword::POLICY) + { + self.next_token(); + self.next_token(); + Some(RedshiftPolicyKind::Rls) + } else if self.parse_keywords(&[Keyword::MASKING, Keyword::POLICY]) { + Some(RedshiftPolicyKind::Masking) + } else { + None + } + } + + /// Parse a parenthesized comma-separated identifier list, returning plain + /// `Ident`s (no span). + fn parse_parenthesized_column_list_no_span(&mut self) -> Result, ParserError> { + self.expect_token(&Token::LParen)?; + let cols = self.parse_comma_separated(|p| p.parse_identifier_no_span())?; + self.expect_token(&Token::RParen)?; + Ok(cols) + } + /// Parse a SQL Server `CREATE SECURITY POLICY`. `SECURITY POLICY` consumed. /// fn parse_create_security_policy(&mut self) -> Result { @@ -7114,6 +7288,28 @@ impl<'a> Parser<'a> { return Ok(Statement::DropSecurityPolicy { if_exists, name }); } + // Redshift `DROP RLS POLICY [IF EXISTS] [CASCADE|RESTRICT]`. + if !temporary + && self.peek_word_ci("RLS") + && matches!(self.peek_nth_token(1).token, Token::Word(w) if w.keyword == Keyword::POLICY) + { + self.next_token(); // RLS + self.next_token(); // POLICY + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_object_name(false)?; + let option = match self.parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) { + Some(Keyword::CASCADE) => Some(ReferentialAction::Cascade), + Some(Keyword::RESTRICT) => Some(ReferentialAction::Restrict), + _ => None, + }; + return Ok(Statement::DropRedshiftPolicy { + kind: RedshiftPolicyKind::Rls, + if_exists, + name, + option, + }); + } + // PostgreSQL `DROP POLICY [IF EXISTS] ON
[CASCADE|RESTRICT]`. // (Snowflake `DROP POLICY` is prefixed, so bare `POLICY` is Postgres.) if !temporary && self.parse_keyword(Keyword::POLICY) { @@ -10406,6 +10602,10 @@ impl<'a> Parser<'a> { if self.parse_keywords(&[Keyword::SECURITY, Keyword::POLICY]) { return self.parse_alter_security_policy(); } + // Redshift `ALTER MASKING POLICY USING (...)`. + if self.parse_keywords(&[Keyword::MASKING, Keyword::POLICY]) { + return self.parse_alter_redshift_masking_policy(); + } // PostgreSQL `ALTER POLICY ON
...`. if self.parse_keyword(Keyword::POLICY) { return self.parse_alter_postgres_policy(); diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index d91ed1a4f1..0e593e2a93 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -317,6 +317,58 @@ fn redshift() -> TestedDialects { } } +#[test] +fn parse_rls_and_masking_policies() { + // Row-level security. + // https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_RLS_POLICY.html + redshift().verified_stmt( + "CREATE RLS POLICY policy_concerts WITH (catgroup VARCHAR(10)) USING (catgroup = 'Concerts')", + ); + redshift().verified_stmt("CREATE RLS POLICY p USING (true)"); + redshift().verified_stmt( + "ATTACH RLS POLICY policy_concerts ON tickit_category TO ROLE analyst, ROLE dbadmin", + ); + redshift() + .verified_stmt("DETACH RLS POLICY policy_concerts ON tickit_category FROM ROLE analyst"); + redshift().verified_stmt("DROP RLS POLICY policy_concerts"); + redshift().verified_stmt("DROP RLS POLICY IF EXISTS p CASCADE"); + + // Dynamic data masking. + // https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_MASKING_POLICY.html + redshift().verified_stmt( + "CREATE MASKING POLICY mask_cc IF NOT EXISTS WITH (credit_card VARCHAR(256)) USING ('000000XXXX0000')", + ); + redshift().verified_stmt( + "ATTACH MASKING POLICY mask_cc ON credit_cards (credit_card) TO ROLE analyst PRIORITY 30", + ); + redshift().verified_stmt( + "DETACH MASKING POLICY mask_cc ON credit_cards (credit_card) FROM ROLE analyst", + ); + redshift().verified_stmt("DROP MASKING POLICY mask_cc"); + redshift().verified_stmt("ALTER MASKING POLICY mask_cc USING ('000000XXXX0000')"); + + // The RLS predicate keeps its column references for lineage. + match redshift() + .verified_stmt("CREATE RLS POLICY pol WITH (region VARCHAR(10)) AS t USING (region = 'US')") + { + Statement::CreateRedshiftPolicy { + kind, + name, + with_columns, + alias, + using, + .. 
+ } => { + assert_eq!(kind, RedshiftPolicyKind::Rls); + assert_eq!(name.to_string(), "pol"); + assert_eq!(with_columns.len(), 1); + assert_eq!(alias.unwrap().to_string(), "t"); + assert!(using[0].to_string().contains("region")); + } + other => panic!("expected CreateRedshiftPolicy, got {other:?}"), + } +} + #[test] fn parse_pg_like_match_ops_in_view_body() { // `pg_get_viewdef` / `SHOW VIEW` in Redshift normalize ILIKE / NOT ILIKE From a95381fb28b6dad449b7a50bd210eddc0ebed0d9 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 2 Jun 2026 01:44:04 +0200 Subject: [PATCH 16/16] feat(databricks,mssql): ALTER mask / row filter / tag operations Completes the ALTER-time governance operations: - Databricks ALTER TABLE DROP ROW FILTER; SET/UNSET TAGS ('k' = 'v', ...); ALTER COLUMN SET MASK fn [USING COLUMNS (...)] / DROP MASK; ALTER COLUMN SET/UNSET TAGS. (SET ROW FILTER already reused SetTablePolicy.) - SQL Server ALTER COLUMN ADD MASKED WITH (FUNCTION = 'fn') / DROP MASKED. New AlterTableOperation::{DropRowFilter, SetTags, UnsetTags} and AlterColumnOperation::{SetMask, DropMask, AddMasked, DropMasked, SetTags, UnsetTags}. Grammar per docs: https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-ddl-alter-table https://learn.microsoft.com/en-us/sql/relational-databases/security/dynamic-data-masking --- src/ast/ddl.rs | 65 ++++++++++++++++++++++++ src/parser/mod.rs | 96 ++++++++++++++++++++++++++++++++++- tests/sqlparser_databricks.rs | 15 ++++++ tests/sqlparser_mssql.rs | 4 ++ 4 files changed, 178 insertions(+), 2 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 053694ade3..0c02a1cc43 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -177,6 +177,16 @@ pub enum AlterTableOperation { /// `UNSET TAG [, ...]` (Snowflake). UnsetTag { keys: Vec }, + /// Databricks `DROP ROW FILTER`. + /// + DropRowFilter, + + /// Databricks `SET TAGS ('k' = 'v', ...)`. + SetTags { tags: Vec }, + + /// Databricks `UNSET TAGS ('k', ...)`.
+ UnsetTags { keys: Vec }, + /// PostgreSQL row-level security toggle: /// `{ ENABLE | DISABLE | FORCE | NO FORCE } ROW LEVEL SECURITY`. /// @@ -445,6 +455,20 @@ impl fmt::Display for AlterTableOperation { AlterTableOperation::RowLevelSecurity { mode } => { write!(f, "{mode} ROW LEVEL SECURITY") } + AlterTableOperation::DropRowFilter => write!(f, "DROP ROW FILTER"), + AlterTableOperation::SetTags { tags } => { + write!(f, "SET TAGS ({})", display_comma_separated(tags)) + } + AlterTableOperation::UnsetTags { keys } => { + write!(f, "UNSET TAGS (")?; + for (i, k) in keys.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "'{}'", escape_single_quote_string(k))?; + } + write!(f, ")") + } AlterTableOperation::AddProjection { if_not_exists, projection, @@ -543,6 +567,26 @@ pub enum AlterColumnOperation { SetOptions { options: Vec, }, + /// Databricks `SET MASK [USING COLUMNS (...)]`. + SetMask { + mask: ColumnMask, + }, + /// Databricks `DROP MASK`. + DropMask, + /// SQL Server `ADD MASKED WITH (FUNCTION = '')`. + AddMasked { + function: String, + }, + /// SQL Server `DROP MASKED`. + DropMasked, + /// Databricks `SET TAGS ('k' = 'v', ...)`. + SetTags { + tags: Vec, + }, + /// Databricks `UNSET TAGS ('k', ...)`. 
+ UnsetTags { + keys: Vec, + }, } impl fmt::Display for AlterColumnOperation { @@ -566,6 +610,27 @@ impl fmt::Display for AlterColumnOperation { AlterColumnOperation::SetOptions { options } => { write!(f, "SET OPTIONS({})", display_comma_separated(options)) } + AlterColumnOperation::SetMask { mask } => write!(f, "SET {mask}"), + AlterColumnOperation::DropMask => write!(f, "DROP MASK"), + AlterColumnOperation::AddMasked { function } => write!( + f, + "ADD MASKED WITH (FUNCTION = '{}')", + escape_single_quote_string(function) + ), + AlterColumnOperation::DropMasked => write!(f, "DROP MASKED"), + AlterColumnOperation::SetTags { tags } => { + write!(f, "SET TAGS ({})", display_comma_separated(tags)) + } + AlterColumnOperation::UnsetTags { keys } => { + write!(f, "UNSET TAGS (")?; + for (i, k) in keys.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "'{}'", escape_single_quote_string(k))?; + } + write!(f, ")") + } } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3cc3303aa0..455eb0ae55 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5335,6 +5335,30 @@ impl<'a> Parser<'a> { Ok(Statement::AlterRedshiftMaskingPolicy { name, using }) } + /// Databricks `( 'k' = 'v', ... )` tag list (string keys and values). + fn parse_databricks_tags(&mut self) -> Result, ParserError> { + self.expect_token(&Token::LParen)?; + let tags = self.parse_comma_separated(|p| { + let key = p.parse_literal_string()?; + p.expect_token(&Token::Eq)?; + let value = p.parse_literal_string()?; + Ok(Tag { + name: ObjectName(vec![Ident::with_quote('\'', key)]), + value, + }) + })?; + self.expect_token(&Token::RParen)?; + Ok(tags) + } + + /// Databricks `( 'k', ... )` tag-key list (for `UNSET TAGS`). 
+ fn parse_databricks_tag_keys(&mut self) -> Result, ParserError> { + self.expect_token(&Token::LParen)?; + let keys = self.parse_comma_separated(|p| p.parse_literal_string())?; + self.expect_token(&Token::RParen)?; + Ok(keys) + } + /// `{ | ROLE | PUBLIC }` grantee in a Redshift attach/detach. fn parse_redshift_grantee(&mut self) -> Result { if self.parse_word_ci("PUBLIC") { @@ -10237,6 +10261,9 @@ impl<'a> Parser<'a> { if self.parse_keywords(&[Keyword::ROW, Keyword::ACCESS, Keyword::POLICY]) { let policy = self.parse_object_name(false)?; AlterTableOperation::DropRowAccessPolicy { policy } + } else if self.parse_keywords(&[Keyword::ROW, Keyword::FILTER]) { + // Databricks: DROP ROW FILTER + AlterTableOperation::DropRowFilter } else if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) { self.expect_token(&Token::LParen)?; let partitions = self.parse_comma_separated(Parser::parse_expr)?; @@ -10391,9 +10418,64 @@ impl<'a> Parser<'a> { self.prev_token(); let options = self.parse_options(Keyword::OPTIONS)?; AlterColumnOperation::SetOptions { options } + } else if self.parse_keywords(&[Keyword::SET, Keyword::MASK]) { + // Databricks: SET MASK [USING COLUMNS (...)] + let function = self.parse_object_name(false)?; + let using_columns = if self.parse_keywords(&[Keyword::USING, Keyword::COLUMNS]) { + self.expect_token(&Token::LParen)?; + let cols = self.parse_comma_separated(|p| p.parse_expr())?; + self.expect_token(&Token::RParen)?; + cols + } else { + vec![] + }; + AlterColumnOperation::SetMask { + mask: ColumnMask { + function, + using_columns, + }, + } + } else if self.parse_keywords(&[Keyword::DROP, Keyword::MASK]) { + AlterColumnOperation::DropMask + } else if matches!(self.peek_token_kind(), Token::Word(w) if w.keyword == Keyword::ADD) + && matches!(self.peek_nth_token(1).token, Token::Word(w) if w.value.eq_ignore_ascii_case("MASKED")) + { + // SQL Server: ADD MASKED WITH (FUNCTION = '') + self.next_token(); // ADD + self.next_token(); // 
MASKED + self.expect_keyword(Keyword::WITH)?; + self.expect_token(&Token::LParen)?; + self.expect_keyword(Keyword::FUNCTION)?; + self.expect_token(&Token::Eq)?; + let function = self.parse_literal_string()?; + self.expect_token(&Token::RParen)?; + AlterColumnOperation::AddMasked { function } + } else if matches!(self.peek_token_kind(), Token::Word(w) if w.keyword == Keyword::DROP) + && matches!(self.peek_nth_token(1).token, Token::Word(w) if w.value.eq_ignore_ascii_case("MASKED")) + { + self.next_token(); // DROP + self.next_token(); // MASKED + AlterColumnOperation::DropMasked + } else if matches!(self.peek_token_kind(), Token::Word(w) if w.keyword == Keyword::SET) + && matches!(self.peek_nth_token(1).token, Token::Word(w) if w.value.eq_ignore_ascii_case("TAGS")) + { + // Databricks: SET TAGS ('k' = 'v', ...) + self.next_token(); // SET + self.next_token(); // TAGS + AlterColumnOperation::SetTags { + tags: self.parse_databricks_tags()?, + } + } else if matches!(self.peek_token_kind(), Token::Word(w) if w.keyword == Keyword::UNSET) + && matches!(self.peek_nth_token(1).token, Token::Word(w) if w.value.eq_ignore_ascii_case("TAGS")) + { + self.next_token(); // UNSET + self.next_token(); // TAGS + AlterColumnOperation::UnsetTags { + keys: self.parse_databricks_tag_keys()?, + } } else { return self.expected( - "SET/DROP NOT NULL, SET DEFAULT, SET DATA TYPE, SET OPTIONS after ALTER COLUMN", + "SET/DROP NOT NULL, SET DEFAULT, SET DATA TYPE, SET OPTIONS, SET/DROP MASK, ADD/DROP MASKED, or SET/UNSET TAGS after ALTER COLUMN", self.peek_token(), ); }; @@ -10435,6 +10517,11 @@ impl<'a> Parser<'a> { options, has_options_keyword: false, } + } else if self.parse_word_ci("TAGS") { + // Databricks: SET TAGS ('k' = 'v', ...) + AlterTableOperation::SetTags { + tags: self.parse_databricks_tags()?, + } } else if let Some(policy) = self.maybe_parse_table_policy(false)? { // Snowflake: SET { AGGREGATION | JOIN } POLICY [...] 
[FORCE] let force = self.parse_keyword(Keyword::FORCE); @@ -10464,12 +10551,17 @@ impl<'a> Parser<'a> { AlterTableOperation::UnsetTablePolicy { kind: TablePolicyKind::RowAccess, } + } else if self.parse_word_ci("TAGS") { + // Databricks: UNSET TAGS ('k', ...) + AlterTableOperation::UnsetTags { + keys: self.parse_databricks_tag_keys()?, + } } else if self.parse_keyword(Keyword::TAG) { let keys = self.parse_comma_separated(|p| p.parse_object_name(false))?; AlterTableOperation::UnsetTag { keys } } else { return self.expected( - "AGGREGATION POLICY, JOIN POLICY, ROW ACCESS POLICY, or TAG after UNSET", + "AGGREGATION POLICY, JOIN POLICY, ROW ACCESS POLICY, TAG or TAGS after UNSET", self.peek_token(), ); } diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 8fc5a840d7..2b84d68f02 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -174,6 +174,21 @@ fn test_create_table_column_mask() { } } +#[test] +fn test_alter_table_mask_filter_tags() { + // Databricks ALTER row filter / column mask / tags. 
+ // https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-ddl-alter-table + databricks().verified_stmt("ALTER TABLE t SET ROW FILTER f ON (region)"); + databricks().verified_stmt("ALTER TABLE t DROP ROW FILTER"); + databricks().verified_stmt("ALTER TABLE t ALTER COLUMN c SET MASK f"); + databricks().verified_stmt("ALTER TABLE t ALTER COLUMN c SET MASK f USING COLUMNS (a, b)"); + databricks().verified_stmt("ALTER TABLE t ALTER COLUMN c DROP MASK"); + databricks().verified_stmt("ALTER TABLE t SET TAGS ('k' = 'v', 'k2' = 'v2')"); + databricks().verified_stmt("ALTER TABLE t UNSET TAGS ('k', 'k2')"); + databricks().verified_stmt("ALTER TABLE t ALTER COLUMN c SET TAGS ('pii' = 'true')"); + databricks().verified_stmt("ALTER TABLE t ALTER COLUMN c UNSET TAGS ('pii')"); +} + #[test] fn test_materialized_view_mask_and_row_filter() { // Databricks (materialized) views may carry column masks and a table-level diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index c453699d40..951d9f967a 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -661,6 +661,10 @@ fn parse_security_policy_and_masked() { ms().verified_stmt("DROP SECURITY POLICY secPolicy"); ms().verified_stmt("DROP SECURITY POLICY IF EXISTS secPolicy"); + // ALTER COLUMN ADD/DROP MASKED (dynamic data masking). + ms().verified_stmt("ALTER TABLE t ALTER COLUMN c ADD MASKED WITH (FUNCTION = 'email()')"); + ms().verified_stmt("ALTER TABLE t ALTER COLUMN c DROP MASKED"); + // Predicate exposes the function + target table for lineage. match ms().verified_stmt("CREATE SECURITY POLICY p ADD FILTER PREDICATE s.f(t_id) ON dbo.Sales") {