Skip to content

Commit 72b295a

Browse files
authored
Support optional AS keyword in CTE definitions for Databricks (#2286)
1 parent ea9b413 commit 72b295a

File tree

5 files changed

+101
-44
lines changed

5 files changed

+101
-44
lines changed

src/dialect/databricks.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,4 +99,9 @@ impl Dialect for DatabricksDialect {
9999
fn supports_bang_not_operator(&self) -> bool {
100100
true
101101
}
102+
103+
/// See <https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-qry-select-cte>
104+
fn supports_cte_without_as(&self) -> bool {
105+
true
106+
}
102107
}

src/dialect/generic.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,4 +288,8 @@ impl Dialect for GenericDialect {
288288
fn supports_comma_separated_trim(&self) -> bool {
289289
true
290290
}
291+
292+
fn supports_cte_without_as(&self) -> bool {
293+
true
294+
}
291295
}

src/dialect/mod.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1670,6 +1670,17 @@ pub trait Dialect: Debug + Any {
16701670
fn supports_comma_separated_trim(&self) -> bool {
16711671
false
16721672
}
1673+
1674+
/// Returns true if the dialect allows the `AS` keyword to be
1675+
/// omitted in a CTE definition. For example:
1676+
/// ```sql
1677+
/// WITH cte_name (SELECT ...)
1678+
/// ```
1679+
///
1680+
/// [Databricks](https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-qry-select-cte)
1681+
fn supports_cte_without_as(&self) -> bool {
1682+
false
1683+
}
16731684
}
16741685

16751686
/// Operators for which precedence must be defined.

src/parser/mod.rs

Lines changed: 54 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -14060,64 +14060,74 @@ impl<'a> Parser<'a> {
1406014060
})
1406114061
}
1406214062

14063-
/// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`)
14063+
/// Parse a CTE (`alias [( col1, col2, ... )] [AS] (subquery)`)
1406414064
pub fn parse_cte(&mut self) -> Result<Cte, ParserError> {
1406514065
let name = self.parse_identifier()?;
1406614066

14067-
let mut cte = if self.parse_keyword(Keyword::AS) {
14068-
let mut is_materialized = None;
14069-
if dialect_of!(self is PostgreSqlDialect) {
14070-
if self.parse_keyword(Keyword::MATERIALIZED) {
14071-
is_materialized = Some(CteAsMaterialized::Materialized);
14072-
} else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) {
14073-
is_materialized = Some(CteAsMaterialized::NotMaterialized);
14067+
let as_optional = self.dialect.supports_cte_without_as();
14068+
14069+
// If AS is optional and the next token is not AS, first try to parse `name (query)` directly; if that fails, fall through to the column-list path below
14070+
if as_optional && !self.peek_keyword(Keyword::AS) {
14071+
if let Some((query, closing_paren_token)) = self.maybe_parse(|p| {
14072+
p.expect_token(&Token::LParen)?;
14073+
let query = p.parse_query()?;
14074+
let closing_paren_token = p.expect_token(&Token::RParen)?;
14075+
Ok((query, closing_paren_token))
14076+
})? {
14077+
let mut cte = Cte {
14078+
alias: TableAlias {
14079+
explicit: false,
14080+
name,
14081+
columns: vec![],
14082+
},
14083+
query,
14084+
from: None,
14085+
materialized: None,
14086+
closing_paren_token: closing_paren_token.into(),
14087+
};
14088+
if self.parse_keyword(Keyword::FROM) {
14089+
cte.from = Some(self.parse_identifier()?);
1407414090
}
14091+
return Ok(cte);
1407514092
}
14076-
self.expect_token(&Token::LParen)?;
14077-
14078-
let query = self.parse_query()?;
14079-
let closing_paren_token = self.expect_token(&Token::RParen)?;
14093+
}
1408014094

14081-
let alias = TableAlias {
14082-
explicit: false,
14083-
name,
14084-
columns: vec![],
14085-
};
14086-
Cte {
14087-
alias,
14088-
query,
14089-
from: None,
14090-
materialized: is_materialized,
14091-
closing_paren_token: closing_paren_token.into(),
14092-
}
14095+
// Determine column definitions and consume AS
14096+
let columns = if self.parse_keyword(Keyword::AS) {
14097+
vec![]
1409314098
} else {
1409414099
let columns = self.parse_table_alias_column_defs()?;
14095-
self.expect_keyword_is(Keyword::AS)?;
14096-
let mut is_materialized = None;
14097-
if dialect_of!(self is PostgreSqlDialect) {
14098-
if self.parse_keyword(Keyword::MATERIALIZED) {
14099-
is_materialized = Some(CteAsMaterialized::Materialized);
14100-
} else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) {
14101-
is_materialized = Some(CteAsMaterialized::NotMaterialized);
14102-
}
14100+
if as_optional {
14101+
let _ = self.parse_keyword(Keyword::AS);
14102+
} else {
14103+
self.expect_keyword_is(Keyword::AS)?;
1410314104
}
14104-
self.expect_token(&Token::LParen)?;
14105+
columns
14106+
};
1410514107

14106-
let query = self.parse_query()?;
14107-
let closing_paren_token = self.expect_token(&Token::RParen)?;
14108+
let mut is_materialized = None;
14109+
if dialect_of!(self is PostgreSqlDialect) {
14110+
if self.parse_keyword(Keyword::MATERIALIZED) {
14111+
is_materialized = Some(CteAsMaterialized::Materialized);
14112+
} else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) {
14113+
is_materialized = Some(CteAsMaterialized::NotMaterialized);
14114+
}
14115+
}
1410814116

14109-
let alias = TableAlias {
14117+
self.expect_token(&Token::LParen)?;
14118+
let query = self.parse_query()?;
14119+
let closing_paren_token = self.expect_token(&Token::RParen)?;
14120+
14121+
let mut cte = Cte {
14122+
alias: TableAlias {
1411014123
explicit: false,
1411114124
name,
1411214125
columns,
14113-
};
14114-
Cte {
14115-
alias,
14116-
query,
14117-
from: None,
14118-
materialized: is_materialized,
14119-
closing_paren_token: closing_paren_token.into(),
14120-
}
14126+
},
14127+
query,
14128+
from: None,
14129+
materialized: is_materialized,
14130+
closing_paren_token: closing_paren_token.into(),
1412114131
};
1412214132
if self.parse_keyword(Keyword::FROM) {
1412314133
cte.from = Some(self.parse_identifier()?);

tests/sqlparser_databricks.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,3 +651,30 @@ fn parse_numeric_prefix_identifier() {
651651

652652
databricks().verified_stmt("SELECT * FROM a.b.1c");
653653
}
654+
655+
#[test]
656+
fn parse_cte_without_as() {
657+
databricks_and_generic().one_statement_parses_to(
658+
"WITH cte (SELECT 1) SELECT * FROM cte",
659+
"WITH cte AS (SELECT 1) SELECT * FROM cte",
660+
);
661+
662+
databricks_and_generic().one_statement_parses_to(
663+
"WITH a AS (SELECT 1), b (SELECT 2) SELECT * FROM a, b",
664+
"WITH a AS (SELECT 1), b AS (SELECT 2) SELECT * FROM a, b",
665+
);
666+
667+
databricks_and_generic().one_statement_parses_to(
668+
"WITH cte (col1, col2) (SELECT 1, 2) SELECT * FROM cte",
669+
"WITH cte (col1, col2) AS (SELECT 1, 2) SELECT * FROM cte",
670+
);
671+
672+
databricks_and_generic().verified_query("WITH cte AS (SELECT 1) SELECT * FROM cte");
673+
674+
databricks_and_generic()
675+
.verified_query("WITH cte (col1, col2) AS (SELECT 1, 2) SELECT * FROM cte");
676+
677+
assert!(all_dialects_where(|d| !d.supports_cte_without_as())
678+
.parse_sql_statements("WITH cte (SELECT 1) SELECT * FROM cte")
679+
.is_err());
680+
}

0 commit comments

Comments
 (0)