Skip to content

Commit c9a4171

Browse files
committed
MySQL: Add support for DEFAULT CHARACTER SET in CREATE DATABASE
Parse MySQL-style [DEFAULT] CHARACTER SET and [DEFAULT] COLLATE options in CREATE DATABASE statements. This adds two new fields to CreateDatabase: default_charset and default_collation.

Supports the following syntax variants:
- DEFAULT CHARACTER SET [=] charset_name
- CHARACTER SET [=] charset_name
- DEFAULT CHARSET [=] charset_name
- CHARSET [=] charset_name
- DEFAULT COLLATE [=] collation_name
- COLLATE [=] collation_name
1 parent 6550ec8 commit c9a4171

File tree

4 files changed

+155
-0
lines changed

4 files changed

+155
-0
lines changed

src/ast/helpers/stmt_create_database.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ pub struct CreateDatabaseBuilder {
8585
pub storage_serialization_policy: Option<StorageSerializationPolicy>,
8686
/// Optional comment attached to the database.
8787
pub comment: Option<String>,
88+
/// Optional default character set (MySQL).
89+
pub default_charset: Option<String>,
90+
/// Optional default collation (MySQL).
91+
pub default_collation: Option<String>,
8892
/// Optional catalog sync configuration.
8993
pub catalog_sync: Option<String>,
9094
/// Optional catalog sync namespace mode.
@@ -120,6 +124,8 @@ impl CreateDatabaseBuilder {
120124
default_ddl_collation: None,
121125
storage_serialization_policy: None,
122126
comment: None,
127+
default_charset: None,
128+
default_collation: None,
123129
catalog_sync: None,
124130
catalog_sync_namespace_mode: None,
125131
catalog_sync_namespace_flatten_delimiter: None,
@@ -218,6 +224,18 @@ impl CreateDatabaseBuilder {
218224
self
219225
}
220226

227+
/// Set the default character set for the database.
228+
pub fn default_charset(mut self, default_charset: Option<String>) -> Self {
229+
self.default_charset = default_charset;
230+
self
231+
}
232+
233+
/// Set the default collation for the database.
234+
pub fn default_collation(mut self, default_collation: Option<String>) -> Self {
235+
self.default_collation = default_collation;
236+
self
237+
}
238+
221239
/// Set the catalog sync for the database.
222240
pub fn catalog_sync(mut self, catalog_sync: Option<String>) -> Self {
223241
self.catalog_sync = catalog_sync;
@@ -272,6 +290,8 @@ impl CreateDatabaseBuilder {
272290
default_ddl_collation: self.default_ddl_collation,
273291
storage_serialization_policy: self.storage_serialization_policy,
274292
comment: self.comment,
293+
default_charset: self.default_charset,
294+
default_collation: self.default_collation,
275295
catalog_sync: self.catalog_sync,
276296
catalog_sync_namespace_mode: self.catalog_sync_namespace_mode,
277297
catalog_sync_namespace_flatten_delimiter: self.catalog_sync_namespace_flatten_delimiter,
@@ -302,6 +322,8 @@ impl TryFrom<Statement> for CreateDatabaseBuilder {
302322
default_ddl_collation,
303323
storage_serialization_policy,
304324
comment,
325+
default_charset,
326+
default_collation,
305327
catalog_sync,
306328
catalog_sync_namespace_mode,
307329
catalog_sync_namespace_flatten_delimiter,
@@ -323,6 +345,8 @@ impl TryFrom<Statement> for CreateDatabaseBuilder {
323345
default_ddl_collation,
324346
storage_serialization_policy,
325347
comment,
348+
default_charset,
349+
default_collation,
326350
catalog_sync,
327351
catalog_sync_namespace_mode,
328352
catalog_sync_namespace_flatten_delimiter,

src/ast/mod.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4285,6 +4285,10 @@ pub enum Statement {
42854285
storage_serialization_policy: Option<StorageSerializationPolicy>,
42864286
/// Optional comment.
42874287
comment: Option<String>,
4288+
/// Optional default character set (MySQL).
4289+
default_charset: Option<String>,
4290+
/// Optional default collation (MySQL).
4291+
default_collation: Option<String>,
42884292
/// Optional catalog sync identifier.
42894293
catalog_sync: Option<String>,
42904294
/// Catalog sync namespace mode.
@@ -5165,6 +5169,8 @@ impl fmt::Display for Statement {
51655169
default_ddl_collation,
51665170
storage_serialization_policy,
51675171
comment,
5172+
default_charset,
5173+
default_collation,
51685174
catalog_sync,
51695175
catalog_sync_namespace_mode,
51705176
catalog_sync_namespace_flatten_delimiter,
@@ -5224,6 +5230,14 @@ impl fmt::Display for Statement {
52245230
write!(f, " COMMENT = '{comment}'")?;
52255231
}
52265232

5233+
if let Some(charset) = default_charset {
5234+
write!(f, " DEFAULT CHARACTER SET {charset}")?;
5235+
}
5236+
5237+
if let Some(collation) = default_collation {
5238+
write!(f, " DEFAULT COLLATE {collation}")?;
5239+
}
5240+
52275241
if let Some(sync) = catalog_sync {
52285242
write!(f, " CATALOG_SYNC = '{sync}'")?;
52295243
}

src/parser/mod.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5294,6 +5294,8 @@ impl<'a> Parser<'a> {
52945294
let db_name = self.parse_object_name(false)?;
52955295
let mut location = None;
52965296
let mut managed_location = None;
5297+
let mut default_charset = None;
5298+
let mut default_collation = None;
52975299
loop {
52985300
match self.parse_one_of_keywords(&[Keyword::LOCATION, Keyword::MANAGEDLOCATION]) {
52995301
Some(Keyword::LOCATION) => location = Some(self.parse_literal_string()?),
@@ -5309,6 +5311,26 @@ impl<'a> Parser<'a> {
53095311
None
53105312
};
53115313

5314+
// Parse MySQL-style [DEFAULT] CHARACTER SET and [DEFAULT] COLLATE options
5315+
loop {
5316+
let has_default = self.parse_keyword(Keyword::DEFAULT);
5317+
if self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET])
5318+
|| self.parse_keyword(Keyword::CHARSET)
5319+
{
5320+
self.expect_token(&Token::Eq).ok();
5321+
default_charset = Some(self.parse_identifier()?.value);
5322+
} else if self.parse_keyword(Keyword::COLLATE) {
5323+
self.expect_token(&Token::Eq).ok();
5324+
default_collation = Some(self.parse_identifier()?.value);
5325+
} else if has_default {
5326+
// DEFAULT keyword not followed by CHARACTER SET, CHARSET, or COLLATE
5327+
self.prev_token();
5328+
break;
5329+
} else {
5330+
break;
5331+
}
5332+
}
5333+
53125334
Ok(Statement::CreateDatabase {
53135335
db_name,
53145336
if_not_exists: ine,
@@ -5325,6 +5347,8 @@ impl<'a> Parser<'a> {
53255347
default_ddl_collation: None,
53265348
storage_serialization_policy: None,
53275349
comment: None,
5350+
default_charset,
5351+
default_collation,
53285352
catalog_sync: None,
53295353
catalog_sync_namespace_mode: None,
53305354
catalog_sync_namespace_flatten_delimiter: None,

tests/sqlparser_mysql.rs

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4354,3 +4354,96 @@ fn test_create_index_options() {
43544354
"CREATE INDEX idx_name ON t(c1, c2) USING BTREE LOCK = EXCLUSIVE ALGORITHM = DEFAULT",
43554355
);
43564356
}
4357+
4358+
#[test]
4359+
fn parse_create_database_with_charset() {
4360+
// Test DEFAULT CHARACTER SET without = sign (canonical form)
4361+
mysql_and_generic().verified_stmt("CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4");
4362+
4363+
// Test DEFAULT CHARACTER SET with = sign (parses to the form without =)
4364+
mysql_and_generic().one_statement_parses_to(
4365+
"CREATE DATABASE mydb DEFAULT CHARACTER SET = utf8mb4",
4366+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
4367+
);
4368+
4369+
// Test CHARACTER SET without DEFAULT
4370+
mysql_and_generic().one_statement_parses_to(
4371+
"CREATE DATABASE mydb CHARACTER SET utf8mb4",
4372+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
4373+
);
4374+
4375+
// Test CHARSET shorthand
4376+
mysql_and_generic().one_statement_parses_to(
4377+
"CREATE DATABASE mydb CHARSET utf8mb4",
4378+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
4379+
);
4380+
4381+
// Test DEFAULT CHARSET shorthand
4382+
mysql_and_generic().one_statement_parses_to(
4383+
"CREATE DATABASE mydb DEFAULT CHARSET utf8mb4",
4384+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
4385+
);
4386+
4387+
// Test DEFAULT COLLATE
4388+
mysql_and_generic().verified_stmt("CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci");
4389+
4390+
// Test COLLATE without DEFAULT
4391+
mysql_and_generic().one_statement_parses_to(
4392+
"CREATE DATABASE mydb COLLATE utf8mb4_unicode_ci",
4393+
"CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci",
4394+
);
4395+
4396+
// Test both CHARACTER SET and COLLATE together
4397+
mysql_and_generic().verified_stmt(
4398+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_unicode_ci",
4399+
);
4400+
4401+
// Test IF NOT EXISTS with CHARACTER SET
4402+
mysql_and_generic()
4403+
.verified_stmt("CREATE DATABASE IF NOT EXISTS mydb DEFAULT CHARACTER SET utf16");
4404+
4405+
// Test the exact syntax from the issue
4406+
mysql_and_generic().one_statement_parses_to(
4407+
"CREATE DATABASE IF NOT EXISTS noria DEFAULT CHARACTER SET = utf16",
4408+
"CREATE DATABASE IF NOT EXISTS noria DEFAULT CHARACTER SET utf16",
4409+
);
4410+
}
4411+
4412+
#[test]
4413+
fn parse_create_database_with_charset_errors() {
4414+
// Missing charset name after CHARACTER SET
4415+
assert!(mysql_and_generic()
4416+
.parse_sql_statements("CREATE DATABASE mydb DEFAULT CHARACTER SET")
4417+
.is_err());
4418+
4419+
// Missing charset name after CHARSET
4420+
assert!(mysql_and_generic()
4421+
.parse_sql_statements("CREATE DATABASE mydb CHARSET")
4422+
.is_err());
4423+
4424+
// Missing collation name after COLLATE
4425+
assert!(mysql_and_generic()
4426+
.parse_sql_statements("CREATE DATABASE mydb DEFAULT COLLATE")
4427+
.is_err());
4428+
4429+
// Equals sign but no value
4430+
assert!(mysql_and_generic()
4431+
.parse_sql_statements("CREATE DATABASE mydb CHARACTER SET =")
4432+
.is_err());
4433+
}
4434+
4435+
#[test]
4436+
fn parse_create_database_with_charset_option_ordering() {
4437+
// MySQL allows COLLATE before CHARACTER SET - output is normalized to CHARACTER SET first
4438+
// (matches MySQL's own SHOW CREATE DATABASE output order)
4439+
mysql_and_generic().one_statement_parses_to(
4440+
"CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci DEFAULT CHARACTER SET utf8mb4",
4441+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_unicode_ci",
4442+
);
4443+
4444+
// COLLATE first without DEFAULT keywords
4445+
mysql_and_generic().one_statement_parses_to(
4446+
"CREATE DATABASE mydb COLLATE utf8mb4_unicode_ci CHARACTER SET utf8mb4",
4447+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_unicode_ci",
4448+
);
4449+
}

0 commit comments

Comments
 (0)