Skip to content

Commit 4d89c61

Browse files
authored
Add comment parsing for shouldReturnResultSet and countParameters (#1207)
## Description Closes #1201 Closes #1254 Goes through each character in the sql string while keeping track of the current state (normal, comment, string literal, identifier). Provides an interface for consumers to iterate over the non-comment characters. ## Testing See unit tests. I was also testing with this script: <details> <summary>Main.java</summary> ```java import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.Statement; import java.util.List; public class Main { public static void main(String[] args) throws Exception { String url = System.getenv("DATABRICKS_URL"); if (url == null) { System.err.println("DATABRICKS_URL environment variable is not set"); System.exit(1); } Class.forName("com.databricks.client.jdbc.Driver"); List<String> examples; examples = List.of( // "/* This is a comment */", "SELECT 1; /* This is also a comment */", """ SELECT /* This is a comment that spans multiple lines */ 1; """, "SELECT /* Comments are not limited to Latin characters: 评论 😊 */ 1;", "SELECT /* Comments /* can be */ nested */ 1;", "SELECT /* Quotes in '/*' comments \"/*\" are not special */ */ */ 1;", "/* A prefixed comment */ SELECT 1;", """ SELECT '/* This is not a comment */'; /* This is not a comment */ """ ); for (int i = 0; i < examples.size(); i++) { String example = examples.get(i); System.out.println("=============================="); try { Connection conn = DriverManager.getConnection(url); Statement stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery(example); while (rs.next()) { System.out.println("Result " + i + ": " + rs.getObject(1)); } } catch (Exception e) { System.out.println(e.getMessage()); } System.out.println("=============================="); System.out.println(""); System.out.println(""); } System.out.println(""); System.out.println(""); System.out.println("=============================="); try { Connection conn = DriverManager.getConnection(url); Statement stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery(""" /* hi */ SELECT 1; """); while (rs.next()) { System.out.println("Multi line block: " + rs.getObject(1)); } } catch (Exception e) { System.out.println("Multi line block: " + e.getMessage()); } System.out.println("=============================="); System.out.println(""); System.out.println(""); System.out.println("=============================="); try { Connection conn = DriverManager.getConnection(url); Statement stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery(""" /* /* */ */ SELECT 2; """); while (rs.next()) { System.out.println("Nested: " + rs.getObject(1)); } } catch (Exception e) { System.out.println("Nested: " + e.getMessage()); } System.out.println("=============================="); System.out.println(""); System.out.println(""); System.out.println("=============================="); try { Connection conn = DriverManager.getConnection(url); PreparedStatement pstmt = conn.prepareStatement(""" /* ? /* ? */ ? */ SELECT ? /* ? /* ? */ ? */ /* ? /* ? */ ? */; """); pstmt.setString(1, "hello"); ResultSet rs = pstmt.executeQuery(); while (rs.next()) { System.out.println("Nested param: " + rs.getObject(1)); } } catch (Exception e) { System.out.println("Nested param: " + e.getMessage()); } System.out.println("=============================="); } } ``` </details> ## Additional Notes to the Reviewer --------- Signed-off-by: rileythomp <[email protected]>
1 parent c7291f5 commit 4d89c61

7 files changed

Lines changed: 587 additions & 69 deletions

File tree

NEXT_CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
- PECOBLR-1121 Arrow patch to circumvent Arrow issues with JDK 16+.
1313

1414
### Fixed
15+
- Fixed `DatabricksParameterMetaData.countParameters` and `DatabricksStatement.trimCommentsAndWhitespaces` with a `SqlCommentParser` utility class.
1516
- Fixed `rollback()` to throw `SQLException` when called in auto-commit mode (no active transaction), aligning with JDBC spec. Previously it silently sent a ROLLBACK command to the server.
1617
- Fixed `fetchAutoCommitStateFromServer()` to accept both `"1"`/`"0"` and `"true"`/`"false"` responses from `SET AUTOCOMMIT` query, since different server implementations return different formats.
1718
- Fixed socket leak in SDK HTTP client that prevented CRaC checkpointing. The SDK's connection pool was not shut down on `connection.close()`, leaving TCP sockets open.

src/main/java/com/databricks/jdbc/api/impl/DatabricksParameterMetaData.java

Lines changed: 10 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import static com.databricks.jdbc.common.DatabricksJdbcConstants.EMPTY_STRING;
44

55
import com.databricks.jdbc.common.util.DatabricksTypeUtil;
6+
import com.databricks.jdbc.common.util.SqlCommentParser;
67
import com.databricks.jdbc.common.util.WrapperUtil;
78
import com.databricks.jdbc.log.JdbcLogger;
89
import com.databricks.jdbc.log.JdbcLoggerFactory;
@@ -136,8 +137,7 @@ private ImmutableSqlParameter getObject(int param) {
136137
}
137138

138139
/**
139-
* Counts the number of parameter markers (?) in the SQL statement. This is currently a hacky
140-
* implementation that may need improvement for complex SQL.
140+
* Counts the number of parameter markers (?) in the SQL statement.
141141
*
142142
* @param sql The SQL statement to analyze
143143
* @return The number of parameter markers in the SQL statement
@@ -147,67 +147,14 @@ private int countParameters(String sql) {
147147
return 0;
148148
}
149149

150-
int count = 0;
151-
boolean inSingleQuote = false;
152-
boolean inDoubleQuote = false;
153-
boolean inLineComment = false;
154-
boolean inBlockComment = false;
155-
156-
for (int i = 0; i < sql.length(); i++) {
157-
char c = sql.charAt(i);
158-
char next = (i < sql.length() - 1) ? sql.charAt(i + 1) : '\0';
159-
160-
// Handle comments
161-
if (!inSingleQuote && !inDoubleQuote) {
162-
if (!inBlockComment && !inLineComment && c == '-' && next == '-') {
163-
inLineComment = true;
164-
i++; // Skip next dash
165-
continue;
166-
} else if (!inBlockComment && !inLineComment && c == '/' && next == '*') {
167-
inBlockComment = true;
168-
i++; // Skip next asterisk
169-
continue;
170-
} else if (inLineComment && (c == '\n' || c == '\r')) {
171-
inLineComment = false;
172-
} else if (inBlockComment && c == '*' && next == '/') {
173-
inBlockComment = false;
174-
i++; // Skip next slash
175-
continue;
176-
}
177-
}
178-
179-
// Skip if in comment
180-
if (inLineComment || inBlockComment) {
181-
continue;
182-
}
183-
184-
// Handle quotes with escaped quotes
185-
if (c == '\'') {
186-
if (!inDoubleQuote) {
187-
// Check for escaped single quote
188-
if (inSingleQuote && i < sql.length() - 1 && sql.charAt(i + 1) == '\'') {
189-
i++; // Skip the escaped quote
190-
} else {
191-
inSingleQuote = !inSingleQuote;
150+
int[] count = {0};
151+
SqlCommentParser.forEachNonCommentChar(
152+
sql,
153+
(state, c) -> {
154+
if (state == SqlCommentParser.State.NORMAL && c == '?') {
155+
count[0]++;
192156
}
193-
}
194-
} else if (c == '"') {
195-
if (!inSingleQuote) {
196-
// Check for escaped double quote
197-
if (inDoubleQuote && i < sql.length() - 1 && sql.charAt(i + 1) == '"') {
198-
i++; // Skip the escaped quote
199-
} else {
200-
inDoubleQuote = !inDoubleQuote;
201-
}
202-
}
203-
}
204-
205-
// Count parameter markers only when not inside quotes or comments
206-
if (c == '?' && !inSingleQuote && !inDoubleQuote) {
207-
count++;
208-
}
209-
}
210-
211-
return count;
157+
});
158+
return count[0];
212159
}
213160
}

src/main/java/com/databricks/jdbc/api/impl/DatabricksStatement.java

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -717,12 +717,7 @@ static String trimCommentsAndWhitespaces(String query) {
717717
"Query cannot be null or empty", DatabricksDriverErrorCode.INPUT_VALIDATION_ERROR);
718718
}
719719

720-
// Trim and remove comments and whitespaces.
721-
String trimmedQuery = query.trim().replaceAll("(?m)--.*$", "");
722-
trimmedQuery = trimmedQuery.replaceAll("/\\*.*?\\*/", "");
723-
trimmedQuery = trimmedQuery.replaceAll("\\s+", " ").trim();
724-
725-
return trimmedQuery;
720+
return SqlCommentParser.stripCommentsAndWhitespaces(query);
726721
}
727722

728723
@VisibleForTesting
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
package com.databricks.jdbc.common.util;
2+
3+
/** Utility class for parsing out comments from SQL strings */
4+
public class SqlCommentParser {
5+
6+
public enum State {
7+
NORMAL,
8+
IN_SINGLE_QUOTE,
9+
IN_DOUBLE_QUOTE,
10+
IN_BACKTICK,
11+
IN_LINE_COMMENT,
12+
IN_BLOCK_COMMENT
13+
}
14+
15+
@FunctionalInterface
16+
public interface SqlCharConsumer {
17+
void accept(State state, char c);
18+
}
19+
20+
/**
21+
* Iterates over each character in the SQL string while keeping track of comment, string literal,
22+
* and identifier state. Each character that is not part of a comment calls the consumer with the
23+
* current state and character. Emits a ' ' character after each comment to avoid token fusion.
24+
*
25+
* @param sql the SQL string to parse
26+
* @param consumer called for each visible character with its parsing state
27+
*/
28+
public static void forEachNonCommentChar(String sql, SqlCharConsumer consumer) {
29+
if (sql == null || sql.isEmpty()) {
30+
return;
31+
}
32+
33+
State state = State.NORMAL;
34+
int blockCommentDepth = 0;
35+
36+
for (int i = 0; i < sql.length(); i++) {
37+
char c = sql.charAt(i);
38+
char next = (i + 1 < sql.length()) ? sql.charAt(i + 1) : '\0';
39+
40+
switch (state) {
41+
case NORMAL:
42+
if (c == '-' && next == '-') {
43+
state = State.IN_LINE_COMMENT;
44+
i++; // skip second '-'
45+
} else if (c == '/' && next == '*') {
46+
state = State.IN_BLOCK_COMMENT;
47+
blockCommentDepth = 1;
48+
i++; // skip '*'
49+
} else if (c == '\'') {
50+
state = State.IN_SINGLE_QUOTE;
51+
consumer.accept(state, c);
52+
} else if (c == '"') {
53+
state = State.IN_DOUBLE_QUOTE;
54+
consumer.accept(state, c);
55+
} else if (c == '`') {
56+
state = State.IN_BACKTICK;
57+
consumer.accept(state, c);
58+
} else {
59+
consumer.accept(state, c);
60+
}
61+
break;
62+
63+
case IN_SINGLE_QUOTE:
64+
consumer.accept(state, c);
65+
if (c == '\'' && next == '\'') {
66+
consumer.accept(state, next);
67+
i++; // skip escaped quote
68+
} else if (c == '\'') {
69+
state = State.NORMAL;
70+
}
71+
break;
72+
73+
case IN_DOUBLE_QUOTE:
74+
consumer.accept(state, c);
75+
if (c == '"' && next == '"') {
76+
consumer.accept(state, next);
77+
i++; // skip escaped quote
78+
} else if (c == '"') {
79+
state = State.NORMAL;
80+
}
81+
break;
82+
83+
case IN_BACKTICK:
84+
consumer.accept(state, c);
85+
if (c == '`' && next == '`') {
86+
consumer.accept(state, next);
87+
i++; // skip escaped backtick
88+
} else if (c == '`') {
89+
state = State.NORMAL;
90+
}
91+
break;
92+
93+
case IN_LINE_COMMENT:
94+
if (c == '\n' || c == '\r') {
95+
state = State.NORMAL;
96+
consumer.accept(state, ' ');
97+
if (c == '\r' && next == '\n') {
98+
i++; // Treat \r\n as a single line ending
99+
}
100+
}
101+
// else: skip character (part of the comment)
102+
break;
103+
104+
case IN_BLOCK_COMMENT:
105+
if (c == '/' && next == '*') {
106+
blockCommentDepth++;
107+
i++; // skip '*'
108+
} else if (c == '*' && next == '/') {
109+
blockCommentDepth--;
110+
i++; // skip '/'
111+
if (blockCommentDepth == 0) {
112+
state = State.NORMAL;
113+
consumer.accept(state, ' ');
114+
}
115+
}
116+
// else: skip character (part of the comment)
117+
break;
118+
}
119+
}
120+
}
121+
122+
/**
123+
* Removes all comments and extra whitespace from the SQL string.
124+
*
125+
* @param sql the SQL string to remove comments and extra whitespace from
126+
* @return the SQL string with all comments and extra whitespace removed
127+
*/
128+
public static String stripCommentsAndWhitespaces(String sql) {
129+
if (sql == null || sql.isEmpty()) {
130+
return sql;
131+
}
132+
133+
StringBuilder result = new StringBuilder(sql.length());
134+
boolean[] lastWasSpace = {false};
135+
forEachNonCommentChar(
136+
sql,
137+
(state, c) -> {
138+
boolean isNormalWhitespace = state == State.NORMAL && Character.isWhitespace(c);
139+
if (isNormalWhitespace && lastWasSpace[0]) {
140+
return;
141+
}
142+
lastWasSpace[0] = isNormalWhitespace;
143+
result.append(c);
144+
});
145+
return result.toString().trim();
146+
}
147+
}

src/test/java/com/databricks/jdbc/api/impl/DatabricksParameterMetaDataTest.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,4 +249,25 @@ public void testParameterCountExceedsBindingsThrowsException() {
249249
.getMessage()
250250
.contains("Number of parameter bindings (3) exceeds parameter count (2)"));
251251
}
252+
253+
@Test
254+
public void testParameterCountWithNestedBlockComment() throws SQLException {
255+
String sql = "SELECT /* outer ? /* inner ? */ outer ? */ * FROM table WHERE id = ?";
256+
DatabricksParameterMetaData metadata = new DatabricksParameterMetaData(sql);
257+
assertEquals(1, metadata.getParameterCount());
258+
}
259+
260+
@Test
261+
public void testParameterCountWithTripleNestedBlockComment() throws SQLException {
262+
String sql = "SELECT /* a ? /* b? /* c ? */ b ? */ a ? */ * FROM table WHERE id = ?";
263+
DatabricksParameterMetaData metadata = new DatabricksParameterMetaData(sql);
264+
assertEquals(1, metadata.getParameterCount());
265+
}
266+
267+
@Test
268+
public void testParameterCountWithApostropheInColumnName() throws SQLException {
269+
String sql = "select foo, sum(`x'y`) as sum from table where foo = ? group by foo;";
270+
DatabricksParameterMetaData metadata = new DatabricksParameterMetaData(sql);
271+
assertEquals(1, metadata.getParameterCount());
272+
}
252273
}

src/test/java/com/databricks/jdbc/api/impl/DatabricksStatementTest.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,18 @@ public void testShouldReturnResultSet_CallStatement() {
700700
"CALL send_notifications(12);", Collections.emptyList()));
701701
}
702702

703+
@Test
704+
public void testShouldReturnResultSet_MultiLineBlockComment() {
705+
String query = "/*\nMulti-line comment\n*/ SELECT * FROM table;";
706+
assertTrue(DatabricksStatement.shouldReturnResultSet(query, Collections.emptyList()));
707+
}
708+
709+
@Test
710+
public void testShouldReturnResultSet_NestedBlockComment() {
711+
String query = "/* /* Nested block comment */ */ SELECT * FROM table;";
712+
assertTrue(DatabricksStatement.shouldReturnResultSet(query, Collections.emptyList()));
713+
}
714+
703715
@Test
704716
public void testShouldReturnResultSet_StartWithBegin() {
705717
assertTrue(DatabricksStatement.shouldReturnResultSet("BEGIN", Collections.emptyList()));

0 commit comments

Comments
 (0)