Skip to content

Commit f072f62

Browse files
airborne12claude
andcommitted
[fix](search) Make AND/OR/NOT operators case-sensitive in search DSL
Per specification requirement #5, only uppercase AND/OR/NOT should be recognized as operators in search DSL. Lowercase and/or/not should be treated as regular terms, causing parse errors when used as operators. Changes: - Update SearchLexer.g4 to only match uppercase keywords - Update unit tests to expect parse errors for lowercase operators - Update regression tests accordingly - Add comprehensive DSL operator test cases Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent 73404df commit f072f62

File tree

11 files changed

+638
-107
lines changed

11 files changed

+638
-107
lines changed

fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchLexer.g4

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@ fragment QUOTED_CHAR
4141

4242
// ============== Default lexer rules ==============
4343

44-
AND : 'AND' | 'and' ;
45-
OR : 'OR' | 'or' ;
46-
NOT : 'NOT' | 'not' | '!' ;
44+
AND : 'AND' ;
45+
OR : 'OR' ;
46+
NOT : 'NOT' | '!' ;
4747

4848
LPAREN : '(' ;
4949
RPAREN : ')' ;

fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java

Lines changed: 18 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -872,16 +872,13 @@ public void testUppercaseAndOperator() {
872872

873873
@Test
874874
public void testLowercaseAndOperator() {
875-
// Test: Currently lowercase 'and' is also treated as operator
876-
// According to PDF requirement, only uppercase should be operators
877-
// This test documents current behavior - may need to change
875+
// Lowercase 'and' is NOT an operator in ANTLR grammar (case-sensitive).
876+
// With bareQuery rule, it's parsed as a bare term without field.
877+
// Without default_field, bare term throws exception.
878878
String dsl = "field:a and field:b";
879-
QsPlan plan = SearchDslParser.parseDsl(dsl);
880-
881-
Assertions.assertNotNull(plan);
882-
// Current behavior: lowercase 'and' IS an operator
883-
Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType());
884-
// TODO: If PDF requires only uppercase, this should fail and return OR or different structure
879+
Assertions.assertThrows(RuntimeException.class, () -> {
880+
SearchDslParser.parseDsl(dsl);
881+
});
885882
}
886883

887884
@Test
@@ -897,15 +894,13 @@ public void testUppercaseOrOperator() {
897894

898895
@Test
899896
public void testLowercaseOrOperator() {
900-
// Test: Currently lowercase 'or' is also treated as operator
901-
// According to PDF requirement, only uppercase should be operators
897+
// Lowercase 'or' is NOT an operator in ANTLR grammar (case-sensitive).
898+
// With bareQuery rule, it's parsed as a bare term without field.
899+
// Without default_field, bare term throws exception.
902900
String dsl = "field:a or field:b";
903-
QsPlan plan = SearchDslParser.parseDsl(dsl);
904-
905-
Assertions.assertNotNull(plan);
906-
// Current behavior: lowercase 'or' IS an operator
907-
Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType());
908-
// TODO: If PDF requires only uppercase, this should fail
901+
Assertions.assertThrows(RuntimeException.class, () -> {
902+
SearchDslParser.parseDsl(dsl);
903+
});
909904
}
910905

911906
@Test
@@ -920,15 +915,13 @@ public void testUppercaseNotOperator() {
920915

921916
@Test
922917
public void testLowercaseNotOperator() {
923-
// Test: Currently lowercase 'not' is also treated as operator
924-
// According to PDF requirement, only uppercase should be operators
918+
// Lowercase 'not' is NOT an operator in ANTLR grammar (case-sensitive).
919+
// With bareQuery rule, it's parsed as a bare term without field.
920+
// Without default_field, bare term throws exception.
925921
String dsl = "not field:spam";
926-
QsPlan plan = SearchDslParser.parseDsl(dsl);
927-
928-
Assertions.assertNotNull(plan);
929-
// Current behavior: lowercase 'not' IS an operator
930-
Assertions.assertEquals(QsClauseType.NOT, plan.getRoot().getType());
931-
// TODO: If PDF requires only uppercase, this should fail
922+
Assertions.assertThrows(RuntimeException.class, () -> {
923+
SearchDslParser.parseDsl(dsl);
924+
});
932925
}
933926

934927
@Test
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
-- This file is automatically generated. You should know what you did if you want to edit this
2+
-- !dsl_or_chain --
3+
1 aterm bterm
4+
2 bterm cterm
5+
3 cterm dterm
6+
4 dterm eterm aterm
7+
8+
-- !dsl_and_chain --
9+
4 dterm eterm aterm
10+
11+
-- !dsl_and_or_mixed --
12+
1 aterm bterm
13+
4 dterm eterm aterm
14+
15+
-- !dsl_and_not_or --
16+
4 dterm eterm aterm
17+
18+
-- !dsl_implicit_and --
19+
3 cterm dterm
20+
21+
-- !dsl_phrase_wrong_order --
22+
23+
-- !dsl_phrase_correct_order --
24+
4 dterm eterm aterm
25+
26+
-- !dsl_escaped_space_and --
27+
4 dterm eterm aterm
28+
29+
-- !dsl_phrase_and_term --
30+
4 dterm eterm aterm
31+
32+
-- !dsl_phrase_wrong_and_term --
33+
34+
-- !dsl_phrase_or_term_1 --
35+
2 bterm cterm
36+
3 cterm dterm
37+
38+
-- !dsl_phrase_or_term_2 --
39+
2 bterm cterm
40+
3 cterm dterm
41+
4 dterm eterm aterm
42+
43+
-- !dsl_and_or_min_should_1 --
44+
1 aterm bterm
45+

regression-test/data/search/test_search_escape.out

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,6 @@
2626
-- !uppercase_not --
2727
8 second fruit
2828

29-
-- !lowercase_and --
30-
7 first fruit
31-
32-
-- !lowercase_or --
33-
1 first content
34-
2 second content
35-
7 first fruit
36-
8 second fruit
37-
3829
-- !exclamation_not --
3930
8 second fruit
4031

regression-test/suites/search/test_search_boundary_cases.groovy

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -86,31 +86,31 @@ suite("test_search_boundary_cases") {
8686
// Boundary Test 1: All NULL fields
8787
qt_boundary_1_all_null_or """
8888
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ count(*) FROM ${tableName}
89-
WHERE search('field1:anything or field2:anything or field3:anything or field4:anything or field5:anything')
89+
WHERE search('field1:anything OR field2:anything OR field3:anything OR field4:anything OR field5:anything')
9090
"""
9191

9292
qt_boundary_1_all_null_and """
9393
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ count(*) FROM ${tableName}
94-
WHERE search('field1:anything and field2:anything and field3:anything and field4:anything and field5:anything')
94+
WHERE search('field1:anything AND field2:anything AND field3:anything AND field4:anything AND field5:anything')
9595
"""
9696

9797
// Boundary Test 2: Single field NULL vs multiple fields NULL in OR
9898
qt_boundary_2_single_null_or """
9999
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id FROM ${tableName}
100-
WHERE search('field1:nonexistent or field2:test')
100+
WHERE search('field1:nonexistent OR field2:test')
101101
ORDER BY id
102102
"""
103103

104104
qt_boundary_2_multiple_null_or """
105105
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id FROM ${tableName}
106-
WHERE search('field1:nonexistent or field2:test or field3:nonexistent')
106+
WHERE search('field1:nonexistent OR field2:test OR field3:nonexistent')
107107
ORDER BY id
108108
"""
109109

110110
// Boundary Test 3: NOT with various NULL combinations
111111
qt_boundary_3_not_null_field """
112112
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ count(*) FROM ${tableName}
113-
WHERE search('not field1:test')
113+
WHERE search('NOT field1:test')
114114
"""
115115

116116
qt_boundary_3_external_not_null """
@@ -138,59 +138,59 @@ suite("test_search_boundary_cases") {
138138
// Boundary Test 5: Complex nested boolean with NULLs
139139
qt_boundary_5_complex_nested """
140140
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ count(*) FROM ${tableName}
141-
WHERE search('((field1:test or field2:test) and (field3:test or field4:test)) or field5:test')
141+
WHERE search('((field1:test OR field2:test) AND (field3:test OR field4:test)) OR field5:test')
142142
"""
143143

144144
qt_boundary_5_detailed_result """
145145
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, field1, field2, field3, field4, field5 FROM ${tableName}
146-
WHERE search('((field1:test or field2:test) and (field3:test or field4:test)) or field5:test')
146+
WHERE search('((field1:test OR field2:test) AND (field3:test OR field4:test)) OR field5:test')
147147
ORDER BY id
148148
"""
149149

150150
// Boundary Test 6: Large OR query with many NULL fields
151151
qt_boundary_6_large_or """
152152
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ count(*) FROM ${tableName}
153-
WHERE search('field1:"target" or field1:"keyword" or field1:"apple" or field1:"unique1" or
154-
field2:"target" or field2:"keyword" or field2:"apple" or field2:"unique2" or
155-
field3:"target" or field3:"keyword" or field3:"banana" or field3:"unique3" or
156-
field4:"target" or field4:"keyword" or field4:"banana" or field4:"unique4" or
157-
field5:"target" or field5:"keyword" or field5:"cherry" or field5:"unique5"')
153+
WHERE search('field1:"target" OR field1:"keyword" OR field1:"apple" OR field1:"unique1" OR
154+
field2:"target" OR field2:"keyword" OR field2:"apple" OR field2:"unique2" OR
155+
field3:"target" OR field3:"keyword" OR field3:"banana" OR field3:"unique3" OR
156+
field4:"target" OR field4:"keyword" OR field4:"banana" OR field4:"unique4" OR
157+
field5:"target" OR field5:"keyword" OR field5:"cherry" OR field5:"unique5"')
158158
"""
159159

160160
// Boundary Test 7: Special characters and NULL interaction
161161
qt_boundary_7_special_chars_or """
162162
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ count(*) FROM ${tableName}
163-
WHERE search('field1:special123 or field2:nonexistent')
163+
WHERE search('field1:special123 OR field2:nonexistent')
164164
"""
165165

166166
qt_boundary_7_special_chars_and """
167167
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ count(*) FROM ${tableName}
168-
WHERE search('field1:special123 and field2:chars456')
168+
WHERE search('field1:special123 AND field2:chars456')
169169
"""
170170

171171
// Boundary Test 8: Case sensitivity with NULL fields
172172
qt_boundary_8_case_variations """
173173
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id FROM ${tableName}
174-
WHERE search('field1:Target or field2:TARGET or field3:target or field4:TaRgEt')
174+
WHERE search('field1:Target OR field2:TARGET OR field3:target OR field4:TaRgEt')
175175
ORDER BY id
176176
"""
177177

178178
// Boundary Test 9: Multiple NOT operations
179179
qt_boundary_9_multiple_not """
180180
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ count(*) FROM ${tableName}
181-
WHERE search('not (field1:nonexistent or field2:nonexistent or field3:nonexistent)')
181+
WHERE search('NOT (field1:nonexistent OR field2:nonexistent OR field3:nonexistent)')
182182
"""
183183

184184
qt_boundary_9_external_multiple_not """
185185
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ count(*) FROM ${tableName}
186-
WHERE not search('field1:nonexistent or field2:nonexistent or field3:nonexistent')
186+
WHERE not search('field1:nonexistent OR field2:nonexistent OR field3:nonexistent')
187187
"""
188188

189189
// Boundary Test 10: Performance with NULL-heavy dataset
190190
qt_boundary_10_performance """
191191
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ count(*) FROM ${tableName}
192-
WHERE search('(field1:test or field1:target or field1:keyword) and
193-
(field2:test or field2:target or field2:keyword) and
194-
not (field3:nonexistent or field4:nonexistent or field5:nonexistent)')
192+
WHERE search('(field1:test OR field1:target OR field1:keyword) AND
193+
(field2:test OR field2:target OR field2:keyword) AND
194+
NOT (field3:nonexistent OR field4:nonexistent OR field5:nonexistent)')
195195
"""
196196
}

0 commit comments

Comments
 (0)