diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java index 8440d70b334750..b53386206e9fe4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java @@ -88,12 +88,12 @@ protected void toThrift(TExprNode msg) { // Print QsPlan details if (qsPlan != null) { LOG.info("SearchPredicate.toThrift: QsPlan fieldBindings.size={}", - qsPlan.fieldBindings != null ? qsPlan.fieldBindings.size() : 0); - if (qsPlan.fieldBindings != null) { - for (int i = 0; i < qsPlan.fieldBindings.size(); i++) { - SearchDslParser.QsFieldBinding binding = qsPlan.fieldBindings.get(i); + qsPlan.getFieldBindings() != null ? qsPlan.getFieldBindings().size() : 0); + if (qsPlan.getFieldBindings() != null) { + for (int i = 0; i < qsPlan.getFieldBindings().size(); i++) { + SearchDslParser.QsFieldBinding binding = qsPlan.getFieldBindings().get(i); LOG.info("SearchPredicate.toThrift: binding[{}] fieldName='{}', slotIndex={}", - i, binding.fieldName, binding.slotIndex); + i, binding.getFieldName(), binding.getSlotIndex()); } } } @@ -142,14 +142,14 @@ public int hashCode() { private TSearchParam buildThriftParam() { TSearchParam param = new TSearchParam(); param.setOriginalDsl(dslString); - param.setRoot(convertQsNodeToThrift(qsPlan.root)); + param.setRoot(convertQsNodeToThrift(qsPlan.getRoot())); List bindings = new ArrayList<>(); - for (int i = 0; i < qsPlan.fieldBindings.size(); i++) { - SearchDslParser.QsFieldBinding binding = qsPlan.fieldBindings.get(i); + for (int i = 0; i < qsPlan.getFieldBindings().size(); i++) { + SearchDslParser.QsFieldBinding binding = qsPlan.getFieldBindings().get(i); TSearchFieldBinding thriftBinding = new TSearchFieldBinding(); - String fieldPath = binding.fieldName; + String fieldPath = binding.getFieldName(); thriftBinding.setFieldName(fieldPath); // Check if this is a 
variant subcolumn (contains dot) @@ -176,9 +176,10 @@ private TSearchParam buildThriftParam() { SlotRef slotRef = (SlotRef) this.children.get(i); int actualSlotId = slotRef.getSlotId().asInt(); thriftBinding.setSlotIndex(actualSlotId); - LOG.info("buildThriftParam: binding field='{}', actual slotId={}", binding.fieldName, actualSlotId); + LOG.info("buildThriftParam: binding field='{}', actual slotId={}", + binding.getFieldName(), actualSlotId); } else { - LOG.warn("buildThriftParam: No corresponding SlotRef for field '{}'", binding.fieldName); + LOG.warn("buildThriftParam: No corresponding SlotRef for field '{}'", binding.getFieldName()); thriftBinding.setSlotIndex(i); // fallback to position } @@ -230,10 +231,10 @@ private boolean isExplainVerboseContext() { private List buildDslAstExplainLines() { List lines = new ArrayList<>(); - if (qsPlan == null || qsPlan.root == null) { + if (qsPlan == null || qsPlan.getRoot() == null) { return lines; } - TSearchClause rootClause = convertQsNodeToThrift(qsPlan.root); + TSearchClause rootClause = convertQsNodeToThrift(qsPlan.getRoot()); appendClauseExplain(rootClause, lines, 0); return lines; } @@ -258,11 +259,11 @@ private void appendClauseExplain(TSearchClause clause, List lines, int d private List buildFieldBindingExplainLines() { List lines = new ArrayList<>(); - if (qsPlan == null || qsPlan.fieldBindings == null || qsPlan.fieldBindings.isEmpty()) { + if (qsPlan == null || qsPlan.getFieldBindings() == null || qsPlan.getFieldBindings().isEmpty()) { return lines; } - IntStream.range(0, qsPlan.fieldBindings.size()).forEach(index -> { - SearchDslParser.QsFieldBinding binding = qsPlan.fieldBindings.get(index); + IntStream.range(0, qsPlan.getFieldBindings().size()).forEach(index -> { + SearchDslParser.QsFieldBinding binding = qsPlan.getFieldBindings().get(index); String slotDesc = ""; if (index < children.size() && children.get(index) instanceof SlotRef) { SlotRef slotRef = (SlotRef) children.get(index); @@ -272,7 +273,7 @@ 
private List buildFieldBindingExplainLines() { } else if (index < children.size()) { slotDesc = children.get(index).toSqlWithoutTbl(); } - lines.add(binding.fieldName + " -> " + slotDesc); + lines.add(binding.getFieldName() + " -> " + slotDesc); }); return lines; } @@ -304,29 +305,29 @@ private TSearchClause convertQsNodeToThrift( TSearchClause clause = new TSearchClause(); // Convert clause type - clause.setClauseType(node.type.name()); + clause.setClauseType(node.getType().name()); - if (node.field != null) { - clause.setFieldName(node.field); + if (node.getField() != null) { + clause.setFieldName(node.getField()); } - if (node.value != null) { - clause.setValue(node.value); + if (node.getValue() != null) { + clause.setValue(node.getValue()); } // Convert occur type for Lucene-style boolean queries - if (node.occur != null) { - clause.setOccur(convertQsOccurToThrift(node.occur)); + if (node.getOccur() != null) { + clause.setOccur(convertQsOccurToThrift(node.getOccur())); } // Convert minimum_should_match for OCCUR_BOOLEAN - if (node.minimumShouldMatch != null) { - clause.setMinimumShouldMatch(node.minimumShouldMatch); + if (node.getMinimumShouldMatch() != null) { + clause.setMinimumShouldMatch(node.getMinimumShouldMatch()); } - if (node.children != null && !node.children.isEmpty()) { + if (node.getChildren() != null && !node.getChildren().isEmpty()) { List childClauses = new ArrayList<>(); - for (SearchDslParser.QsNode child : node.children) { + for (SearchDslParser.QsNode child : node.getChildren()) { childClauses.add(convertQsNodeToThrift(child)); } clause.setChildren(childClauses); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java index 3114fff0593b4d..83da8f99a96821 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java @@ -93,7 +93,7 @@ private Expression rewriteSearch(Search search, LogicalOlapScan scan) { try { // Parse DSL to get field bindings SearchDslParser.QsPlan qsPlan = search.getQsPlan(); - if (qsPlan == null || qsPlan.fieldBindings == null || qsPlan.fieldBindings.isEmpty()) { + if (qsPlan == null || qsPlan.getFieldBindings() == null || qsPlan.getFieldBindings().isEmpty()) { LOG.warn("Search function has no field bindings: {}", search.getDslString()); return search; } @@ -102,8 +102,8 @@ private Expression rewriteSearch(Search search, LogicalOlapScan scan) { // Create slot reference children from field bindings List slotChildren = new ArrayList<>(); - for (SearchDslParser.QsFieldBinding binding : qsPlan.fieldBindings) { - String originalFieldName = binding.fieldName; + for (SearchDslParser.QsFieldBinding binding : qsPlan.getFieldBindings()) { + String originalFieldName = binding.getFieldName(); Expression childExpr; String normalizedFieldName; @@ -151,14 +151,14 @@ private Expression rewriteSearch(Search search, LogicalOlapScan scan) { } normalizedFields.put(originalFieldName, normalizedFieldName); - binding.fieldName = normalizedFieldName; + binding.setFieldName(normalizedFieldName); slotChildren.add(childExpr); } LOG.info("Rewriting search function: dsl='{}' with {} slot children", search.getDslString(), slotChildren.size()); - normalizePlanFields(qsPlan.root, normalizedFields); + normalizePlanFields(qsPlan.getRoot(), normalizedFields); // Create SearchExpression with slot children return new SearchExpression(search.getDslString(), qsPlan, slotChildren); @@ -182,16 +182,16 @@ private void normalizePlanFields(SearchDslParser.QsNode node, Map entry : normalized.entrySet()) { - if (entry.getKey().equalsIgnoreCase(node.field)) { - node.field = entry.getValue(); + if (entry.getKey().equalsIgnoreCase(node.getField())) { + node.setField(entry.getValue()); break; } } } - if 
(node.children != null) { - for (SearchDslParser.QsNode child : node.children) { + if (node.getChildren() != null) { + for (SearchDslParser.QsNode child : node.getChildren()) { normalizePlanFields(child, normalized); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java index b4c880546a700c..82cbdcdf2401c2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java @@ -23,19 +23,24 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonSetter; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; -import org.antlr.v4.runtime.ANTLRInputStream; +import org.antlr.v4.runtime.CharStreams; import org.antlr.v4.runtime.CommonTokenStream; import org.antlr.v4.runtime.tree.ParseTree; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.util.ArrayList; -import java.util.HashSet; +import java.util.Collections; +import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; +import javax.annotation.Nullable; /** * Search DSL Parser using ANTLR-generated parser. @@ -57,6 +62,22 @@ public class SearchDslParser { private static final Logger LOG = LogManager.getLogger(SearchDslParser.class); private static final ObjectMapper JSON_MAPPER = new ObjectMapper(); + /** + * Exception for search DSL syntax errors. + * This exception is thrown when the DSL string cannot be parsed due to syntax issues. 
+ * It is distinct from programming errors (NullPointerException, etc.) to provide + * clearer error messages to users. + */ + public static class SearchDslSyntaxException extends RuntimeException { + public SearchDslSyntaxException(String message) { + super(message); + } + + public SearchDslSyntaxException(String message, Throwable cause) { + super(message, cause); + } + } + /** * Parse DSL string and return intermediate representation */ @@ -74,10 +95,14 @@ public static QsPlan parseDsl(String dsl) { * - default_operator: "and" or "or" for multi-term queries * - mode: "standard" or "lucene" * - minimum_should_match: integer for Lucene mode + * - fields: array of field names for multi-field search + * - type: "best_fields" (default) or "cross_fields" for multi-field semantics * Example: '{"default_field":"title","mode":"lucene","minimum_should_match":0}' + * Example: '{"fields":["title","content"],"default_operator":"and"}' + * Example: '{"fields":["title","content"],"type":"cross_fields"}' * @return Parsed QsPlan */ - public static QsPlan parseDsl(String dsl, String optionsJson) { + public static QsPlan parseDsl(String dsl, @Nullable String optionsJson) { // Parse options from JSON SearchOptions searchOptions = parseOptions(optionsJson); @@ -87,9 +112,19 @@ public static QsPlan parseDsl(String dsl, String optionsJson) { // Use Lucene mode parser if specified if (searchOptions.isLuceneMode()) { + // Multi-field + Lucene mode: first expand DSL, then parse with Lucene semantics + if (searchOptions.isMultiFieldMode()) { + return parseDslMultiFieldLuceneMode(dsl, searchOptions.getFields(), + defaultOperator, searchOptions); + } return parseDslLuceneMode(dsl, defaultField, defaultOperator, searchOptions); } + // Multi-field mode parsing (standard mode) + if (searchOptions.isMultiFieldMode()) { + return parseDslMultiFieldMode(dsl, searchOptions.getFields(), defaultOperator, searchOptions); + } + // Standard mode parsing return parseDslStandardMode(dsl, defaultField, 
defaultOperator); } @@ -103,7 +138,7 @@ public static QsPlan parseDsl(String dsl, String optionsJson) { * @param defaultOperator Default operator ("and" or "or") for multi-term queries (optional, defaults to "or") * @return Parsed QsPlan */ - public static QsPlan parseDsl(String dsl, String defaultField, String defaultOperator) { + public static QsPlan parseDsl(String dsl, @Nullable String defaultField, @Nullable String defaultOperator) { return parseDslStandardMode(dsl, defaultField, defaultOperator); } @@ -124,7 +159,7 @@ private static QsPlan parseDslStandardMode(String dsl, String defaultField, Stri try { // Create ANTLR lexer and parser - SearchLexer lexer = new SearchLexer(new ANTLRInputStream(expandedDsl)); + SearchLexer lexer = new SearchLexer(CharStreams.fromString(expandedDsl)); CommonTokenStream tokens = new CommonTokenStream(lexer); SearchParser parser = new SearchParser(tokens); @@ -136,7 +171,7 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, org.antlr.v4.runtime.RecognitionException e) { - throw new RuntimeException("Invalid search DSL syntax at line " + line + throw new SearchDslSyntaxException("Syntax error at line " + line + ":" + charPositionInLine + " " + msg); } }); @@ -146,7 +181,7 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, // Check if parsing was successful if (tree == null) { - throw new RuntimeException("Invalid search DSL syntax"); + throw new SearchDslSyntaxException("Invalid search DSL syntax: parsing returned null"); } // Build AST using visitor pattern @@ -163,9 +198,28 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, return new QsPlan(root, bindings); - } catch (Exception e) { + } catch (SearchDslSyntaxException e) { + // Syntax error in DSL - user input issue LOG.error("Failed to parse search DSL: '{}' (expanded: '{}')", dsl, expandedDsl, e); - throw new RuntimeException("Invalid search DSL 
syntax: " + dsl + ". Error: " + e.getMessage(), e); + throw new SearchDslSyntaxException("Invalid search DSL: " + dsl + ". " + e.getMessage(), e); + } catch (IllegalArgumentException e) { + // Invalid argument - user input issue + LOG.error("Invalid argument in search DSL: '{}' (expanded: '{}')", dsl, expandedDsl, e); + throw new IllegalArgumentException("Invalid search DSL argument: " + dsl + ". " + e.getMessage(), e); + } catch (NullPointerException e) { + // Internal error - programming bug + LOG.error("Internal error (NPE) while parsing search DSL: '{}' (expanded: '{}')", dsl, expandedDsl, e); + throw new RuntimeException("Internal error while parsing search DSL: " + dsl + + ". This may be a bug. Details: " + e.getMessage(), e); + } catch (IndexOutOfBoundsException e) { + // Internal error - programming bug + LOG.error("Internal error (IOOB) while parsing search DSL: '{}' (expanded: '{}')", dsl, expandedDsl, e); + throw new RuntimeException("Internal error while parsing search DSL: " + dsl + + ". This may be a bug. Details: " + e.getMessage(), e); + } catch (RuntimeException e) { + // Other runtime errors + LOG.error("Unexpected error while parsing search DSL: '{}' (expanded: '{}')", dsl, expandedDsl, e); + throw new RuntimeException("Unexpected error parsing search DSL: " + dsl + ". " + e.getMessage(), e); } } @@ -419,7 +473,7 @@ private static List tokenizeDsl(String dsl) { // End of term (only if not escaped - handled above) if (currentTerm.length() > 0) { terms.add(currentTerm.toString()); - currentTerm = new StringBuilder(); + currentTerm.setLength(0); // Reuse StringBuilder instead of creating new one } } else { currentTerm.append(c); @@ -461,6 +515,510 @@ private static boolean containsWildcard(String term) { return false; } + // ============ Common Helper Methods ============ + + /** + * Create an error QsPlan for empty DSL input. 
+ */ + private static QsPlan createEmptyDslErrorPlan() { + return new QsPlan(new QsNode(QsClauseType.TERM, "error", "empty_dsl"), new ArrayList<>()); + } + + /** + * Validate that DSL is not null or empty. + * @return true if DSL is valid (non-null, non-empty) + */ + private static boolean isValidDsl(String dsl) { + return dsl != null && !dsl.trim().isEmpty(); + } + + /** + * Validate fields list for multi-field mode. + * @throws IllegalArgumentException if fields is null or empty + */ + private static void validateFieldsList(List fields) { + if (fields == null || fields.isEmpty()) { + throw new IllegalArgumentException( + "fields list cannot be null or empty for multi-field mode, got: " + fields); + } + } + + /** + * Common ANTLR parsing helper with visitor pattern. + * Reduces code duplication across parsing methods. + * + * @param expandedDsl The expanded DSL string to parse + * @param visitorFactory Factory function to create the appropriate visitor + * @param originalDsl Original DSL for error messages + * @param modeDescription Description of the parsing mode for error messages + * @return Parsed QsPlan + */ + private static QsPlan parseWithVisitor(String expandedDsl, + Function visitorFactory, + String originalDsl, String modeDescription) { + try { + // Create ANTLR lexer and parser + SearchLexer lexer = new SearchLexer(CharStreams.fromString(expandedDsl)); + CommonTokenStream tokens = new CommonTokenStream(lexer); + SearchParser parser = new SearchParser(tokens); + + // Add error listener + parser.removeErrorListeners(); + parser.addErrorListener(new org.antlr.v4.runtime.BaseErrorListener() { + @Override + public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, + Object offendingSymbol, + int line, int charPositionInLine, + String msg, org.antlr.v4.runtime.RecognitionException e) { + throw new SearchDslSyntaxException("Syntax error at line " + line + + ":" + charPositionInLine + " " + msg); + } + }); + + ParseTree tree = parser.search(); + if 
(tree == null) { + throw new SearchDslSyntaxException("Invalid search DSL syntax: parsing returned null"); + } + + // Build AST using provided visitor + FieldTrackingVisitor visitor = visitorFactory.apply(parser); + QsNode root = visitor.visit(tree); + + // Extract field bindings + Set fieldNames = visitor.getFieldNames(); + List bindings = new ArrayList<>(); + int slotIndex = 0; + for (String fieldName : fieldNames) { + bindings.add(new QsFieldBinding(fieldName, slotIndex++)); + } + + return new QsPlan(root, bindings); + + } catch (SearchDslSyntaxException e) { + // Syntax error in DSL - user input issue + LOG.error("Failed to parse search DSL in {}: '{}' (expanded: '{}')", + modeDescription, originalDsl, expandedDsl, e); + throw new SearchDslSyntaxException("Invalid search DSL: " + originalDsl + ". " + e.getMessage(), e); + } catch (IllegalArgumentException e) { + // Invalid argument - user input issue + LOG.error("Invalid argument in search DSL ({}): '{}' (expanded: '{}')", + modeDescription, originalDsl, expandedDsl, e); + throw new IllegalArgumentException("Invalid search DSL argument: " + originalDsl + + ". " + e.getMessage(), e); + } catch (NullPointerException e) { + // Internal error - programming bug + LOG.error("Internal error (NPE) while parsing search DSL in {}: '{}' (expanded: '{}')", + modeDescription, originalDsl, expandedDsl, e); + throw new RuntimeException("Internal error while parsing search DSL: " + originalDsl + + ". This may be a bug. Details: " + e.getMessage(), e); + } catch (IndexOutOfBoundsException e) { + // Internal error - programming bug + LOG.error("Internal error (IOOB) while parsing search DSL in {}: '{}' (expanded: '{}')", + modeDescription, originalDsl, expandedDsl, e); + throw new RuntimeException("Internal error while parsing search DSL: " + originalDsl + + ". This may be a bug. 
Details: " + e.getMessage(), e); + } catch (RuntimeException e) { + // Other runtime errors + LOG.error("Unexpected error while parsing search DSL in {}: '{}' (expanded: '{}')", + modeDescription, originalDsl, expandedDsl, e); + throw new RuntimeException("Unexpected error parsing search DSL: " + originalDsl + + ". " + e.getMessage(), e); + } + } + + /** + * Expand a single item (term or function) across multiple fields with OR. + * Example: "hello" + ["title", "content"] -> "(title:hello OR content:hello)" + * Example: "EXACT(foo)" + ["title", "content"] -> "(title:EXACT(foo) OR content:EXACT(foo))" + * + * @param item The term or function to expand + * @param fields List of field names + * @return Expanded DSL string + */ + private static String expandItemAcrossFields(String item, List fields) { + if (fields.size() == 1) { + return fields.get(0) + ":" + item; + } + return fields.stream() + .map(field -> field + ":" + item) + .collect(Collectors.joining(" OR ", "(", ")")); + } + + // ============ Multi-Field Expansion Methods ============ + + /** + * Parse DSL in multi-field mode. 
+ * Expansion behavior depends on the type option: + * - best_fields (default): all terms must match within the same field + * - cross_fields: terms can match across different fields + * + * @param dsl DSL query string + * @param fields List of field names to search + * @param defaultOperator "and" or "or" for joining term groups + * @param options Search options containing type setting + * @return Parsed QsPlan + */ + private static QsPlan parseDslMultiFieldMode(String dsl, List fields, String defaultOperator, + SearchOptions options) { + if (!isValidDsl(dsl)) { + return createEmptyDslErrorPlan(); + } + validateFieldsList(fields); + + String normalizedOperator = normalizeDefaultOperator(defaultOperator); + String expandedDsl; + if (options.isCrossFieldsMode()) { + // cross_fields: terms can be across different fields + expandedDsl = expandMultiFieldDsl(dsl.trim(), fields, normalizedOperator); + } else if (options.isBestFieldsMode()) { + // best_fields: all terms must be in the same field + expandedDsl = expandMultiFieldDslBestFields(dsl.trim(), fields, normalizedOperator); + } else { + // Should never happen due to setType() validation, but provide fallback + throw new IllegalStateException( + "Invalid type value: '" + options.getType() + "'. Expected 'best_fields' or 'cross_fields'"); + } + return parseWithVisitor(expandedDsl, parser -> new QsAstBuilder(), dsl, "multi-field mode"); + } + + /** + * Parse DSL in multi-field mode with Lucene boolean semantics. + * First expands DSL across fields, then applies Lucene-style MUST/SHOULD/MUST_NOT logic. + * Expansion behavior depends on the type option (best_fields or cross_fields). 
+ * + * @param dsl DSL query string + * @param fields List of field names to search + * @param defaultOperator "and" or "or" for joining term groups + * @param options Search options containing Lucene mode settings and type + * @return Parsed QsPlan with Lucene boolean semantics + */ + private static QsPlan parseDslMultiFieldLuceneMode(String dsl, List fields, + String defaultOperator, SearchOptions options) { + if (!isValidDsl(dsl)) { + return createEmptyDslErrorPlan(); + } + validateFieldsList(fields); + + String normalizedOperator = normalizeDefaultOperator(defaultOperator); + String expandedDsl; + if (options.isCrossFieldsMode()) { + // cross_fields: terms can be across different fields + expandedDsl = expandMultiFieldDsl(dsl.trim(), fields, normalizedOperator); + } else if (options.isBestFieldsMode()) { + // best_fields: all terms must be in the same field + expandedDsl = expandMultiFieldDslBestFields(dsl.trim(), fields, normalizedOperator); + } else { + // Should never happen due to setType() validation, but provide fallback + throw new IllegalStateException( + "Invalid type value: '" + options.getType() + "'. Expected 'best_fields' or 'cross_fields'"); + } + return parseWithVisitor(expandedDsl, parser -> new QsLuceneModeAstBuilder(options), + dsl, "multi-field Lucene mode"); + } + + /** + * Expand simplified DSL to multi-field format. + * Each term without field prefix is expanded to OR across all fields. + * + * @param dsl Simple DSL string + * @param fields List of field names to search + * @param defaultOperator "and" or "or" for joining term groups + * @return Expanded full DSL + */ + private static String expandMultiFieldDsl(String dsl, List fields, String defaultOperator) { + // Note: fields validation is done by validateFieldsList() before calling this method + if (fields.size() == 1) { + // Single field - delegate to existing method + return expandSimplifiedDsl(dsl, fields.get(0), defaultOperator); + } + + // 1. 
If DSL already contains field names, handle mixed case + if (containsFieldReference(dsl)) { + return expandOperatorExpressionAcrossFields(dsl, fields); + } + + // 2. Check if DSL starts with a function keyword (EXACT, ANY, ALL, IN) + if (startsWithFunction(dsl)) { + // Expand function across fields: EXACT(foo) -> (f1:EXACT(foo) OR f2:EXACT(foo)) + return expandFunctionAcrossFields(dsl, fields); + } + + // 3. Check for explicit boolean operators in DSL + if (containsExplicitOperators(dsl)) { + return expandOperatorExpressionAcrossFields(dsl, fields); + } + + // 4. Tokenize and analyze terms + List terms = tokenizeDsl(dsl); + if (terms.isEmpty()) { + return expandTermAcrossFields(dsl, fields); + } + + // 5. Single term - expand across fields + if (terms.size() == 1) { + return expandTermAcrossFields(terms.get(0), fields); + } + + // 6. Multiple terms - expand each across fields, join with operator + String joinOperator = "and".equals(defaultOperator) ? " AND " : " OR "; + StringBuilder result = new StringBuilder(); + for (int i = 0; i < terms.size(); i++) { + if (i > 0) { + result.append(joinOperator); + } + result.append(expandTermAcrossFields(terms.get(i), fields)); + } + return result.toString(); + } + + /** + * Expand multi-field DSL using best_fields semantics. + * Each field is wrapped with all terms joined by the default operator, then fields are ORed. 
+ * + * Example: "machine learning" with fields ["title", "content"] and default_operator "and" + * Result: (title:machine AND title:learning) OR (content:machine AND content:learning) + * + * @param dsl Simple DSL string + * @param fields List of field names to search + * @param defaultOperator "and" or "or" for joining terms within each field + * @return Expanded full DSL with best_fields semantics + */ + private static String expandMultiFieldDslBestFields(String dsl, List fields, + String defaultOperator) { + // Note: fields validation is done by validateFieldsList() before calling this method + if (fields.size() == 1) { + // Single field - delegate to existing method + return expandSimplifiedDsl(dsl, fields.get(0), defaultOperator); + } + + // 1. Check for leading NOT - must use cross_fields semantics for correct negation + // "NOT hello" should expand to "NOT (title:hello OR content:hello)" + // rather than "(NOT title:hello) OR (NOT content:hello)" which has wrong semantics + String trimmedDsl = dsl.trim(); + if (trimmedDsl.toUpperCase().startsWith("NOT ") + || trimmedDsl.toUpperCase().startsWith("NOT\t")) { + // Use cross_fields expansion for leading NOT + return expandOperatorExpressionAcrossFields(dsl, fields); + } + + // 2. If DSL contains field references or explicit operators, apply best_fields + // by expanding the entire expression per field and ORing the results + if (containsFieldReference(dsl) || containsExplicitOperators(dsl)) { + return expandOperatorExpressionAcrossFieldsBestFields(dsl, fields, defaultOperator); + } + + // 3. Check if DSL starts with a function keyword (EXACT, ANY, ALL, IN) + if (startsWithFunction(dsl)) { + // For functions, use cross_fields approach (function applied to each field) + return expandFunctionAcrossFields(dsl, fields); + } + + // 4. 
Tokenize and analyze terms + List terms = tokenizeDsl(dsl); + if (terms.isEmpty()) { + // Single term case - expand across fields with OR + return expandTermAcrossFields(dsl, fields); + } + + // 5. Single term - expand across fields with OR + if (terms.size() == 1) { + return expandTermAcrossFields(terms.get(0), fields); + } + + // 6. Multiple terms - best_fields: each field with all terms, then OR across fields + String termOperator = "and".equals(defaultOperator) ? " AND " : " OR "; + + StringBuilder result = new StringBuilder(); + for (int fieldIdx = 0; fieldIdx < fields.size(); fieldIdx++) { + if (fieldIdx > 0) { + result.append(" OR "); + } + + String field = fields.get(fieldIdx); + // Build: (field:term1 AND field:term2 AND ...) + result.append("("); + for (int termIdx = 0; termIdx < terms.size(); termIdx++) { + if (termIdx > 0) { + result.append(termOperator); + } + result.append(field).append(":").append(terms.get(termIdx)); + } + result.append(")"); + } + return result.toString(); + } + + /** + * Handle DSL with explicit operators using best_fields semantics. + * For complex expressions, we group by field and OR across fields. + */ + private static String expandOperatorExpressionAcrossFieldsBestFields(String dsl, + List fields, String defaultOperator) { + // For expressions with explicit operators, we apply the entire expression to each field + // and OR the results: (title:expr) OR (content:expr) + StringBuilder result = new StringBuilder(); + for (int i = 0; i < fields.size(); i++) { + if (i > 0) { + result.append(" OR "); + } + String field = fields.get(i); + // Expand the DSL for this single field + String fieldDsl = expandSimplifiedDsl(dsl, field, defaultOperator); + result.append("(").append(fieldDsl).append(")"); + } + return result.toString(); + } + + /** + * Expand a single term across multiple fields with OR. + * Example: "hello" + ["title", "content"] -> "(title:hello OR content:hello)" + * Delegates to expandItemAcrossFields for DRY compliance. 
+ */ + private static String expandTermAcrossFields(String term, List fields) { + return expandItemAcrossFields(term, fields); + } + + /** + * Expand a function call across multiple fields. + * Example: "EXACT(foo bar)" + ["title", "content"] -> "(title:EXACT(foo bar) OR content:EXACT(foo bar))" + * Delegates to expandItemAcrossFields for DRY compliance. + */ + private static String expandFunctionAcrossFields(String dsl, List fields) { + return expandItemAcrossFields(dsl, fields); + } + + /** + * Handle DSL with explicit operators (AND/OR/NOT). + * Each operand without field prefix is expanded across fields. + * Example: "hello AND world" + ["title", "content"] -> + * "(title:hello OR content:hello) AND (title:world OR content:world)" + */ + private static String expandOperatorExpressionAcrossFields(String dsl, List fields) { + StringBuilder result = new StringBuilder(); + StringBuilder currentTerm = new StringBuilder(); + int i = 0; + + while (i < dsl.length()) { + // Skip whitespace + while (i < dsl.length() && Character.isWhitespace(dsl.charAt(i))) { + i++; + } + if (i >= dsl.length()) { + break; + } + + // Handle escape sequences + if (dsl.charAt(i) == '\\' && i + 1 < dsl.length()) { + currentTerm.append(dsl.charAt(i)); + currentTerm.append(dsl.charAt(i + 1)); + i += 2; + continue; + } + + // Handle parentheses - include entire group as a term + if (dsl.charAt(i) == '(') { + int depth = 1; + currentTerm.append('('); + i++; + while (i < dsl.length() && depth > 0) { + char c = dsl.charAt(i); + if (c == '(') { + depth++; + } else if (c == ')') { + depth--; + } + currentTerm.append(c); + i++; + } + continue; + } + + // Try to match operators + String remaining = dsl.substring(i); + String upperRemaining = remaining.toUpperCase(); + + // Check for AND operator + if (matchesOperatorWord(upperRemaining, "AND")) { + flushTermAcrossFields(result, currentTerm, fields); + appendWithSpace(result, "AND"); + i += 3; + continue; + } + + // Check for OR operator + if 
(matchesOperatorWord(upperRemaining, "OR")) { + flushTermAcrossFields(result, currentTerm, fields); + appendWithSpace(result, "OR"); + i += 2; + continue; + } + + // Check for NOT operator + if (matchesOperatorWord(upperRemaining, "NOT")) { + flushTermAcrossFields(result, currentTerm, fields); + appendWithSpace(result, "NOT"); + i += 3; + continue; + } + + // Accumulate term character + currentTerm.append(dsl.charAt(i)); + i++; + } + + // Flush final term + flushTermAcrossFields(result, currentTerm, fields); + + return result.toString().trim(); + } + + /** + * Check if the string starts with an operator word followed by whitespace or end of string. + */ + private static boolean matchesOperatorWord(String upper, String op) { + if (!upper.startsWith(op)) { + return false; + } + int opLen = op.length(); + // Must be followed by whitespace or end of string + return upper.length() == opLen || Character.isWhitespace(upper.charAt(opLen)); + } + + /** + * Flush accumulated term, expanding across fields if needed. 
+ */ + private static void flushTermAcrossFields(StringBuilder result, StringBuilder term, List fields) { + String trimmed = term.toString().trim(); + if (!trimmed.isEmpty()) { + // Check if term already has a field reference + if (containsFieldReference(trimmed)) { + appendWithSpace(result, trimmed); + } else if (trimmed.startsWith("(") && trimmed.endsWith(")")) { + // Parenthesized expression - recursively expand + String inner = trimmed.substring(1, trimmed.length() - 1).trim(); + String expanded = expandOperatorExpressionAcrossFields(inner, fields); + appendWithSpace(result, "(" + expanded + ")"); + } else if (startsWithFunction(trimmed)) { + // Function - expand across fields + appendWithSpace(result, expandFunctionAcrossFields(trimmed, fields)); + } else { + // Regular term - expand across fields + appendWithSpace(result, expandTermAcrossFields(trimmed, fields)); + } + term.setLength(0); + } + } + + /** + * Append text to StringBuilder with a leading space if not empty. + */ + private static void appendWithSpace(StringBuilder sb, String text) { + if (sb.length() > 0) { + sb.append(" "); + } + sb.append(text); + } + /** * Clause types supported */ @@ -490,16 +1048,26 @@ public enum QsOccur { MUST_NOT // Term must not appear (equivalent to -term) } + /** + * Common interface for AST builders that track field names. + * Both QsAstBuilder and QsLuceneModeAstBuilder implement this interface. 
+ */ + private interface FieldTrackingVisitor { + Set getFieldNames(); + + QsNode visit(ParseTree tree); + } + /** * ANTLR visitor to build QsNode AST from parse tree */ - private static class QsAstBuilder extends SearchParserBaseVisitor { - private final Set fieldNames = new HashSet<>(); + private static class QsAstBuilder extends SearchParserBaseVisitor implements FieldTrackingVisitor { + private final Set fieldNames = new LinkedHashSet<>(); // Context stack to track current field name during parsing private String currentFieldName = null; public Set getFieldNames() { - return fieldNames; + return Collections.unmodifiableSet(fieldNames); } @Override @@ -676,7 +1244,9 @@ public QsNode visitSearchValue(SearchParser.SearchValueContext ctx) { return createExactNode(fieldName, ctx.exactValue().getText()); } - // Fallback for unknown types + // Fallback for unknown types - should not normally reach here + LOG.warn("Unexpected search value type encountered, falling back to TERM: field={}, text={}", + fieldName, ctx.getText()); return createTermNode(fieldName, ctx.getText()); } @@ -747,8 +1317,10 @@ private QsNode createAnyAllNode(String fieldName, String anyAllText) { return new QsNode(QsClauseType.ALL, fieldName, sanitizedContent); } - // Fallback to ANY for unknown cases - return new QsNode(QsClauseType.ANY, fieldName, sanitizedContent); + // Unknown ANY/ALL clause type - this should not happen with valid grammar + throw new IllegalArgumentException( + "Unknown ANY/ALL clause type: '" + anyAllText + "'. " + + "Expected ANY(...) or ALL(...)."); } private QsNode createExactNode(String fieldName, String exactText) { @@ -785,20 +1357,29 @@ private String stripOuterQuotes(String text) { } /** - * Intermediate Representation for search DSL parsing result + * Intermediate Representation for search DSL parsing result. + * This class is immutable after construction. 
*/ public static class QsPlan { @JsonProperty("root") - public QsNode root; + private final QsNode root; @JsonProperty("fieldBindings") - public List fieldBindings; + private final List fieldBindings; @JsonCreator public QsPlan(@JsonProperty("root") QsNode root, @JsonProperty("fieldBindings") List fieldBindings) { - this.root = root; - this.fieldBindings = fieldBindings != null ? fieldBindings : new ArrayList<>(); + this.root = Objects.requireNonNull(root, "root cannot be null"); + this.fieldBindings = fieldBindings != null ? new ArrayList<>(fieldBindings) : new ArrayList<>(); + } + + public QsNode getRoot() { + return root; + } + + public List getFieldBindings() { + return Collections.unmodifiableList(fieldBindings); } /** @@ -808,8 +1389,8 @@ public static QsPlan fromJson(String json) { try { return JSON_MAPPER.readValue(json, QsPlan.class); } catch (JsonProcessingException e) { - LOG.warn("Failed to parse QsPlan from JSON: {}", json, e); - return new QsPlan(new QsNode(QsClauseType.TERM, "error", null), new ArrayList<>()); + throw new IllegalArgumentException( + "Failed to parse search plan from JSON: " + e.getMessage(), e); } } @@ -820,8 +1401,7 @@ public String toJson() { try { return JSON_MAPPER.writeValueAsString(this); } catch (JsonProcessingException e) { - LOG.warn("Failed to serialize QsPlan to JSON", e); - return "{}"; + throw new RuntimeException("Failed to serialize QsPlan to JSON", e); } } @@ -839,32 +1419,38 @@ public boolean equals(Object o) { return false; } QsPlan qsPlan = (QsPlan) o; - return Objects.equals(root, qsPlan.root) - && Objects.equals(fieldBindings, qsPlan.fieldBindings); + return Objects.equals(root, qsPlan.getRoot()) + && Objects.equals(fieldBindings, qsPlan.getFieldBindings()); } } /** - * Search AST node representing a clause in the DSL + * Search AST node representing a clause in the DSL. + * + *

Warning: This class is mutable. The {@code occur}, {@code children}, + * and other fields can be modified after construction. Although this class implements + * {@code equals()} and {@code hashCode()}, it should NOT be used as a key in + * {@code HashMap} or element in {@code HashSet} if any field may be modified after + * insertion, as this will break the hash-based collection contract. */ public static class QsNode { @JsonProperty("type") - public QsClauseType type; + private final QsClauseType type; @JsonProperty("field") - public String field; + private String field; @JsonProperty("value") - public String value; + private final String value; @JsonProperty("children") - public List children; + private final List children; @JsonProperty("occur") - public QsOccur occur; + private QsOccur occur; @JsonProperty("minimumShouldMatch") - public Integer minimumShouldMatch; + private final Integer minimumShouldMatch; /** * Constructor for JSON deserialization @@ -886,30 +1472,96 @@ public QsNode(@JsonProperty("type") QsClauseType type, this.type = type; this.field = field; this.value = value; - this.children = children != null ? children : new ArrayList<>(); + this.children = children != null ? new ArrayList<>(children) : new ArrayList<>(); this.occur = occur; this.minimumShouldMatch = minimumShouldMatch; } + /** + * Constructor for leaf nodes (TERM, PHRASE, PREFIX, etc.) + * + * @param type the clause type + * @param field the field name + * @param value the field value + */ public QsNode(QsClauseType type, String field, String value) { this.type = type; this.field = field; this.value = value; this.children = new ArrayList<>(); + this.occur = null; + this.minimumShouldMatch = null; } + /** + * Constructor for compound nodes (AND, OR, NOT) + * + * @param type the clause type + * @param children the child nodes + */ public QsNode(QsClauseType type, List children) { this.type = type; - this.children = children != null ? 
children : new ArrayList<>(); + this.field = null; + this.value = null; + this.children = children != null ? new ArrayList<>(children) : new ArrayList<>(); + this.occur = null; + this.minimumShouldMatch = null; } + /** + * Constructor for OCCUR_BOOLEAN nodes with minimum_should_match + * + * @param type the clause type + * @param children the child nodes + * @param minimumShouldMatch the minimum number of SHOULD clauses that must match + */ public QsNode(QsClauseType type, List children, Integer minimumShouldMatch) { this.type = type; - this.children = children != null ? children : new ArrayList<>(); + this.field = null; + this.value = null; + this.children = children != null ? new ArrayList<>(children) : new ArrayList<>(); + this.occur = null; this.minimumShouldMatch = minimumShouldMatch; } - public QsNode withOccur(QsOccur occur) { + public QsClauseType getType() { + return type; + } + + public String getField() { + return field; + } + + /** + * Sets the field name for this node (used for field name normalization). + * @param field the normalized field name + */ + public void setField(String field) { + this.field = field; + } + + public String getValue() { + return value; + } + + public List getChildren() { + return Collections.unmodifiableList(children); + } + + public QsOccur getOccur() { + return occur; + } + + public Integer getMinimumShouldMatch() { + return minimumShouldMatch; + } + + /** + * Sets the occur type for this node. 
+ * @param occur the occur type (MUST, SHOULD, MUST_NOT) + * @return this node for method chaining + */ + public QsNode setOccur(QsOccur occur) { this.occur = occur; return this; } @@ -928,24 +1580,25 @@ public boolean equals(Object o) { return false; } QsNode qsNode = (QsNode) o; - return type == qsNode.type - && Objects.equals(field, qsNode.field) - && Objects.equals(value, qsNode.value) - && Objects.equals(children, qsNode.children) - && occur == qsNode.occur - && Objects.equals(minimumShouldMatch, qsNode.minimumShouldMatch); + return type == qsNode.getType() + && Objects.equals(field, qsNode.getField()) + && Objects.equals(value, qsNode.getValue()) + && Objects.equals(children, qsNode.getChildren()) + && occur == qsNode.getOccur() + && Objects.equals(minimumShouldMatch, qsNode.getMinimumShouldMatch()); } } /** - * Field binding information extracted from DSL + * Field binding information extracted from DSL. + * The fieldName may be modified for normalization purposes. */ public static class QsFieldBinding { @JsonProperty("fieldName") - public String fieldName; + private String fieldName; @JsonProperty("slotIndex") - public int slotIndex; + private final int slotIndex; @JsonCreator public QsFieldBinding(@JsonProperty("fieldName") String fieldName, @@ -954,6 +1607,22 @@ public QsFieldBinding(@JsonProperty("fieldName") String fieldName, this.slotIndex = slotIndex; } + public String getFieldName() { + return fieldName; + } + + /** + * Sets the field name (used for field name normalization). 
+ * @param fieldName the normalized field name + */ + public void setFieldName(String fieldName) { + this.fieldName = fieldName; + } + + public int getSlotIndex() { + return slotIndex; + } + @Override public int hashCode() { return Objects.hash(fieldName, slotIndex); @@ -980,13 +1649,26 @@ public boolean equals(Object o) { * - default_operator: "and" or "or" for multi-term queries (default: "or") * - mode: "standard" (default) or "lucene" (ES/Lucene-style boolean parsing) * - minimum_should_match: integer for Lucene mode (default: 0 for filter context) + * - fields: array of field names for multi-field search (mutually exclusive with default_field) */ public static class SearchOptions { + @JsonProperty("default_field") private String defaultField = null; + + @JsonProperty("default_operator") private String defaultOperator = null; + + @JsonProperty("mode") private String mode = "standard"; + + @JsonProperty("minimum_should_match") private Integer minimumShouldMatch = null; + private List fields = null; + + @JsonProperty("type") + private String type = "best_fields"; // "best_fields" (default) or "cross_fields" + public String getDefaultField() { return defaultField; } @@ -1022,43 +1704,130 @@ public Integer getMinimumShouldMatch() { public void setMinimumShouldMatch(Integer minimumShouldMatch) { this.minimumShouldMatch = minimumShouldMatch; } - } - /** - * Parse options JSON string. - * Supports the following fields: - * - default_field: default field name when DSL doesn't specify field - * - default_operator: "and" or "or" for multi-term queries - * - mode: "standard" or "lucene" - * - minimum_should_match: integer for Lucene mode - */ - private static SearchOptions parseOptions(String optionsJson) { - SearchOptions options = new SearchOptions(); - if (optionsJson == null || optionsJson.trim().isEmpty()) { - return options; + public List getFields() { + return fields == null ? 
null : Collections.unmodifiableList(fields); } - try { - // Parse JSON using Jackson - com.fasterxml.jackson.databind.JsonNode jsonNode = JSON_MAPPER.readTree(optionsJson); + /** + * Set fields with empty element filtering. + * Empty or whitespace-only strings are filtered out. + */ + @JsonSetter("fields") + public void setFields(List fields) { + if (fields == null) { + this.fields = null; + return; + } + // Filter out empty or whitespace-only elements + List filtered = fields.stream() + .filter(f -> f != null && !f.trim().isEmpty()) + .map(String::trim) + .collect(Collectors.toList()); + this.fields = filtered.isEmpty() ? null : new ArrayList<>(filtered); + } + + /** + * Check if multi-field mode is enabled. + * Multi-field mode is active when fields array is non-null and non-empty. + */ + public boolean isMultiFieldMode() { + return fields != null && !fields.isEmpty(); + } + + /** + * Get the multi-field search type ("best_fields" or "cross_fields"). + */ + public String getType() { + return type; + } - if (jsonNode.has("default_field")) { - options.setDefaultField(jsonNode.get("default_field").asText()); + /** + * Set the multi-field search type. + * @param type Either "best_fields" or "cross_fields" (case-insensitive, independent of the JVM default locale) + * @throws IllegalArgumentException if type is invalid + */ + public void setType(String type) { + if (type == null) { + this.type = "best_fields"; + return; } - if (jsonNode.has("default_operator")) { - options.setDefaultOperator(jsonNode.get("default_operator").asText()); + String normalized = type.trim().toLowerCase(java.util.Locale.ROOT); + if (!"cross_fields".equals(normalized) && !"best_fields".equals(normalized)) { + throw new IllegalArgumentException( + "'type' must be 'cross_fields' or 'best_fields', got: " + type); } - if (jsonNode.has("mode")) { - options.setMode(jsonNode.get("mode").asText()); + this.type = normalized; + } + + /** + * Check if best_fields mode is enabled (default). + * In best_fields mode, all terms must match within the same field. 
+ */ + public boolean isBestFieldsMode() { + return "best_fields".equals(type); + } + + /** + * Check if cross_fields mode is enabled. + * In cross_fields mode, terms can match across different fields. + */ + public boolean isCrossFieldsMode() { + return "cross_fields".equals(type); + } + + /** + * Validate the options after deserialization. + * Checks for: + * - Mutual exclusion between fields and default_field + * - minimum_should_match is non-negative if specified + * + * @throws IllegalArgumentException if validation fails + */ + public void validate() { + // Validation: fields and default_field are mutually exclusive + if (fields != null && !fields.isEmpty() + && defaultField != null && !defaultField.isEmpty()) { + throw new IllegalArgumentException( + "'fields' and 'default_field' are mutually exclusive. Use only one."); } - if (jsonNode.has("minimum_should_match")) { - options.setMinimumShouldMatch(jsonNode.get("minimum_should_match").asInt()); + // Validation: minimum_should_match should be non-negative + if (minimumShouldMatch != null && minimumShouldMatch < 0) { + throw new IllegalArgumentException( + "'minimum_should_match' must be non-negative, got: " + minimumShouldMatch); } - } catch (Exception e) { - LOG.warn("Failed to parse search options JSON: {}", optionsJson, e); } + } - return options; + /** + * Parse options JSON string using Jackson databind. + * The SearchOptions class uses @JsonProperty annotations for field mapping + * and @JsonSetter for custom deserialization logic (e.g., filtering empty fields). 
+ * + * @param optionsJson JSON string containing search options + * @return Parsed and validated SearchOptions + * @throws IllegalArgumentException if JSON is invalid or validation fails + */ + private static SearchOptions parseOptions(String optionsJson) { + if (optionsJson == null || optionsJson.trim().isEmpty()) { + return new SearchOptions(); + } + + try { + // Use Jackson to deserialize directly into SearchOptions + // @JsonProperty annotations handle field mapping + // @JsonSetter on setFields() handles empty element filtering + SearchOptions options = JSON_MAPPER.readValue(optionsJson, SearchOptions.class); + // Run validation checks (mutual exclusion, range checks, etc.) + options.validate(); + return options; + } catch (IllegalArgumentException e) { + // Re-throw validation errors as-is + throw e; + } catch (JsonProcessingException e) { + throw new IllegalArgumentException( + "Invalid search options JSON: '" + optionsJson + "'. Error: " + e.getMessage(), e); + } } /** @@ -1091,7 +1860,7 @@ private static QsPlan parseDslLuceneMode(String dsl, String defaultField, String try { // Create ANTLR lexer and parser - SearchLexer lexer = new SearchLexer(new ANTLRInputStream(expandedDsl)); + SearchLexer lexer = new SearchLexer(CharStreams.fromString(expandedDsl)); CommonTokenStream tokens = new CommonTokenStream(lexer); SearchParser parser = new SearchParser(tokens); @@ -1103,7 +1872,7 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, org.antlr.v4.runtime.RecognitionException e) { - throw new RuntimeException("Invalid search DSL syntax at line " + line + throw new SearchDslSyntaxException("Syntax error at line " + line + ":" + charPositionInLine + " " + msg); } }); @@ -1111,7 +1880,7 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, // Parse using standard parser first ParseTree tree = parser.search(); if (tree == null) { - throw new 
RuntimeException("Invalid search DSL syntax"); + throw new SearchDslSyntaxException("Invalid search DSL syntax: parsing returned null"); } // Build AST using Lucene-mode visitor @@ -1128,9 +1897,31 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, return new QsPlan(root, bindings); - } catch (Exception e) { + } catch (SearchDslSyntaxException e) { + // Syntax error in DSL - user input issue LOG.error("Failed to parse search DSL in Lucene mode: '{}' (expanded: '{}')", dsl, expandedDsl, e); - throw new RuntimeException("Invalid search DSL syntax: " + dsl + ". Error: " + e.getMessage(), e); + throw new SearchDslSyntaxException("Invalid search DSL: " + dsl + ". " + e.getMessage(), e); + } catch (IllegalArgumentException e) { + // Invalid argument - user input issue + LOG.error("Invalid argument in search DSL (Lucene mode): '{}' (expanded: '{}')", dsl, expandedDsl, e); + throw new IllegalArgumentException("Invalid search DSL argument: " + dsl + ". " + e.getMessage(), e); + } catch (NullPointerException e) { + // Internal error - programming bug + LOG.error("Internal error (NPE) while parsing search DSL in Lucene mode: '{}' (expanded: '{}')", + dsl, expandedDsl, e); + throw new RuntimeException("Internal error while parsing search DSL: " + dsl + + ". This may be a bug. Details: " + e.getMessage(), e); + } catch (IndexOutOfBoundsException e) { + // Internal error - programming bug + LOG.error("Internal error (IOOB) while parsing search DSL in Lucene mode: '{}' (expanded: '{}')", + dsl, expandedDsl, e); + throw new RuntimeException("Internal error while parsing search DSL: " + dsl + + ". This may be a bug. Details: " + e.getMessage(), e); + } catch (RuntimeException e) { + // Other runtime errors + LOG.error("Unexpected error while parsing search DSL in Lucene mode: '{}' (expanded: '{}')", + dsl, expandedDsl, e); + throw new RuntimeException("Unexpected error parsing search DSL: " + dsl + ". 
" + e.getMessage(), e); } } @@ -1138,8 +1929,9 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, * ANTLR visitor for Lucene-mode AST building. * Transforms standard boolean expressions into Lucene-style OCCUR_BOOLEAN queries. */ - private static class QsLuceneModeAstBuilder extends SearchParserBaseVisitor { - private final Set fieldNames = new HashSet<>(); + private static class QsLuceneModeAstBuilder extends SearchParserBaseVisitor + implements FieldTrackingVisitor { + private final Set fieldNames = new LinkedHashSet<>(); private final SearchOptions options; private String currentFieldName = null; @@ -1148,7 +1940,7 @@ public QsLuceneModeAstBuilder(SearchOptions options) { } public Set getFieldNames() { - return fieldNames; + return Collections.unmodifiableSet(fieldNames); } @Override @@ -1184,7 +1976,7 @@ private QsNode processLuceneBooleanChain(SearchParser.OrClauseContext ctx) { TermWithOccur singleTerm = terms.get(0); if (singleTerm.isNegated) { // Single negated term - must wrap in OCCUR_BOOLEAN for BE to handle MUST_NOT - singleTerm.node.occur = QsOccur.MUST_NOT; + singleTerm.node.setOccur(QsOccur.MUST_NOT); List children = new ArrayList<>(); children.add(singleTerm.node); return new QsNode(QsClauseType.OCCUR_BOOLEAN, children, 0); @@ -1212,7 +2004,7 @@ private QsNode processLuceneBooleanChain(SearchParser.OrClauseContext ctx) { if (hasMust) { terms = terms.stream() .filter(t -> t.occur != QsOccur.SHOULD) - .collect(java.util.stream.Collectors.toList()); + .collect(Collectors.toList()); } } @@ -1224,7 +2016,7 @@ private QsNode processLuceneBooleanChain(SearchParser.OrClauseContext ctx) { TermWithOccur remainingTerm = terms.get(0); if (remainingTerm.occur == QsOccur.MUST_NOT) { // Single MUST_NOT term - must wrap in OCCUR_BOOLEAN for BE to handle - remainingTerm.node.occur = QsOccur.MUST_NOT; + remainingTerm.node.setOccur(QsOccur.MUST_NOT); List children = new ArrayList<>(); children.add(remainingTerm.node); return new 
QsNode(QsClauseType.OCCUR_BOOLEAN, children, 0); @@ -1235,7 +2027,7 @@ private QsNode processLuceneBooleanChain(SearchParser.OrClauseContext ctx) { // Build OCCUR_BOOLEAN node List children = new ArrayList<>(); for (TermWithOccur term : terms) { - term.node.occur = term.occur; + term.node.setOccur(term.occur); children.add(term.node); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java index 152a7e75929f60..76e25cc3879e65 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java @@ -127,8 +127,8 @@ public void testQsPlanParsing() { try { SearchDslParser.QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan, "Plan should not be null for DSL: " + dsl); - Assertions.assertNotNull(plan.root, "Plan root should not be null for DSL: " + dsl); - Assertions.assertTrue(plan.fieldBindings.size() > 0, "Should have field bindings for DSL: " + dsl); + Assertions.assertNotNull(plan.getRoot(), "Plan root should not be null for DSL: " + dsl); + Assertions.assertTrue(plan.getFieldBindings().size() > 0, "Should have field bindings for DSL: " + dsl); } catch (Exception e) { // DSL parsing might fail for complex cases - that's acceptable System.out.println("DSL parsing failed for: " + dsl + " - " + e.getMessage()); @@ -142,10 +142,10 @@ public void testFieldNameExtraction() { SearchDslParser.QsPlan plan = SearchDslParser.parseDsl(dsl); // Should extract 3 unique field names - Assertions.assertEquals(3, plan.fieldBindings.size()); + Assertions.assertEquals(3, plan.getFieldBindings().size()); - List fieldNames = plan.fieldBindings.stream() - .map(binding -> binding.fieldName) + List fieldNames = plan.getFieldBindings().stream() + .map(binding -> binding.getFieldName()) 
.distinct() .collect(java.util.stream.Collectors.toList()); @@ -163,12 +163,12 @@ public void testCaseInsensitiveFieldNames() { SearchDslParser.QsPlan plan2 = SearchDslParser.parseDsl(dsl2); // Both should work and extract field names - Assertions.assertEquals(1, plan1.fieldBindings.size()); - Assertions.assertEquals(1, plan2.fieldBindings.size()); + Assertions.assertEquals(1, plan1.getFieldBindings().size()); + Assertions.assertEquals(1, plan2.getFieldBindings().size()); // Field names should be consistent (implementation dependent) - Assertions.assertNotNull(plan1.fieldBindings.get(0).fieldName); - Assertions.assertNotNull(plan2.fieldBindings.get(0).fieldName); + Assertions.assertNotNull(plan1.getFieldBindings().get(0).getFieldName()); + Assertions.assertNotNull(plan2.getFieldBindings().get(0).getFieldName()); } @Test @@ -198,10 +198,10 @@ public void testComplexDslStructures() { try { SearchDslParser.QsPlan plan = SearchDslParser.parseDsl(complexDsl); Assertions.assertNotNull(plan); - Assertions.assertNotNull(plan.root); + Assertions.assertNotNull(plan.getRoot()); // Should have multiple field bindings - Assertions.assertTrue(plan.fieldBindings.size() >= 2); + Assertions.assertTrue(plan.getFieldBindings().size() >= 2); } catch (Exception e) { // Complex DSL might not be fully supported yet @@ -215,7 +215,7 @@ public void testSlotReferenceConsistency() { SearchDslParser.QsPlan plan = SearchDslParser.parseDsl(dsl); // Create slot reference matching the field binding - String fieldName = plan.fieldBindings.get(0).fieldName; + String fieldName = plan.getFieldBindings().get(0).getFieldName(); SlotReference slot = new SlotReference(fieldName, StringType.INSTANCE, true, Arrays.asList()); SearchExpression expr = new SearchExpression(dsl, plan, Arrays.asList(slot)); @@ -246,8 +246,8 @@ public void testRewriteSearchHandlesCaseInsensitiveField() throws Exception { Assertions.assertEquals("name", slot.getName()); SearchDslParser.QsPlan normalizedPlan = 
searchExpression.getQsPlan(); - Assertions.assertEquals("name", normalizedPlan.fieldBindings.get(0).fieldName); - Assertions.assertEquals("name", normalizedPlan.root.field); + Assertions.assertEquals("name", normalizedPlan.getFieldBindings().get(0).getFieldName()); + Assertions.assertEquals("name", normalizedPlan.getRoot().getField()); } @Test diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/SearchExpressionTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/SearchExpressionTest.java index 9d6996b928decd..43de6d03205354 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/SearchExpressionTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/SearchExpressionTest.java @@ -212,7 +212,7 @@ public void testMultipleSlotChildren() { Assertions.assertEquals(2, searchExpr.children().size()); Assertions.assertEquals(titleSlot, searchExpr.children().get(0)); Assertions.assertEquals(contentSlot, searchExpr.children().get(1)); - Assertions.assertEquals(2, searchExpr.getQsPlan().fieldBindings.size()); + Assertions.assertEquals(2, searchExpr.getQsPlan().getFieldBindings().size()); } @Test diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java index 6279aead20a708..e45790b4efbdc3 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java @@ -37,14 +37,14 @@ public void testSimpleTermQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertNotNull(plan.root); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("title", 
plan.root.field); - Assertions.assertEquals("hello", plan.root.value); + Assertions.assertNotNull(plan.getRoot()); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("title", plan.getRoot().getField()); + Assertions.assertEquals("hello", plan.getRoot().getValue()); - Assertions.assertEquals(1, plan.fieldBindings.size()); - QsFieldBinding binding = plan.fieldBindings.get(0); - Assertions.assertEquals("title", binding.fieldName); + Assertions.assertEquals(1, plan.getFieldBindings().size()); + QsFieldBinding binding = plan.getFieldBindings().get(0); + Assertions.assertEquals("title", binding.getFieldName()); } @Test @@ -53,9 +53,9 @@ public void testPhraseQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.PHRASE, plan.root.type); - Assertions.assertEquals("content", plan.root.field); - Assertions.assertEquals("hello world", plan.root.value); + Assertions.assertEquals(QsClauseType.PHRASE, plan.getRoot().getType()); + Assertions.assertEquals("content", plan.getRoot().getField()); + Assertions.assertEquals("hello world", plan.getRoot().getValue()); } @Test @@ -64,9 +64,9 @@ public void testPrefixQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.PREFIX, plan.root.type); - Assertions.assertEquals("title", plan.root.field); - Assertions.assertEquals("hello*", plan.root.value); + Assertions.assertEquals(QsClauseType.PREFIX, plan.getRoot().getType()); + Assertions.assertEquals("title", plan.getRoot().getField()); + Assertions.assertEquals("hello*", plan.getRoot().getValue()); } @Test @@ -75,9 +75,9 @@ public void testWildcardQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.WILDCARD, plan.root.type); - Assertions.assertEquals("title", plan.root.field); - Assertions.assertEquals("h*llo", plan.root.value); + 
Assertions.assertEquals(QsClauseType.WILDCARD, plan.getRoot().getType()); + Assertions.assertEquals("title", plan.getRoot().getField()); + Assertions.assertEquals("h*llo", plan.getRoot().getValue()); } @Test @@ -86,9 +86,9 @@ public void testRegexpQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.REGEXP, plan.root.type); - Assertions.assertEquals("title", plan.root.field); - Assertions.assertEquals("[a-z]+", plan.root.value); // slashes removed + Assertions.assertEquals(QsClauseType.REGEXP, plan.getRoot().getType()); + Assertions.assertEquals("title", plan.getRoot().getField()); + Assertions.assertEquals("[a-z]+", plan.getRoot().getValue()); // slashes removed } @Test @@ -97,9 +97,9 @@ public void testRangeQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.RANGE, plan.root.type); - Assertions.assertEquals("age", plan.root.field); - Assertions.assertEquals("[18 TO 65]", plan.root.value); + Assertions.assertEquals(QsClauseType.RANGE, plan.getRoot().getType()); + Assertions.assertEquals("age", plan.getRoot().getField()); + Assertions.assertEquals("[18 TO 65]", plan.getRoot().getValue()); } @Test @@ -108,9 +108,9 @@ public void testListQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.LIST, plan.root.type); - Assertions.assertEquals("category", plan.root.field); - Assertions.assertEquals("IN(tech news)", plan.root.value); + Assertions.assertEquals(QsClauseType.LIST, plan.getRoot().getType()); + Assertions.assertEquals("category", plan.getRoot().getField()); + Assertions.assertEquals("IN(tech news)", plan.getRoot().getValue()); } @Test @@ -119,9 +119,9 @@ public void testAnyQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.ANY, plan.root.type); - 
Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("java python", plan.root.value); + Assertions.assertEquals(QsClauseType.ANY, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("java python", plan.getRoot().getValue()); } @Test @@ -130,9 +130,9 @@ public void testAllQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.ALL, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("programming language", plan.root.value); + Assertions.assertEquals(QsClauseType.ALL, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("programming language", plan.getRoot().getValue()); } @Test @@ -141,9 +141,9 @@ public void testAllQueryWithQuotes() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.ALL, plan.root.type); - Assertions.assertEquals("redirect", plan.root.field); - Assertions.assertEquals("Rainbowman", plan.root.value); + Assertions.assertEquals(QsClauseType.ALL, plan.getRoot().getType()); + Assertions.assertEquals("redirect", plan.getRoot().getField()); + Assertions.assertEquals("Rainbowman", plan.getRoot().getValue()); } @Test @@ -152,9 +152,9 @@ public void testAnyQueryWithQuotes() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.ANY, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("Mandy Patinkin", plan.root.value); + Assertions.assertEquals(QsClauseType.ANY, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("Mandy Patinkin", plan.getRoot().getValue()); } @Test @@ -163,23 +163,23 @@ public void testAndQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); 
Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.AND, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); - QsNode leftChild = plan.root.children.get(0); - Assertions.assertEquals(QsClauseType.TERM, leftChild.type); - Assertions.assertEquals("title", leftChild.field); - Assertions.assertEquals("hello", leftChild.value); + QsNode leftChild = plan.getRoot().getChildren().get(0); + Assertions.assertEquals(QsClauseType.TERM, leftChild.getType()); + Assertions.assertEquals("title", leftChild.getField()); + Assertions.assertEquals("hello", leftChild.getValue()); - QsNode rightChild = plan.root.children.get(1); - Assertions.assertEquals(QsClauseType.TERM, rightChild.type); - Assertions.assertEquals("content", rightChild.field); - Assertions.assertEquals("world", rightChild.value); + QsNode rightChild = plan.getRoot().getChildren().get(1); + Assertions.assertEquals(QsClauseType.TERM, rightChild.getType()); + Assertions.assertEquals("content", rightChild.getField()); + Assertions.assertEquals("world", rightChild.getValue()); // Should have 2 field bindings - Assertions.assertEquals(2, plan.fieldBindings.size()); - Assertions.assertTrue(plan.fieldBindings.stream().anyMatch(b -> "title".equals(b.fieldName))); - Assertions.assertTrue(plan.fieldBindings.stream().anyMatch(b -> "content".equals(b.fieldName))); + Assertions.assertEquals(2, plan.getFieldBindings().size()); + Assertions.assertTrue(plan.getFieldBindings().stream().anyMatch(b -> "title".equals(b.getFieldName()))); + Assertions.assertTrue(plan.getFieldBindings().stream().anyMatch(b -> "content".equals(b.getFieldName()))); } @Test @@ -188,8 +188,8 @@ public void testOrQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OR, plan.root.type); - Assertions.assertEquals(2, 
plan.root.children.size()); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); } @Test @@ -198,13 +198,13 @@ public void testNotQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.NOT, plan.root.type); - Assertions.assertEquals(1, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.NOT, plan.getRoot().getType()); + Assertions.assertEquals(1, plan.getRoot().getChildren().size()); - QsNode child = plan.root.children.get(0); - Assertions.assertEquals(QsClauseType.TERM, child.type); - Assertions.assertEquals("title", child.field); - Assertions.assertEquals("spam", child.value); + QsNode child = plan.getRoot().getChildren().get(0); + Assertions.assertEquals(QsClauseType.TERM, child.getType()); + Assertions.assertEquals("title", child.getField()); + Assertions.assertEquals("spam", child.getValue()); } @Test @@ -213,14 +213,14 @@ public void testComplexQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.AND, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); // Should have 3 field bindings - Assertions.assertEquals(3, plan.fieldBindings.size()); - Assertions.assertTrue(plan.fieldBindings.stream().anyMatch(b -> "title".equals(b.fieldName))); - Assertions.assertTrue(plan.fieldBindings.stream().anyMatch(b -> "content".equals(b.fieldName))); - Assertions.assertTrue(plan.fieldBindings.stream().anyMatch(b -> "category".equals(b.fieldName))); + Assertions.assertEquals(3, plan.getFieldBindings().size()); + Assertions.assertTrue(plan.getFieldBindings().stream().anyMatch(b -> "title".equals(b.getFieldName()))); + Assertions.assertTrue(plan.getFieldBindings().stream().anyMatch(b 
-> "content".equals(b.getFieldName()))); + Assertions.assertTrue(plan.getFieldBindings().stream().anyMatch(b -> "category".equals(b.getFieldName()))); } @Test @@ -229,9 +229,9 @@ public void testEmptyDsl() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("error", plan.root.field); - Assertions.assertEquals("empty_dsl", plan.root.value); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("error", plan.getRoot().getField()); + Assertions.assertEquals("empty_dsl", plan.getRoot().getValue()); } @Test @@ -242,7 +242,7 @@ public void testInvalidDsl() { SearchDslParser.parseDsl(dsl); }); - Assertions.assertTrue(exception.getMessage().contains("Invalid search DSL syntax")); + Assertions.assertTrue(exception.getMessage().contains("Invalid search DSL")); } @Test @@ -258,9 +258,9 @@ public void testQsPlanSerialization() { QsPlan deserialized = QsPlan.fromJson(json); Assertions.assertNotNull(deserialized); - Assertions.assertEquals(plan.root.type, deserialized.root.type); - Assertions.assertEquals(plan.root.field, deserialized.root.field); - Assertions.assertEquals(plan.root.value, deserialized.root.value); + Assertions.assertEquals(plan.getRoot().getType(), deserialized.getRoot().getType()); + Assertions.assertEquals(plan.getRoot().getField(), deserialized.getRoot().getField()); + Assertions.assertEquals(plan.getRoot().getValue(), deserialized.getRoot().getValue()); } @Test @@ -269,9 +269,9 @@ public void testQuotedFieldNames() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals("field name", plan.root.field); - Assertions.assertEquals(1, plan.fieldBindings.size()); - Assertions.assertEquals("field name", plan.fieldBindings.get(0).fieldName); + Assertions.assertEquals("field name", plan.getRoot().getField()); + Assertions.assertEquals(1, 
plan.getFieldBindings().size()); + Assertions.assertEquals("field name", plan.getFieldBindings().get(0).getFieldName()); } // ============ Tests for Default Field and Operator Support ============ @@ -283,11 +283,11 @@ public void testDefaultFieldWithSimpleTerm() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("foo", plan.root.value); - Assertions.assertEquals(1, plan.fieldBindings.size()); - Assertions.assertEquals("tags", plan.fieldBindings.get(0).fieldName); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("foo", plan.getRoot().getValue()); + Assertions.assertEquals(1, plan.getFieldBindings().size()); + Assertions.assertEquals("tags", plan.getFieldBindings().get(0).getFieldName()); } @Test @@ -297,9 +297,9 @@ public void testDefaultFieldWithMultiTermAnd() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "and"); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.ALL, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("foo bar", plan.root.value); + Assertions.assertEquals(QsClauseType.ALL, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("foo bar", plan.getRoot().getValue()); } @Test @@ -309,9 +309,9 @@ public void testDefaultFieldWithMultiTermOr() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "or"); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.ANY, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("foo bar", plan.root.value); + Assertions.assertEquals(QsClauseType.ANY, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + 
Assertions.assertEquals("foo bar", plan.getRoot().getValue()); } @Test @@ -321,9 +321,9 @@ public void testDefaultFieldWithMultiTermDefaultOr() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.ANY, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("foo bar", plan.root.value); + Assertions.assertEquals(QsClauseType.ANY, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("foo bar", plan.getRoot().getValue()); } @Test @@ -333,9 +333,9 @@ public void testDefaultFieldWithWildcardSingleTerm() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.PREFIX, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("foo*", plan.root.value); + Assertions.assertEquals(QsClauseType.PREFIX, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("foo*", plan.getRoot().getValue()); } @Test @@ -345,18 +345,18 @@ public void testDefaultFieldWithWildcardMultiTermAnd() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "and"); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.AND, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); - QsNode firstChild = plan.root.children.get(0); - Assertions.assertEquals(QsClauseType.PREFIX, firstChild.type); - Assertions.assertEquals("tags", firstChild.field); - Assertions.assertEquals("foo*", firstChild.value); + QsNode firstChild = plan.getRoot().getChildren().get(0); + Assertions.assertEquals(QsClauseType.PREFIX, firstChild.getType()); + Assertions.assertEquals("tags", firstChild.getField()); + 
Assertions.assertEquals("foo*", firstChild.getValue()); - QsNode secondChild = plan.root.children.get(1); - Assertions.assertEquals(QsClauseType.PREFIX, secondChild.type); - Assertions.assertEquals("tags", secondChild.field); - Assertions.assertEquals("bar*", secondChild.value); + QsNode secondChild = plan.getRoot().getChildren().get(1); + Assertions.assertEquals(QsClauseType.PREFIX, secondChild.getType()); + Assertions.assertEquals("tags", secondChild.getField()); + Assertions.assertEquals("bar*", secondChild.getValue()); } @Test @@ -366,8 +366,8 @@ public void testDefaultFieldWithWildcardMultiTermOr() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "or"); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OR, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); } @Test @@ -377,16 +377,16 @@ public void testDefaultFieldWithExplicitOperatorOverride() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "and"); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OR, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); - QsNode firstChild = plan.root.children.get(0); - Assertions.assertEquals("tags", firstChild.field); - Assertions.assertEquals("foo", firstChild.value); + QsNode firstChild = plan.getRoot().getChildren().get(0); + Assertions.assertEquals("tags", firstChild.getField()); + Assertions.assertEquals("foo", firstChild.getValue()); - QsNode secondChild = plan.root.children.get(1); - Assertions.assertEquals("tags", secondChild.field); - Assertions.assertEquals("bar", secondChild.value); + QsNode secondChild = plan.getRoot().getChildren().get(1); + Assertions.assertEquals("tags", secondChild.getField()); 
+ Assertions.assertEquals("bar", secondChild.getValue()); } @Test @@ -396,8 +396,8 @@ public void testDefaultFieldWithExplicitAndOperator() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "or"); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.AND, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); } @Test @@ -407,9 +407,9 @@ public void testDefaultFieldWithExactFunction() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.EXACT, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("foo bar", plan.root.value); + Assertions.assertEquals(QsClauseType.EXACT, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("foo bar", plan.getRoot().getValue()); } @Test @@ -419,9 +419,9 @@ public void testDefaultFieldWithAnyFunction() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.ANY, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("foo bar", plan.root.value); + Assertions.assertEquals(QsClauseType.ANY, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("foo bar", plan.getRoot().getValue()); } @Test @@ -431,9 +431,9 @@ public void testDefaultFieldWithAllFunction() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.ALL, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("foo bar", plan.root.value); + Assertions.assertEquals(QsClauseType.ALL, plan.getRoot().getType()); + 
Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("foo bar", plan.getRoot().getValue()); } @Test @@ -443,9 +443,9 @@ public void testDefaultFieldIgnoredWhenDslHasFieldReference() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "and"); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("title", plan.root.field); // Should be "title", not "tags" - Assertions.assertEquals("hello", plan.root.value); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("title", plan.getRoot().getField()); // Should be "title", not "tags" + Assertions.assertEquals("hello", plan.getRoot().getValue()); } @Test @@ -468,15 +468,15 @@ public void testDefaultOperatorCaseInsensitive() { // Test "AND" QsPlan plan1 = SearchDslParser.parseDsl(dsl, "tags", "AND"); - Assertions.assertEquals(QsClauseType.ALL, plan1.root.type); + Assertions.assertEquals(QsClauseType.ALL, plan1.getRoot().getType()); // Test "Or" QsPlan plan2 = SearchDslParser.parseDsl(dsl, "tags", "Or"); - Assertions.assertEquals(QsClauseType.ANY, plan2.root.type); + Assertions.assertEquals(QsClauseType.ANY, plan2.getRoot().getType()); // Test "aNd" QsPlan plan3 = SearchDslParser.parseDsl(dsl, "tags", "aNd"); - Assertions.assertEquals(QsClauseType.ALL, plan3.root.type); + Assertions.assertEquals(QsClauseType.ALL, plan3.getRoot().getType()); } @Test @@ -486,9 +486,9 @@ public void testDefaultFieldWithComplexWildcard() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.WILDCARD, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("*foo*", plan.root.value); + Assertions.assertEquals(QsClauseType.WILDCARD, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("*foo*", plan.getRoot().getValue()); } @Test 
@@ -499,8 +499,8 @@ public void testDefaultFieldWithMixedWildcards() { Assertions.assertNotNull(plan); // Should create AND query because it contains wildcards - Assertions.assertEquals(QsClauseType.AND, plan.root.type); - Assertions.assertEquals(3, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(3, plan.getRoot().getChildren().size()); } @Test @@ -510,9 +510,9 @@ public void testDefaultFieldWithQuotedPhrase() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "and"); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.PHRASE, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("hello world", plan.root.value); + Assertions.assertEquals(QsClauseType.PHRASE, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("hello world", plan.getRoot().getValue()); } @Test @@ -522,13 +522,13 @@ public void testDefaultFieldWithNotOperator() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.NOT, plan.root.type); - Assertions.assertEquals(1, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.NOT, plan.getRoot().getType()); + Assertions.assertEquals(1, plan.getRoot().getChildren().size()); - QsNode child = plan.root.children.get(0); - Assertions.assertEquals(QsClauseType.TERM, child.type); - Assertions.assertEquals("tags", child.field); - Assertions.assertEquals("foo", child.value); + QsNode child = plan.getRoot().getChildren().get(0); + Assertions.assertEquals(QsClauseType.TERM, child.getType()); + Assertions.assertEquals("tags", child.getField()); + Assertions.assertEquals("foo", child.getValue()); } @Test @@ -542,7 +542,7 @@ public void testDefaultFieldWithEmptyString() { SearchDslParser.parseDsl(dsl, "", "and"); }); - Assertions.assertTrue(exception.getMessage().contains("Invalid 
search DSL syntax")); + Assertions.assertTrue(exception.getMessage().contains("Invalid search DSL")); } @Test @@ -552,7 +552,7 @@ public void testDefaultFieldWithNullOperator() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.ANY, plan.root.type); // Defaults to OR/ANY + Assertions.assertEquals(QsClauseType.ANY, plan.getRoot().getType()); // Defaults to OR/ANY } @Test @@ -562,9 +562,9 @@ public void testDefaultFieldWithSingleWildcardTerm() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "and"); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.WILDCARD, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("f?o", plan.root.value); + Assertions.assertEquals(QsClauseType.WILDCARD, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("f?o", plan.getRoot().getValue()); } @Test @@ -574,9 +574,9 @@ public void testDefaultFieldPreservesFieldBindings() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "and"); Assertions.assertNotNull(plan); - Assertions.assertEquals(1, plan.fieldBindings.size()); - Assertions.assertEquals("tags", plan.fieldBindings.get(0).fieldName); - Assertions.assertEquals(0, plan.fieldBindings.get(0).slotIndex); + Assertions.assertEquals(1, plan.getFieldBindings().size()); + Assertions.assertEquals("tags", plan.getFieldBindings().get(0).getFieldName()); + Assertions.assertEquals(0, plan.getFieldBindings().get(0).getSlotIndex()); } // ============ Tests for Lucene Mode Parsing ============ @@ -589,13 +589,13 @@ public void testLuceneModeSimpleAndQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); - Assertions.assertEquals(Integer.valueOf(0), plan.root.minimumShouldMatch); 
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); + Assertions.assertEquals(Integer.valueOf(0), plan.getRoot().getMinimumShouldMatch()); // Both children should have MUST occur - for (QsNode child : plan.root.children) { - Assertions.assertEquals(SearchDslParser.QsOccur.MUST, child.occur); + for (QsNode child : plan.getRoot().getChildren()) { + Assertions.assertEquals(SearchDslParser.QsOccur.MUST, child.getOccur()); } } @@ -607,16 +607,16 @@ public void testLuceneModeSimpleOrQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); - Assertions.assertEquals(3, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); + Assertions.assertEquals(3, plan.getRoot().getChildren().size()); // All children should have SHOULD occur - for (QsNode child : plan.root.children) { - Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD, child.occur); + for (QsNode child : plan.getRoot().getChildren()) { + Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD, child.getOccur()); } // minimum_should_match should be 1 (at least one must match) - Assertions.assertEquals(Integer.valueOf(1), plan.root.minimumShouldMatch); + Assertions.assertEquals(Integer.valueOf(1), plan.getRoot().getMinimumShouldMatch()); } @Test @@ -630,9 +630,9 @@ public void testLuceneModeAndOrMixed() { Assertions.assertNotNull(plan); // With minimum_should_match=0 and MUST clauses present, SHOULD is discarded // Only "a" remains with MUST - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("field", plan.root.field); - Assertions.assertEquals("a", plan.root.value); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("field", plan.getRoot().getField()); + 
Assertions.assertEquals("a", plan.getRoot().getValue()); } @Test @@ -650,23 +650,23 @@ public void testLuceneModeAndOrNotMixed() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); // Should have 3 children: a(MUST), c(MUST_NOT), d(MUST) // b is filtered out because it becomes SHOULD - Assertions.assertEquals(3, plan.root.children.size()); + Assertions.assertEquals(3, plan.getRoot().getChildren().size()); - QsNode nodeA = plan.root.children.get(0); - Assertions.assertEquals("a", nodeA.value); - Assertions.assertEquals(SearchDslParser.QsOccur.MUST, nodeA.occur); + QsNode nodeA = plan.getRoot().getChildren().get(0); + Assertions.assertEquals("a", nodeA.getValue()); + Assertions.assertEquals(SearchDslParser.QsOccur.MUST, nodeA.getOccur()); - QsNode nodeC = plan.root.children.get(1); - Assertions.assertEquals("c", nodeC.value); - Assertions.assertEquals(SearchDslParser.QsOccur.MUST_NOT, nodeC.occur); + QsNode nodeC = plan.getRoot().getChildren().get(1); + Assertions.assertEquals("c", nodeC.getValue()); + Assertions.assertEquals(SearchDslParser.QsOccur.MUST_NOT, nodeC.getOccur()); - QsNode nodeD = plan.root.children.get(2); - Assertions.assertEquals("d", nodeD.value); - Assertions.assertEquals(SearchDslParser.QsOccur.MUST, nodeD.occur); + QsNode nodeD = plan.getRoot().getChildren().get(2); + Assertions.assertEquals("d", nodeD.getValue()); + Assertions.assertEquals(SearchDslParser.QsOccur.MUST, nodeD.getOccur()); } @Test @@ -680,9 +680,9 @@ public void testLuceneModeWithDefaultField() { Assertions.assertNotNull(plan); // With minimum_should_match=0, only aterm (MUST) remains - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("firstname", plan.root.field); - Assertions.assertEquals("aterm", plan.root.value); + 
Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("firstname", plan.getRoot().getField()); + Assertions.assertEquals("aterm", plan.getRoot().getValue()); } @Test @@ -695,10 +695,10 @@ public void testLuceneModeNotOperator() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); - Assertions.assertEquals(1, plan.root.children.size()); - Assertions.assertEquals(QsClauseType.TERM, plan.root.children.get(0).type); - Assertions.assertEquals(QsOccur.MUST_NOT, plan.root.children.get(0).occur); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); + Assertions.assertEquals(1, plan.getRoot().getChildren().size()); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getChildren().get(0).getType()); + Assertions.assertEquals(QsOccur.MUST_NOT, plan.getRoot().getChildren().get(0).getOccur()); } @Test @@ -709,10 +709,10 @@ public void testLuceneModeMinimumShouldMatchExplicit() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); // All 3 terms should be present - Assertions.assertEquals(3, plan.root.children.size()); - Assertions.assertEquals(Integer.valueOf(1), plan.root.minimumShouldMatch); + Assertions.assertEquals(3, plan.getRoot().getChildren().size()); + Assertions.assertEquals(Integer.valueOf(1), plan.getRoot().getMinimumShouldMatch()); } @Test @@ -723,9 +723,9 @@ public void testLuceneModeSingleTerm() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("field", plan.root.field); - Assertions.assertEquals("hello", plan.root.value); + Assertions.assertEquals(QsClauseType.TERM, 
plan.getRoot().getType()); + Assertions.assertEquals("field", plan.getRoot().getField()); + Assertions.assertEquals("hello", plan.getRoot().getValue()); } @Test @@ -736,19 +736,17 @@ public void testStandardModeUnchanged() { Assertions.assertNotNull(plan); // Standard mode uses traditional boolean algebra: OR at top level - Assertions.assertEquals(QsClauseType.OR, plan.root.type); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); } @Test public void testLuceneModeInvalidJson() { - // Test: invalid JSON options should fall back to standard mode + // Test: invalid JSON options should throw an exception String dsl = "field:a AND field:b"; String options = "not valid json"; - QsPlan plan = SearchDslParser.parseDsl(dsl, options); - - Assertions.assertNotNull(plan); - // Should fall back to standard mode (AND type) - Assertions.assertEquals(QsClauseType.AND, plan.root.type); + Assertions.assertThrows(IllegalArgumentException.class, () -> { + SearchDslParser.parseDsl(dsl, options); + }); } @Test @@ -758,7 +756,7 @@ public void testLuceneModeEmptyOptions() { QsPlan plan = SearchDslParser.parseDsl(dsl, ""); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.AND, plan.root.type); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); } // ============ Tests for Escape Handling ============ @@ -771,10 +769,10 @@ public void testEscapedSpaceInTerm() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("field", plan.root.field); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("field", plan.getRoot().getField()); // After unescape: "First\ Value" -> "First Value" - Assertions.assertEquals("First Value", plan.root.value); + Assertions.assertEquals("First Value", plan.getRoot().getValue()); } @Test @@ -785,10 +783,10 @@ public void testEscapedParentheses() { QsPlan 
plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("field", plan.root.field); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("field", plan.getRoot().getField()); // After unescape: "hello\(world\)" -> "hello(world)" - Assertions.assertEquals("hello(world)", plan.root.value); + Assertions.assertEquals("hello(world)", plan.getRoot().getValue()); } @Test @@ -799,10 +797,10 @@ public void testEscapedColon() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("field", plan.root.field); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("field", plan.getRoot().getField()); // After unescape: "value\:with\:colons" -> "value:with:colons" - Assertions.assertEquals("value:with:colons", plan.root.value); + Assertions.assertEquals("value:with:colons", plan.getRoot().getValue()); } @Test @@ -813,10 +811,10 @@ public void testEscapedBackslash() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("field", plan.root.field); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("field", plan.getRoot().getField()); // After unescape: "path\\to\\file" -> "path\to\file" - Assertions.assertEquals("path\\to\\file", plan.root.value); + Assertions.assertEquals("path\\to\\file", plan.getRoot().getValue()); } @Test @@ -826,8 +824,8 @@ public void testUppercaseAndOperator() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.AND, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.AND, 
plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); } @Test @@ -840,7 +838,7 @@ public void testLowercaseAndOperator() { Assertions.assertNotNull(plan); // Current behavior: lowercase 'and' IS an operator - Assertions.assertEquals(QsClauseType.AND, plan.root.type); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); // TODO: If PDF requires only uppercase, this should fail and return OR or different structure } @@ -851,8 +849,8 @@ public void testUppercaseOrOperator() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OR, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); } @Test @@ -864,7 +862,7 @@ public void testLowercaseOrOperator() { Assertions.assertNotNull(plan); // Current behavior: lowercase 'or' IS an operator - Assertions.assertEquals(QsClauseType.OR, plan.root.type); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); // TODO: If PDF requires only uppercase, this should fail } @@ -875,7 +873,7 @@ public void testUppercaseNotOperator() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.NOT, plan.root.type); + Assertions.assertEquals(QsClauseType.NOT, plan.getRoot().getType()); } @Test @@ -887,7 +885,7 @@ public void testLowercaseNotOperator() { Assertions.assertNotNull(plan); // Current behavior: lowercase 'not' IS an operator - Assertions.assertEquals(QsClauseType.NOT, plan.root.type); + Assertions.assertEquals(QsClauseType.NOT, plan.getRoot().getType()); // TODO: If PDF requires only uppercase, this should fail } @@ -899,7 +897,7 @@ public void testExclamationNotOperator() { Assertions.assertNotNull(plan); // Current behavior: ! 
IS a NOT operator - Assertions.assertEquals(QsClauseType.NOT, plan.root.type); + Assertions.assertEquals(QsClauseType.NOT, plan.getRoot().getType()); } @Test @@ -911,8 +909,8 @@ public void testEscapedSpecialCharactersInQuoted() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.PHRASE, plan.root.type); - Assertions.assertEquals("hello\\\"world", plan.root.value); + Assertions.assertEquals(QsClauseType.PHRASE, plan.getRoot().getType()); + Assertions.assertEquals("hello\\\"world", plan.getRoot().getValue()); } @Test @@ -922,7 +920,402 @@ public void testNoEscapeWithoutBackslash() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("normalterm", plan.root.value); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("normalterm", plan.getRoot().getValue()); + } + + // ============ Tests for Multi-Field Search ============ + + @Test + public void testMultiFieldSimpleTerm() { + // Test: "hello" + fields=["title","content"] → "(title:hello OR content:hello)" + String dsl = "hello"; + String options = "{\"fields\":[\"title\",\"content\"]}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); + + // Verify both fields are in bindings + Assertions.assertEquals(2, plan.getFieldBindings().size()); + Assertions.assertTrue(plan.getFieldBindings().stream() + .anyMatch(b -> "title".equals(b.getFieldName()))); + Assertions.assertTrue(plan.getFieldBindings().stream() + .anyMatch(b -> "content".equals(b.getFieldName()))); + } + + @Test + public void testMultiFieldMultiTermAnd() { + // Test: "hello world" + fields=["title","content"] + default_operator="and" + type="cross_fields" + // → 
"(title:hello OR content:hello) AND (title:world OR content:world)" + String dsl = "hello world"; + String options = "{\"fields\":[\"title\",\"content\"],\"default_operator\":\"and\",\"type\":\"cross_fields\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); + + // Each child should be an OR of two fields + for (QsNode child : plan.getRoot().getChildren()) { + Assertions.assertEquals(QsClauseType.OR, child.getType()); + Assertions.assertEquals(2, child.getChildren().size()); + } + } + + @Test + public void testMultiFieldMultiTermOr() { + // Test: "hello world" + fields=["title","content"] + default_operator="or" + // → "(title:hello OR content:hello) OR (title:world OR content:world)" + String dsl = "hello world"; + String options = "{\"fields\":[\"title\",\"content\"],\"default_operator\":\"or\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + } + + @Test + public void testMultiFieldExplicitAndOperator() { + // Test: "hello AND world" + fields=["title","content"] + cross_fields + // → "(title:hello OR content:hello) AND (title:world OR content:world)" + String dsl = "hello AND world"; + String options = "{\"fields\":[\"title\",\"content\"],\"type\":\"cross_fields\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); + } + + @Test + public void testMultiFieldMixedWithExplicitField() { + // Test: "hello AND category:tech" + fields=["title","content"] + cross_fields + // → "(title:hello OR content:hello) AND category:tech" + String dsl = "hello AND category:tech"; + String options = "{\"fields\":[\"title\",\"content\"],\"type\":\"cross_fields\"}"; + QsPlan 
plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); + + // Verify "category" is preserved + Assertions.assertTrue(plan.getFieldBindings().stream() + .anyMatch(b -> "category".equals(b.getFieldName()))); + } + + @Test + public void testMultiFieldWithWildcard() { + // Test: "hello*" + fields=["title","content"] + String dsl = "hello*"; + String options = "{\"fields\":[\"title\",\"content\"]}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); + + // Both should be PREFIX type + for (QsNode child : plan.getRoot().getChildren()) { + Assertions.assertEquals(QsClauseType.PREFIX, child.getType()); + } + } + + @Test + public void testMultiFieldWithExactFunction() { + // Test: "EXACT(foo bar)" + fields=["title","content"] + String dsl = "EXACT(foo bar)"; + String options = "{\"fields\":[\"title\",\"content\"]}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); + + // Both should be EXACT type + for (QsNode child : plan.getRoot().getChildren()) { + Assertions.assertEquals(QsClauseType.EXACT, child.getType()); + } + } + + @Test + public void testMultiFieldThreeFields() { + // Test: "hello" + fields=["title","content","tags"] + String dsl = "hello"; + String options = "{\"fields\":[\"title\",\"content\",\"tags\"]}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(3, plan.getRoot().getChildren().size()); + 
Assertions.assertEquals(3, plan.getFieldBindings().size()); + } + + @Test + public void testFieldsAndDefaultFieldMutuallyExclusive() { + // Test: specifying both fields and default_field should throw error + String dsl = "hello"; + String options = "{\"fields\":[\"title\",\"content\"],\"default_field\":\"tags\"}"; + + IllegalArgumentException exception = Assertions.assertThrows(IllegalArgumentException.class, () -> { + SearchDslParser.parseDsl(dsl, options); + }); + Assertions.assertTrue(exception.getMessage().contains("mutually exclusive")); + } + + @Test + public void testSingleFieldInArray() { + // Test: single field in array should work like default_field + String dsl = "hello"; + String options = "{\"fields\":[\"title\"]}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("title", plan.getRoot().getField()); + Assertions.assertEquals(1, plan.getFieldBindings().size()); + } + + @Test + public void testMultiFieldNotOperator() { + // Test: "NOT hello" + fields=["title","content"] + String dsl = "NOT hello"; + String options = "{\"fields\":[\"title\",\"content\"]}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.NOT, plan.getRoot().getType()); + Assertions.assertEquals(1, plan.getRoot().getChildren().size()); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getChildren().get(0).getType()); + } + + // ============ Tests for Multi-Field + Lucene Mode ============ + + @Test + public void testMultiFieldLuceneModeSimpleAnd() { + // Test: "a AND b" + fields=["title","content"] + lucene mode + cross_fields + // Expanded: "(title:a OR content:a) AND (title:b OR content:b)" + // With Lucene semantics: both groups are MUST + String dsl = "a AND b"; + String options = 
"{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\",\"minimum_should_match\":0,\"type\":\"cross_fields\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); + + // Should have 2 children (two OR groups), both with MUST + // Note: In Lucene mode, OR groups are also wrapped as OCCUR_BOOLEAN + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); + for (QsNode child : plan.getRoot().getChildren()) { + Assertions.assertEquals(QsOccur.MUST, child.getOccur()); + // The child is OCCUR_BOOLEAN wrapping the OR group + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, child.getType()); + } + } + + @Test + public void testMultiFieldLuceneModeSimpleOr() { + // Test: "a OR b" + fields=["title","content"] + lucene mode + // Expanded: "(title:a OR content:a) OR (title:b OR content:b)" + // With Lucene semantics: both groups are SHOULD + String dsl = "a OR b"; + String options = "{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); + + // Should have 2 children, both with SHOULD + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); + for (QsNode child : plan.getRoot().getChildren()) { + Assertions.assertEquals(QsOccur.SHOULD, child.getOccur()); + } + + // minimum_should_match should be 1 + Assertions.assertEquals(Integer.valueOf(1), plan.getRoot().getMinimumShouldMatch()); + } + + @Test + public void testMultiFieldLuceneModeAndOrMixed() { + // Test: "a AND b OR c" + fields=["title","content"] + lucene mode + minimum_should_match=0 + cross_fields + // With Lucene semantics and minimum_should_match=0: SHOULD groups are discarded + // Only "a" (MUST) remains - wrapped in OCCUR_BOOLEAN + String dsl = "a AND b OR c"; + String options = 
"{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\",\"minimum_should_match\":0,\"type\":\"cross_fields\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + // With minimum_should_match=0, only (title:a OR content:a) remains + // In Lucene mode, this is wrapped as OCCUR_BOOLEAN + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); + } + + @Test + public void testMultiFieldLuceneModeWithNot() { + // Test: "a AND NOT b" + fields=["title","content"] + lucene mode + cross_fields + // Expanded: "(title:a OR content:a) AND NOT (title:b OR content:b)" + String dsl = "a AND NOT b"; + String options = "{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\",\"minimum_should_match\":0,\"type\":\"cross_fields\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); + + // Should have 2 children: a (MUST), b (MUST_NOT) + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); + + // Find MUST and MUST_NOT children + boolean hasMust = plan.getRoot().getChildren().stream().anyMatch(c -> c.getOccur() == QsOccur.MUST); + boolean hasMustNot = plan.getRoot().getChildren().stream().anyMatch(c -> c.getOccur() == QsOccur.MUST_NOT); + Assertions.assertTrue(hasMust); + Assertions.assertTrue(hasMustNot); + } + + @Test + public void testMultiFieldLuceneModeSingleTerm() { + // Test: single term with multi-field + lucene mode + String dsl = "hello"; + String options = "{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + // In Lucene mode, even single term OR groups are wrapped as OCCUR_BOOLEAN + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); + // The OCCUR_BOOLEAN contains the OR group's children with SHOULD occur + Assertions.assertEquals(2, 
plan.getRoot().getChildren().size()); + } + + @Test + public void testMultiFieldLuceneModeComplexQuery() { + // Test: "(a OR b) AND NOT c" + fields=["f1","f2"] + lucene mode + cross_fields + String dsl = "(a OR b) AND NOT c"; + String options = "{\"fields\":[\"f1\",\"f2\"],\"mode\":\"lucene\",\"minimum_should_match\":0,\"type\":\"cross_fields\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + // Should have proper structure with MUST and MUST_NOT + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); + } + + @Test + public void testMultiFieldLuceneModeMinimumShouldMatchOne() { + // Test: "a AND b OR c" with minimum_should_match=1 keeps all clauses + cross_fields + String dsl = "a AND b OR c"; + String options = "{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\",\"minimum_should_match\":1,\"type\":\"cross_fields\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); + // All 3 groups should be present + Assertions.assertEquals(3, plan.getRoot().getChildren().size()); + Assertions.assertEquals(Integer.valueOf(1), plan.getRoot().getMinimumShouldMatch()); + } + + // ============ Tests for type parameter (best_fields vs cross_fields) ============ + + @Test + public void testMultiFieldBestFieldsDefault() { + // Test: best_fields is the default when type is not specified + // "hello world" with fields ["title", "content"] and default_operator "and" + // Expands to: (title:hello AND title:world) OR (content:hello AND content:world) + String dsl = "hello world"; + String options = "{\"fields\":[\"title\",\"content\"],\"default_operator\":\"and\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + // Root should be OR (joining fields) + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(2, 
plan.getRoot().getChildren().size()); // 2 fields + + // Each child should be an AND of terms for that field + for (QsNode fieldGroup : plan.getRoot().getChildren()) { + Assertions.assertEquals(QsClauseType.AND, fieldGroup.getType()); + Assertions.assertEquals(2, fieldGroup.getChildren().size()); // 2 terms + } + } + + @Test + public void testMultiFieldBestFieldsExplicit() { + // Test: explicitly specify type=best_fields + String dsl = "hello world"; + String options = "{\"fields\":[\"title\",\"content\"],\"default_operator\":\"and\",\"type\":\"best_fields\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); + } + + @Test + public void testMultiFieldCrossFields() { + // Test: cross_fields mode + // "hello world" with fields ["title", "content"] and default_operator "and" + // Expands to: (title:hello OR content:hello) AND (title:world OR content:world) + String dsl = "hello world"; + String options = "{\"fields\":[\"title\",\"content\"],\"default_operator\":\"and\",\"type\":\"cross_fields\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + // Root should be AND (joining term groups) + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); // 2 term groups + + // Each child should be an OR of the same term across fields + for (QsNode termGroup : plan.getRoot().getChildren()) { + Assertions.assertEquals(QsClauseType.OR, termGroup.getType()); + Assertions.assertEquals(2, termGroup.getChildren().size()); // 2 fields + } + } + + @Test + public void testMultiFieldBestFieldsLuceneMode() { + // Test: best_fields with Lucene mode + String dsl = "hello world"; + String options = 
"{\"fields\":[\"title\",\"content\"],\"default_operator\":\"and\",\"mode\":\"lucene\",\"type\":\"best_fields\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); + } + + @Test + public void testMultiFieldCrossFieldsLuceneMode() { + // Test: cross_fields with Lucene mode + String dsl = "hello world"; + String options = "{\"fields\":[\"title\",\"content\"],\"default_operator\":\"and\",\"mode\":\"lucene\",\"type\":\"cross_fields\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); + } + + @Test + public void testMultiFieldInvalidType() { + // Test: invalid type value should throw exception + String dsl = "hello"; + String options = "{\"fields\":[\"title\",\"content\"],\"type\":\"invalid_type\"}"; + + Assertions.assertThrows(IllegalArgumentException.class, () -> { + SearchDslParser.parseDsl(dsl, options); + }); + } + + @Test + public void testMultiFieldSingleTermSameResultForBothTypes() { + // Test: single term should have same structure for both types + // since there's only one term, no difference between best_fields and cross_fields + String dsl = "hello"; + String optionsBestFields = "{\"fields\":[\"title\",\"content\"],\"type\":\"best_fields\"}"; + String optionsCrossFields = "{\"fields\":[\"title\",\"content\"],\"type\":\"cross_fields\"}"; + + QsPlan planBest = SearchDslParser.parseDsl(dsl, optionsBestFields); + QsPlan planCross = SearchDslParser.parseDsl(dsl, optionsCrossFields); + + Assertions.assertNotNull(planBest); + Assertions.assertNotNull(planCross); + // Both should have same structure: (title:hello OR content:hello) + Assertions.assertEquals(planBest.getRoot().getType(), planCross.getRoot().getType()); + Assertions.assertEquals(planBest.getRoot().getChildren().size(), 
planCross.getRoot().getChildren().size()); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchTest.java index fd6f03743aebf6..919bebea2524fc 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchTest.java @@ -58,9 +58,9 @@ public void testGetQsPlan() { SearchDslParser.QsPlan plan = searchFunc.getQsPlan(); Assertions.assertNotNull(plan); - Assertions.assertNotNull(plan.root); - Assertions.assertEquals(SearchDslParser.QsClauseType.AND, plan.root.type); - Assertions.assertEquals(2, plan.fieldBindings.size()); + Assertions.assertNotNull(plan.getRoot()); + Assertions.assertEquals(SearchDslParser.QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getFieldBindings().size()); } @Test @@ -151,11 +151,11 @@ public void testComplexDslParsing() { SearchDslParser.QsPlan plan = searchFunc.getQsPlan(); Assertions.assertNotNull(plan); - Assertions.assertEquals(SearchDslParser.QsClauseType.AND, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(SearchDslParser.QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); // Should detect 3 unique fields: title, content, category - Assertions.assertEquals(3, plan.fieldBindings.size()); + Assertions.assertEquals(3, plan.getFieldBindings().size()); } @Test @@ -169,7 +169,7 @@ public void testInvalidDslHandling() { searchFunc.getQsPlan(); Assertions.assertTrue(false, "Expected exception for invalid DSL"); } catch (RuntimeException e) { - Assertions.assertTrue(e.getMessage().contains("Invalid search DSL syntax")); + Assertions.assertTrue(e.getMessage().contains("Invalid search DSL")); } } } diff --git 
a/regression-test/data/search/test_search_multi_field.out b/regression-test/data/search/test_search_multi_field.out new file mode 100644 index 00000000000000..4a4923a4c3b50e --- /dev/null +++ b/regression-test/data/search/test_search_multi_field.out @@ -0,0 +1,126 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !multi_field_single_term -- +1 machine learning basics +4 machine maintenance +8 cooking machine reviews +9 machine guide + +-- !multi_field_multi_term_and -- +1 machine learning basics +9 machine guide + +-- !multi_field_multi_term_and_lucene -- +1 machine learning basics +9 machine guide + +-- !multi_field_multi_term_or -- +1 machine learning basics +4 machine maintenance +5 learning guitar +6 deep learning neural networks +8 cooking machine reviews +9 machine guide + +-- !multi_field_explicit_and -- +1 machine learning basics +9 machine guide + +-- !multi_field_mixed -- +1 machine learning basics tech +9 machine guide tech + +-- !three_fields -- +1 machine learning basics +3 AI in healthcare +6 deep learning neural networks + +-- !multi_field_wildcard -- +1 machine learning basics +5 learning guitar +6 deep learning neural networks +9 machine guide + +-- !multi_field_not -- +1 machine learning basics +4 machine maintenance +9 machine guide + +-- !multi_field_complex -- +1 machine learning basics +3 AI in healthcare +4 machine maintenance +6 deep learning neural networks +9 machine guide + +-- !single_field_array -- +1 machine learning basics +4 machine maintenance +8 cooking machine reviews +9 machine guide + +-- !multi_field_lucene_and -- +1 machine learning basics +9 machine guide + +-- !multi_field_cross_fields_verify -- +1 machine learning basics introduction to AI and ML +9 machine guide learning tips + +-- !multi_field_lucene_or -- +1 machine learning basics +2 cooking recipes +4 machine maintenance +8 cooking machine reviews +9 machine guide + +-- !multi_field_lucene_and_or -- +1 machine 
learning basics +4 machine maintenance +8 cooking machine reviews +9 machine guide + +-- !multi_field_lucene_min_should_1 -- +1 machine learning basics +8 cooking machine reviews +9 machine guide + +-- !multi_field_lucene_and_not -- +1 machine learning basics +8 cooking machine reviews +9 machine guide + +-- !compare_default_field -- +1 machine learning basics +4 machine maintenance +8 cooking machine reviews +9 machine guide + +-- !compare_fields_single -- +1 machine learning basics +4 machine maintenance +8 cooking machine reviews +9 machine guide + +-- !multi_field_exact -- + +-- !multi_field_any -- +1 machine learning basics +2 cooking recipes +4 machine maintenance +8 cooking machine reviews +9 machine guide + +-- !multi_field_best_fields_default -- +1 machine learning basics + +-- !multi_field_cross_fields -- +1 machine learning basics +9 machine guide + +-- !multi_field_best_fields_lucene -- +1 machine learning basics + +-- !multi_field_cross_fields_lucene -- +1 machine learning basics +9 machine guide + diff --git a/regression-test/suites/search/test_search_multi_field.groovy b/regression-test/suites/search/test_search_multi_field.groovy new file mode 100644 index 00000000000000..f71db33f2b050f --- /dev/null +++ b/regression-test/suites/search/test_search_multi_field.groovy @@ -0,0 +1,297 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/** + * Tests for multi-field search support in search() function. + * + * The 'fields' parameter allows searching across multiple fields with a single query term. + * This is similar to Elasticsearch's query_string 'fields' parameter. + * + * Example: + * search('hello', '{"fields":["title","content"]}') + * -> Equivalent to: (title:hello OR content:hello) + * + * search('hello world', '{"fields":["title","content"],"default_operator":"and"}') + * -> Equivalent to: (title:hello AND title:world) OR (content:hello AND content:world) with the default "type":"best_fields"; adding "type":"cross_fields" gives (title:hello OR content:hello) AND (title:world OR content:world) + * + * Multi-field search can also be combined with Lucene mode for MUST/SHOULD/MUST_NOT semantics. 
+ */ +suite("test_search_multi_field") { + def tableName = "search_multi_field_test" + + sql "DROP TABLE IF EXISTS ${tableName}" + + // Create table with inverted indexes on multiple fields + sql """ + CREATE TABLE ${tableName} ( + id INT, + title VARCHAR(200), + content VARCHAR(500), + tags VARCHAR(100), + category VARCHAR(50), + INDEX idx_title(title) USING INVERTED PROPERTIES("parser" = "english"), + INDEX idx_content(content) USING INVERTED PROPERTIES("parser" = "english"), + INDEX idx_tags(tags) USING INVERTED PROPERTIES("parser" = "english"), + INDEX idx_category(category) USING INVERTED + ) ENGINE=OLAP + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ("replication_allocation" = "tag.location.default: 1") + """ + + // Insert test data + // Note: id=9 is specifically designed to test cross_fields vs best_fields behavior + // - cross_fields: matches (title has 'machine', content has 'learning') + // - best_fields: does NOT match (no single field has both terms) + sql """INSERT INTO ${tableName} VALUES + (1, 'machine learning basics', 'introduction to AI and ML', 'ml ai tutorial', 'tech'), + (2, 'cooking recipes', 'how to make pasta', 'food cooking', 'lifestyle'), + (3, 'AI in healthcare', 'artificial intelligence applications', 'health ai', 'tech'), + (4, 'machine maintenance', 'keeping machines running', 'industrial', 'engineering'), + (5, 'learning guitar', 'music lessons for beginners', 'music learning', 'entertainment'), + (6, 'deep learning neural networks', 'advanced AI concepts', 'ai ml deep', 'tech'), + (7, 'car maintenance guide', 'vehicle repair tips', 'auto maintenance', 'automotive'), + (8, 'cooking machine reviews', 'kitchen appliance ratings', 'cooking appliances', 'lifestyle'), + (9, 'machine guide', 'learning tips', 'howto', 'tech') + """ + + // Wait for index building + Thread.sleep(5000) + + // ============ Test 1: Single term across multiple fields ============ + // "machine" in title OR content + 
qt_multi_field_single_term """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine', '{"fields":["title","content"]}') + ORDER BY id + """ + + // ============ Test 2: Multiple terms with AND (cross_fields) ============ + // "machine" AND "learning" across title,content with cross_fields semantics + // cross_fields: terms can be across different fields + // id=1: title has both terms + // id=9: title has "machine", content has "learning" (cross_fields match) + qt_multi_field_multi_term_and """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine learning', '{"fields":["title","content"],"default_operator":"and","type":"cross_fields"}') + ORDER BY id + """ + + // ============ Test 2b: Multiple terms with AND in Lucene mode (cross_fields) ============ + // Same as Test 2 but with mode:lucene - should have same result + // Uses cross_fields semantics explicitly + qt_multi_field_multi_term_and_lucene """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine learning', '{"fields":["title","content"],"default_operator":"and","mode":"lucene","type":"cross_fields"}') + ORDER BY id + """ + + // ============ Test 3: Multiple terms with OR (default) ============ + qt_multi_field_multi_term_or """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine learning', '{"fields":["title","content"],"default_operator":"or"}') + ORDER BY id + """ + + // ============ Test 4: Explicit AND operator in DSL (cross_fields) ============ + // Uses explicit type:cross_fields for backward compatibility + qt_multi_field_explicit_and """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine AND learning', '{"fields":["title","content"],"type":"cross_fields"}') + ORDER BY id + """ + + // ============ Test 5: Mixed - 
some terms with explicit field (cross_fields) ============ + qt_multi_field_mixed """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title, category + FROM ${tableName} + WHERE search('machine AND category:tech', '{"fields":["title","content"],"type":"cross_fields"}') + ORDER BY id + """ + + // ============ Test 6: Three fields ============ + qt_three_fields """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('ai', '{"fields":["title","content","tags"]}') + ORDER BY id + """ + + // ============ Test 7: Wildcard across fields ============ + qt_multi_field_wildcard """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('learn*', '{"fields":["title","content","tags"]}') + ORDER BY id + """ + + // ============ Test 8: NOT operator (cross_fields) ============ + qt_multi_field_not """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine AND NOT cooking', '{"fields":["title","content"],"type":"cross_fields"}') + ORDER BY id + """ + + // ============ Test 9: Complex boolean (cross_fields) ============ + qt_multi_field_complex """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('(machine OR ai) AND NOT cooking', '{"fields":["title","content"],"type":"cross_fields"}') + ORDER BY id + """ + + // ============ Test 10: Single field in array (backward compatible) ============ + qt_single_field_array """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine', '{"fields":["title"]}') + ORDER BY id + """ + + // ============ Test 11: Multi-field with Lucene mode - simple AND (cross_fields) ============ + // This is equivalent to Test 2 but uses Lucene mode with explicit AND operator + // Uses cross_fields semantics explicitly + qt_multi_field_lucene_and """ + SELECT 
/*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine AND learning', '{"fields":["title","content"],"mode":"lucene","minimum_should_match":0,"type":"cross_fields"}') + ORDER BY id + """ + + // ============ Test 11b: Verify cross_fields behavior explicitly ============ + // This test verifies cross_fields semantics (like ES type:cross_fields) + // Query: "machine AND learning" across title and content + // id=9 has 'machine' in title and 'learning' in content - should match with cross_fields + qt_multi_field_cross_fields_verify """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title, content + FROM ${tableName} + WHERE search('machine AND learning', '{"fields":["title","content"],"type":"cross_fields"}') + ORDER BY id + """ + + // ============ Test 12: Multi-field with Lucene mode - OR ============ + qt_multi_field_lucene_or """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine OR cooking', '{"fields":["title","content"],"mode":"lucene"}') + ORDER BY id + """ + + // ============ Test 13: Multi-field with Lucene mode - AND OR mixed (cross_fields) ============ + // With minimum_should_match=0, SHOULD clauses are discarded when MUST exists + qt_multi_field_lucene_and_or """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine AND learning OR cooking', '{"fields":["title","content"],"mode":"lucene","minimum_should_match":0,"type":"cross_fields"}') + ORDER BY id + """ + + // ============ Test 14: Multi-field with Lucene mode - minimum_should_match=1 (cross_fields) ============ + qt_multi_field_lucene_min_should_1 """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine AND learning OR cooking', '{"fields":["title","content"],"mode":"lucene","minimum_should_match":1,"type":"cross_fields"}') + ORDER BY id + """ + + // ============ Test 
15: Multi-field with Lucene mode - AND NOT (cross_fields) ============ + qt_multi_field_lucene_and_not """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine AND NOT maintenance', '{"fields":["title","content"],"mode":"lucene","minimum_should_match":0,"type":"cross_fields"}') + ORDER BY id + """ + + // ============ Test 16: Comparison - same query with default_field vs fields ============ + // Using default_field (single field) + qt_compare_default_field """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine', '{"default_field":"title"}') + ORDER BY id + """ + + // Using fields array with single field (should be same as default_field) + qt_compare_fields_single """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine', '{"fields":["title"]}') + ORDER BY id + """ + + // ============ Test 17: EXACT function across fields ============ + qt_multi_field_exact """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('EXACT(machine learning)', '{"fields":["title","content"]}') + ORDER BY id + """ + + // ============ Test 18: ANY function across fields ============ + qt_multi_field_any """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('ANY(machine cooking)', '{"fields":["title","content"]}') + ORDER BY id + """ + + // ============ Test 19: best_fields mode (default) ============ + // With best_fields, all terms must be in the SAME field + // Only id=1 matches: title has both "machine" and "learning" + // id=9 does NOT match: "machine" in title, "learning" in content (different fields) + qt_multi_field_best_fields_default """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine learning', 
'{"fields":["title","content"],"default_operator":"and"}') + ORDER BY id + """ + + // ============ Test 20: cross_fields mode (explicit) ============ + // With cross_fields, terms can be in DIFFERENT fields + // Both id=1 and id=9 match + qt_multi_field_cross_fields """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine learning', '{"fields":["title","content"],"default_operator":"and","type":"cross_fields"}') + ORDER BY id + """ + + // ============ Test 21: best_fields with Lucene mode ============ + qt_multi_field_best_fields_lucene """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine learning', '{"fields":["title","content"],"default_operator":"and","mode":"lucene","type":"best_fields"}') + ORDER BY id + """ + + // ============ Test 22: cross_fields with Lucene mode ============ + qt_multi_field_cross_fields_lucene """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine learning', '{"fields":["title","content"],"default_operator":"and","mode":"lucene","type":"cross_fields"}') + ORDER BY id + """ + + // Cleanup + sql "DROP TABLE IF EXISTS ${tableName}" +}