From e6a16eaa42158c1a3af1667d5d67ca82f8d08757 Mon Sep 17 00:00:00 2001 From: eanzhao Date: Fri, 22 May 2026 01:34:57 +0800 Subject: [PATCH] Resolve Elasticsearch exact-match field paths from live index mapping PR #665's `_id`-suffix -> keyword augmentation made BuildExactMatchFieldPathResolver trust code intent: for an augmented-keyword field it emitted the bare field path. Elasticsearch indexes created before 2026-05-18 keep their dynamic `text` + `.keyword` mapping, so `term` queries hit the analyzed `text` field and returned 0 hits for identifier-shaped values -- the Lark bot outage reported in #743. QueryAsync now resolves keyword/text field paths from the target index's live `_mapping` (GET /_mapping, cached per index), falling back to declared metadata only when the mapping cannot be read. Reading `_mapping` is a schema read -- no mapping mutation, reindex, backfill, or event replay. Index creation and descriptor augmentation are unchanged. ADR-0025 records the decision (it scopes blueprint hard-constraint #2 to the write/index-init path). Addresses #743; the alias/migration index-lifecycle phases P1-P3/P5 remain tracked by #743. Verification: - dotnet test Aevatar.CQRS.Projection.Core.Tests -- 136 passed, 1 skipped - tools/docs/lint.sh -- passed - architecture query/projection guards -- passed Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/README.md | 1 + ...xact-match-resolution-reads-index-truth.md | 125 ++++++++++ ...arch-projection-index-mapping-blueprint.md | 2 + .../README.md | 10 +- .../ElasticsearchIndexLifecycleManager.cs | 74 +++++- .../ElasticsearchProjectionDocumentStore.cs | 37 ++- ...hProjectionDocumentStoreMetadataSupport.cs | 64 ++++- ...rchProjectionDocumentStoreBehaviorTests.cs | 234 +++++++++++++----- 8 files changed, 473 insertions(+), 74 deletions(-) create mode 100644 docs/adr/0025-elasticsearch-exact-match-resolution-reads-index-truth.md diff --git a/docs/README.md b/docs/README.md index 9bdbdc560..9a8feda7f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -54,6 +54,7 @@ Immutable records of architectural choices and their rationale. - [0022 — OpenTelemetry semantic conventions for aevatar.* activities](adr/0022-otel-aevatar-semantic-conventions.md) - [0023 — Two-tier Inspector architecture (canonical readmodel vs observation OTel)](adr/0023-two-tier-inspector-architecture.md) - [Chat Route Policy — Config Actor + Boundary Resolver](adr/0024-chat-route-policy.md) +- [Elasticsearch exact-match field resolution reads live index mapping](adr/0025-elasticsearch-exact-match-resolution-reads-index-truth.md) ## History diff --git a/docs/adr/0025-elasticsearch-exact-match-resolution-reads-index-truth.md b/docs/adr/0025-elasticsearch-exact-match-resolution-reads-index-truth.md new file mode 100644 index 000000000..062a45d49 --- /dev/null +++ b/docs/adr/0025-elasticsearch-exact-match-resolution-reads-index-truth.md @@ -0,0 +1,125 @@ +--- +title: Elasticsearch exact-match field resolution reads live index mapping +status: Accepted +owner: eanzhao +--- + +# ADR-0025: Elasticsearch exact-match field resolution reads live index mapping + +## Context + +PR #665 ("Stabilize Elasticsearch projection index mappings", merged 2026-05-18; +design doc `docs/design/2026-05-15-elasticsearch-projection-index-mapping-blueprint.md`) +added `ElasticsearchProjectionDescriptorMappingSupport.AugmentMetadata`: for every +read-model string field whose name matches a stable-identifier shape (`*_id`, +`*_key`, `*_hash`, `*_status`, `*_kind`, `*_type`, ...), the provider injects a +`{"type":"keyword"}` entry into the in-memory `DocumentIndexMetadata.Mappings`. + +`BuildExactMatchFieldPathResolver` consulted that augmented metadata to decide +whether an exact-match (`term`) filter targets a field directly or through its +`.keyword` sub-field. The contradiction: + +- **Augmented metadata is the code's *intent*** — it says "this field should be + a keyword." +- **An Elasticsearch index created before that intent shipped keeps its original + mapping forever.** A string field on such an index carries the ES dynamic + default — `text` with a `.keyword` multi-field — and `EnsureIndexAsync` never + reconciles an already-existing index (it treats `resource_already_exists` as + "done"). + +For any index created before 2026-05-18, the resolver therefore saw `keyword` +(intent) and emitted the bare field path, while the field was physically `text` +(truth). The `term` query hit the analyzed `text` field and returned **0 hits** +for identifier-shaped values — silently. This took down the Lark bot on +2026-05-20 (issue #743): the relay callback's scope resolver could not resolve +`apiKeyId → scopeId`, and every inbound relay callback returned 401. + +The 2026-05-15 blueprint anticipated incompatibility (§5 hard-constraint #2: the +mapping helper works only from the proto contract + declared +`DocumentIndexMetadata`, never from runtime index state; #9: incompatible +contract changes require a manual clear/rebuild). That stance is defensible for +*index creation*, but it left the *read path* trusting intent over physical +truth, and the rebuild runbook was enforced by no gate. + +## Decision + +### D1 — The exact-match resolver reads the live index `_mapping` + +`ElasticsearchProjectionDocumentStore.QueryAsync` resolves `keyword`/`text` field +paths from the **actual** Elasticsearch mapping of the target index, obtained via +`GET /_mapping` (`ElasticsearchIndexLifecycleManager.GetActualFieldMappingsAsync`), +not from the code-side augmented `DocumentIndexMetadata`. + +This narrows blueprint hard-constraint #2 for the read path only: exact-match +`term` resolution is now sourced from index truth. Index *creation* still works +purely from the proto contract + declared metadata — `AugmentMetadata` and +`EnsureIndexAsync` are unchanged. + +### D2 — Reading mapping is not query-time repair + +`GET _mapping` reads index schema metadata. It performs no mapping mutation, no +reindex, no document backfill, and no event replay. The query path stays free of +repair/priming side effects (CLAUDE.md "query path 禁止执行 mapping mutation / +repair"; blueprint §5 #3). The provider still does not do online index repair or +document-level dual-read. + +### D3 — Probe failure falls back to declared metadata + +When the `_mapping` probe cannot read physical truth (index absent, ES +unreachable, HTTP timeout, unparseable body), the resolver falls back to the +augmented `DocumentIndexMetadata` — the pre-#743 behaviour. A best-effort probe +must never turn a transient mapping-endpoint failure into a query failure. + +### D4 — The probe result is cached per index for the store lifetime + +`GetActualFieldMappingsAsync` caches a successful read per index name. Steady-state +cost is one extra `GET _mapping` per index per process. Mapping drift within a +process lifetime is not a concern for stable query fields — they exist in the +proto contract from the start; a process restart re-probes. + +### D5 — Scope: query path only + +This ADR fixes the exact-match *filter* resolution that caused #743. It does not +introduce alias indirection, schema fingerprinting, blue-green reindex migration, +or a real-Elasticsearch CI suite. Those (issue #743 phases P1–P3, P5) remain +tracked by #743 as a separate index-lifecycle effort; they are required neither to +recover the outage nor to make the query path drift-tolerant. + +## Alternatives considered + +- **Revert #665.** Rejected: descriptor-driven keyword mapping for new indices is + correct and wanted. The missing piece is read-path drift tolerance, not the + augmentation itself. +- **Heuristic patch to the resolver** (e.g. "always also try `.keyword`"). + Rejected — #743 non-goal #8. A blind second guess deepens implicit-convention + debt; reading the index's real mapping is ground truth, not a heuristic. +- **Manual clear/rebuild runbook** (the blueprint's original stance). Rejected as + the *primary* mechanism: it is enforced by no gate and already failed in + production. Reading index truth makes the query path correct without an + operator step. +- **The full index-lifecycle epic now** (alias + fingerprint + migration + + Testcontainers). Deferred: too large for one PR onto the live deploy branch and + unnecessary to recover the outage. Tracked by #743. + +## Consequences + +- Every projection index created before 2026-05-18 with dynamic string mappings + now answers identifier-shaped exact-match queries correctly — the Lark + registration lookup and every latent variant recover without an operator + touching production. +- One additional cached `GET _mapping` round-trip per index per process. +- `src/Aevatar.CQRS.Projection.Providers.Elasticsearch/README.md` "自动索引映射" + is updated: the provider reads live mapping for read-side field resolution (it + still does not repair or rebuild indices). +- The blueprint's "no runtime index state" constraint now has a recorded, scoped + exception; future read-path work references this ADR instead of silently + re-deciding. + +## References + +- Issue #743 — ES projection index lifecycle: schema-drift gap silently breaks + by-field queries (Lark bot outage 2026-05-20). +- PR #665 — Stabilize Elasticsearch projection index mappings. +- `docs/design/2026-05-15-elasticsearch-projection-index-mapping-blueprint.md` — + §5 hard-constraints #2/#3, §9 target architecture. +- CLAUDE.md — "权威状态 / ReadModel / Projection(强制)", "正确架构优先". diff --git a/docs/design/2026-05-15-elasticsearch-projection-index-mapping-blueprint.md b/docs/design/2026-05-15-elasticsearch-projection-index-mapping-blueprint.md index c768b0bce..8acee4a7c 100644 --- a/docs/design/2026-05-15-elasticsearch-projection-index-mapping-blueprint.md +++ b/docs/design/2026-05-15-elasticsearch-projection-index-mapping-blueprint.md @@ -95,6 +95,8 @@ owner: aevatar-core 9. 重构语义必须诚实: 新 mapping 契约不兼容旧 index 时,直接要求清空 / 重建 projection index;不在应用读路径里偷偷修复,也不为未投产历史数据设计兼容层。 10. 本设计不得引入内部泛化 `Metadata` bag;`DocumentIndexMetadata` 是 Elasticsearch index 边界元信息,允许保留该命名。 +> **修订(2026-05-22,ADR-0025)**:约束 #2、#9 适用于 index 初始化与 mapping augmentation helper。exact-match(`term`)查询的字段路径解析已改为读取目标 index 的实时 `_mapping`——augmented metadata 是代码意图,2026-05-18 之前创建的 index 的物理 mapping 才是事实,二者背离曾导致 #743 线上故障。读取 `_mapping` 不做 mutation / repair / replay,仍满足约束 #3。详见 `docs/adr/0025-elasticsearch-exact-match-resolution-reads-index-truth.md`。 + ## 6. 当前基线 ### 6.1 当前正确部分 diff --git a/src/Aevatar.CQRS.Projection.Providers.Elasticsearch/README.md b/src/Aevatar.CQRS.Projection.Providers.Elasticsearch/README.md index 82751732a..fd3e02235 100644 --- a/src/Aevatar.CQRS.Projection.Providers.Elasticsearch/README.md +++ b/src/Aevatar.CQRS.Projection.Providers.Elasticsearch/README.md @@ -32,9 +32,17 @@ Elasticsearch Document Provider。 - 新建索引时,provider 会基于 read model 的 protobuf descriptor 补齐低风险稳定字段映射:root-level `google.protobuf.Timestamp` 映射为 `date`,root-level 稳定字符串标识字段(如 `id`、`actor_id`、`last_event_id`、`*_id`、`*_key`、`*_hash`、`*_status`、`*_kind`、`*_type`、`*_type_url`)映射为 `keyword` - `DocumentIndexMetadata` 中显式声明的 mapping 优先,provider 不覆盖自定义 `text`、analyzer、object、nested 或其他业务 mapping - `google.protobuf.Any`、`google.protobuf.Struct`、map、repeated message 与 repeated scalar 字段默认保持开放,不由通用 helper 递归展开 -- mapping 契约变更不兼容旧 Elasticsearch index 时,直接清空或重建 projection index;provider 不做旧索引在线修复、双读 fallback 或 query-time mapping repair +- mapping 契约变更不兼容旧 Elasticsearch index 时,index 初始化仍按当前契约创建**新** index;provider 不在读路径在线修复、重建或 mutate 旧 index - `AutoCreateIndex=true` 只会在缺失 index 时按当前契约创建新 index;如果需要保留数据,应通过 projection 重放或外部重建流程恢复数据 +## 精确匹配字段路径解析 + +- 精确匹配(`term` / `terms`)过滤的 `keyword` / `text` 字段路径解析基于目标 index 的**实时** `_mapping`(`GET /_mapping`),而非代码侧 augmented metadata +- 原因:augmented metadata 是代码意图;2026-05-18 之前创建的 index 上 `*_id` 等字符串字段可能仍是 dynamic `text` + `.keyword` multi-field。二者背离会让 `term` 查询命中 analyzed `text` 字段并对 identifier 形态的值返回 0 命中(见 issue #743、ADR-0025) +- 读取 `_mapping` 只读取 index schema,不做 mapping mutation / reindex / 文档回填 / event replay;成功的探测结果按 index 缓存 +- `_mapping` 探测失败(index 缺失、ES 不可达、超时、响应不可解析)时回退到 declared / augmented metadata,即 #743 之前的解析行为 +- 决策记录见 `docs/adr/0025-elasticsearch-exact-match-resolution-reads-index-truth.md` + 参考: - [_id field](https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/mapping-id-field) diff --git a/src/Aevatar.CQRS.Projection.Providers.Elasticsearch/Stores/ElasticsearchIndexLifecycleManager.cs b/src/Aevatar.CQRS.Projection.Providers.Elasticsearch/Stores/ElasticsearchIndexLifecycleManager.cs index 30fc0f866..b363b2d5f 100644 --- a/src/Aevatar.CQRS.Projection.Providers.Elasticsearch/Stores/ElasticsearchIndexLifecycleManager.cs +++ b/src/Aevatar.CQRS.Projection.Providers.Elasticsearch/Stores/ElasticsearchIndexLifecycleManager.cs @@ -6,8 +6,11 @@ namespace Aevatar.CQRS.Projection.Providers.Elasticsearch.Stores; internal sealed class ElasticsearchIndexLifecycleManager : IDisposable { private readonly SemaphoreSlim _initLock = new(1, 1); + private readonly SemaphoreSlim _mappingProbeLock = new(1, 1); private readonly Lock _stateGate = new(); private readonly HashSet _initializedIndices = new(StringComparer.Ordinal); + private readonly Dictionary> _actualFieldMappingsByIndex = + new(StringComparer.Ordinal); private readonly HttpClient _httpClient; private readonly bool _autoCreate; @@ -75,5 +78,74 @@ private void MarkInitialized(string indexName) _initializedIndices.Add(indexName); } - public void Dispose() => _initLock.Dispose(); + /// + /// Reads the live Elasticsearch _mapping for an index so the query path can resolve + /// keyword/text field paths from physical truth rather than code-side augmented metadata. + /// Returns null when the index is absent or the mapping cannot be read; callers then + /// fall back to declared metadata. Successful reads are cached for the manager lifetime. + /// + public async Task?> GetActualFieldMappingsAsync( + string indexName, + CancellationToken ct) + { + lock (_stateGate) + { + if (_actualFieldMappingsByIndex.TryGetValue(indexName, out var cached)) + return cached; + } + + await _mappingProbeLock.WaitAsync(ct); + try + { + lock (_stateGate) + { + if (_actualFieldMappingsByIndex.TryGetValue(indexName, out var cached)) + return cached; + } + + var mappings = await ReadActualFieldMappingsAsync(indexName, ct); + if (mappings == null) + return null; + + lock (_stateGate) + _actualFieldMappingsByIndex[indexName] = mappings; + return mappings; + } + finally + { + _mappingProbeLock.Release(); + } + } + + private async Task?> ReadActualFieldMappingsAsync( + string indexName, + CancellationToken ct) + { + try + { + using var response = await _httpClient.GetAsync($"{indexName}/_mapping", ct); + if (!response.IsSuccessStatusCode) + return null; + + var payload = await response.Content.ReadAsStringAsync(ct); + return ElasticsearchProjectionDocumentStoreMetadataSupport + .TryExtractFieldMappingsFromMappingResponse(payload, indexName); + } + catch (OperationCanceledException) when (ct.IsCancellationRequested) + { + throw; + } + catch (Exception ex) when (ex is HttpRequestException or OperationCanceledException) + { + // Best-effort probe: an unreachable mapping endpoint or HTTP timeout must not fail the + // query. The caller falls back to declared metadata (pre-existing resolution behaviour). + return null; + } + } + + public void Dispose() + { + _initLock.Dispose(); + _mappingProbeLock.Dispose(); + } } diff --git a/src/Aevatar.CQRS.Projection.Providers.Elasticsearch/Stores/ElasticsearchProjectionDocumentStore.cs b/src/Aevatar.CQRS.Projection.Providers.Elasticsearch/Stores/ElasticsearchProjectionDocumentStore.cs index 07ef25b70..216f32f9e 100644 --- a/src/Aevatar.CQRS.Projection.Providers.Elasticsearch/Stores/ElasticsearchProjectionDocumentStore.cs +++ b/src/Aevatar.CQRS.Projection.Providers.Elasticsearch/Stores/ElasticsearchProjectionDocumentStore.cs @@ -36,7 +36,7 @@ public sealed class ElasticsearchProjectionDocumentStore private readonly DocumentIndexMetadata _indexMetadata; private readonly Func? _indexScopeSelector; private readonly Func _fieldPathResolver; - private readonly Func _exactMatchFieldPathResolver; + private readonly IReadOnlyDictionary _descriptorFieldMap; private readonly ILogger> _logger; public ElasticsearchProjectionDocumentStore( @@ -98,7 +98,7 @@ public ElasticsearchProjectionDocumentStore( _indexScopeSelector = indexScopeSelector; _defaultSortField = options.DefaultSortField?.Trim() ?? ""; _fieldPathResolver = BuildFieldPathResolver(descriptor); - _exactMatchFieldPathResolver = BuildExactMatchFieldPathResolver(descriptor, _indexMetadata); + _descriptorFieldMap = BuildDescriptorFieldMap(descriptor); _logger = logger ?? NullLogger>.Instance; _indexManager = new ElasticsearchIndexLifecycleManager(_httpClient, _autoCreateIndex); @@ -206,6 +206,7 @@ public async Task> QueryAsync( ct.ThrowIfCancellationRequested(); ThrowIfDynamicReadModelQueriesUnsupported("query"); await _indexManager.EnsureIndexAsync(_indexName, _indexMetadata, ct); + var exactMatchFieldPathResolver = await BuildExactMatchFieldPathResolverAsync(ct); var boundedTake = Math.Clamp(query.Take <= 0 ? 50 : query.Take, 1, _queryTakeMax); using var request = new HttpRequestMessage(HttpMethod.Post, $"{_indexName}/_search") @@ -216,7 +217,7 @@ public async Task> QueryAsync( _defaultSortField, boundedTake, _fieldPathResolver, - _exactMatchFieldPathResolver), + exactMatchFieldPathResolver), Encoding.UTF8, "application/json"), }; @@ -284,11 +285,23 @@ private static Func BuildFieldPathResolver(MessageDescriptor des return fieldPath => ResolveFieldPath(descriptor, fieldPath); } - private static Func BuildExactMatchFieldPathResolver( - MessageDescriptor descriptor, - DocumentIndexMetadata indexMetadata) + private async Task> BuildExactMatchFieldPathResolverAsync( + CancellationToken ct) + { + // Exact-match (term) filters must target the field path that physically exists in + // Elasticsearch. The resolver consults the live index `_mapping`, not the code-side + // augmented `DocumentIndexMetadata`: a string field that augmented metadata declares + // `keyword` may still be a dynamic `text` + `.keyword` multi-field on any index created + // before that declaration shipped. When the live mapping cannot be read, fall back to the + // declared metadata (pre-existing behaviour). + // See docs/adr/0025-elasticsearch-exact-match-resolution-reads-index-truth.md. + var actualFieldMappings = await _indexManager.GetActualFieldMappingsAsync(_indexName, ct); + return BuildExactMatchFieldPathResolver(actualFieldMappings ?? _indexMetadata.Mappings); + } + + private Func BuildExactMatchFieldPathResolver( + IReadOnlyDictionary mappings) { - var descriptorFieldMap = BuildDescriptorFieldMap(descriptor); return (filter, resolvedFieldPath) => { if (resolvedFieldPath.EndsWith(".keyword", StringComparison.Ordinal)) @@ -298,20 +311,20 @@ private static Func BuildExactMatchFie return resolvedFieldPath; if (ElasticsearchProjectionDocumentStoreMetadataSupport.TryGetFieldMapping( - indexMetadata.Mappings, + mappings, resolvedFieldPath, - out var explicitMapping)) + out var fieldMapping)) { - if (ElasticsearchProjectionDocumentStoreMetadataSupport.IsKeywordFieldMapping(explicitMapping)) + if (ElasticsearchProjectionDocumentStoreMetadataSupport.IsKeywordFieldMapping(fieldMapping)) return resolvedFieldPath; - if (ElasticsearchProjectionDocumentStoreMetadataSupport.HasKeywordMultiField(explicitMapping)) + if (ElasticsearchProjectionDocumentStoreMetadataSupport.HasKeywordMultiField(fieldMapping)) return $"{resolvedFieldPath}.keyword"; return resolvedFieldPath; } - return descriptorFieldMap.TryGetValue(resolvedFieldPath, out var field) && + return _descriptorFieldMap.TryGetValue(resolvedFieldPath, out var field) && field.FieldType == FieldType.String ? $"{resolvedFieldPath}.keyword" : resolvedFieldPath; diff --git a/src/Aevatar.CQRS.Projection.Providers.Elasticsearch/Stores/ElasticsearchProjectionDocumentStoreMetadataSupport.cs b/src/Aevatar.CQRS.Projection.Providers.Elasticsearch/Stores/ElasticsearchProjectionDocumentStoreMetadataSupport.cs index d145ddd3f..4c6ef8b8e 100644 --- a/src/Aevatar.CQRS.Projection.Providers.Elasticsearch/Stores/ElasticsearchProjectionDocumentStoreMetadataSupport.cs +++ b/src/Aevatar.CQRS.Projection.Providers.Elasticsearch/Stores/ElasticsearchProjectionDocumentStoreMetadataSupport.cs @@ -99,7 +99,7 @@ value is double || $"{context} contains unsupported value type '{value.GetType().FullName}'."); } - private static object? NormalizeJsonElement(JsonElement element, string context) + internal static object? NormalizeJsonElement(JsonElement element, string context) { return element.ValueKind switch { @@ -250,4 +250,66 @@ internal static bool TryGetFieldMapping( return false; } + + /// + /// Extracts the field-mapping dictionary from an Elasticsearch GET <index>/_mapping + /// response so the query path can resolve keyword/text field paths from physical index truth. + /// The returned dictionary is shaped like (it carries + /// the properties map) and is safe to pass to . + /// Returns null when the payload is empty or not a recognizable mapping response. + /// + internal static IReadOnlyDictionary? TryExtractFieldMappingsFromMappingResponse( + string mappingResponseJson, + string indexName) + { + if (string.IsNullOrWhiteSpace(mappingResponseJson)) + return null; + + try + { + using var document = JsonDocument.Parse(mappingResponseJson); + if (document.RootElement.ValueKind != JsonValueKind.Object) + return null; + + // GET /_mapping returns { "": { "mappings": { "properties": {...} } } }. + if (!TryResolveIndexNode(document.RootElement, indexName, out var indexNode) || + !indexNode.TryGetProperty("mappings", out var mappingsNode) || + mappingsNode.ValueKind != JsonValueKind.Object) + { + return null; + } + + return NormalizeJsonElement(mappingsNode, "Elasticsearch _mapping response") + as IReadOnlyDictionary; + } + catch (Exception ex) when (ex is JsonException or InvalidOperationException or FormatException) + { + return null; + } + } + + private static bool TryResolveIndexNode(JsonElement root, string indexName, out JsonElement indexNode) + { + if (!string.IsNullOrWhiteSpace(indexName) && + root.TryGetProperty(indexName, out var namedNode) && + namedNode.ValueKind == JsonValueKind.Object) + { + indexNode = namedNode; + return true; + } + + // A single-index _mapping request keys the body by the concrete index name; when the + // caller's logical name differs (prefix/normalization), fall back to the sole object entry. + foreach (var property in root.EnumerateObject()) + { + if (property.Value.ValueKind == JsonValueKind.Object) + { + indexNode = property.Value; + return true; + } + } + + indexNode = default; + return false; + } } diff --git a/test/Aevatar.CQRS.Projection.Core.Tests/ElasticsearchProjectionDocumentStoreBehaviorTests.cs b/test/Aevatar.CQRS.Projection.Core.Tests/ElasticsearchProjectionDocumentStoreBehaviorTests.cs index 9508424c9..991cd5603 100644 --- a/test/Aevatar.CQRS.Projection.Core.Tests/ElasticsearchProjectionDocumentStoreBehaviorTests.cs +++ b/test/Aevatar.CQRS.Projection.Core.Tests/ElasticsearchProjectionDocumentStoreBehaviorTests.cs @@ -70,7 +70,8 @@ public async Task QueryAsync_WhenSortFieldNotConfigured_ShouldUseProjectionDocum _ = await store.QueryAsync(new ProjectionDocumentQuery()); - var searchRequest = handler.CapturedRequests.Should().ContainSingle().Subject; + var searchRequest = handler.CapturedRequests + .Should().ContainSingle(r => r.PathAndQuery.EndsWith("/_search")).Subject; searchRequest.PathAndQuery.Should().EndWith("/_search"); searchRequest.Body.Should().Contain("\"sort\""); searchRequest.Body.Should().Contain("\"CreatedAt\""); @@ -85,6 +86,8 @@ public async Task QueryAsync_WhenSortFieldNotConfigured_ShouldUseProjectionDocum public async Task QueryAsync_WhenUsingClrFieldPaths_ShouldTranslateToProtoFieldNames() { var handler = new ScriptedHttpMessageHandler(); + handler.SetMappingResponse(_ => CreateMappingResponse( + """{"actor_id":{"type":"keyword"}}""")); handler.EnqueueResponse(_ => CreateJsonResponse( HttpStatusCode.OK, """{"hits":{"hits":[]}}""")); @@ -117,7 +120,8 @@ public async Task QueryAsync_WhenUsingClrFieldPaths_ShouldTranslateToProtoFieldN ], }); - var searchRequest = handler.CapturedRequests.Should().ContainSingle().Subject; + var searchRequest = handler.CapturedRequests + .Should().ContainSingle(r => r.PathAndQuery.EndsWith("/_search")).Subject; searchRequest.Body.Should().Contain("\"actor_id\":\"actor-1\""); searchRequest.Body.Should().Contain("\"updated_at_utc_value\""); searchRequest.Body.Should().NotContain("\"ActorId\""); @@ -303,7 +307,8 @@ public async Task QueryAsync_WhenUsingExplicitTimestampSort_ShouldIncludeMissing ], }); - var searchRequest = handler.CapturedRequests.Should().ContainSingle().Subject; + var searchRequest = handler.CapturedRequests + .Should().ContainSingle(r => r.PathAndQuery.EndsWith("/_search")).Subject; searchRequest.Body.Should().Contain("\"updated_at_utc_value\""); searchRequest.Body.Should().Contain("\"missing\":\"_last\""); searchRequest.Body.Should().Contain("\"unmapped_type\":\"date\""); @@ -336,45 +341,29 @@ public async Task QueryAsync_WhenUsingExplicitStringSort_ShouldUseKeywordUnmappe ], }); - var searchRequest = handler.CapturedRequests.Should().ContainSingle().Subject; + var searchRequest = handler.CapturedRequests + .Should().ContainSingle(r => r.PathAndQuery.EndsWith("/_search")).Subject; searchRequest.Body.Should().Contain("\"value\""); searchRequest.Body.Should().Contain("\"missing\":\"_last\""); searchRequest.Body.Should().Contain("\"unmapped_type\":\"keyword\""); } [Fact] - public async Task QueryAsync_WhenFieldHasExplicitKeywordMapping_ShouldNotAppendKeywordSuffix() + public async Task QueryAsync_WhenLiveIndexMappingIsKeyword_ShouldNotAppendKeywordSuffix() { var handler = new ScriptedHttpMessageHandler(); + handler.SetMappingResponse(_ => CreateMappingResponse( + """{"value":{"type":"keyword"}}""")); handler.EnqueueResponse(_ => CreateJsonResponse( HttpStatusCode.OK, """{"hits":{"hits":[]}}""")); - var options = new ElasticsearchProjectionDocumentStoreOptions - { - AutoCreateIndex = false, - }; - options.Endpoints = ["http://localhost:9200"]; - - using var store = new ElasticsearchProjectionDocumentStore( - options, - new DocumentIndexMetadata( - IndexName: "projection-core-tests", - Mappings: new Dictionary - { - ["properties"] = new Dictionary - { - ["value"] = new Dictionary - { - ["type"] = "keyword", - }, - }, - }, - Settings: new Dictionary(), - Aliases: new Dictionary()), - keySelector: model => model.Id, - keyFormatter: key => key, - httpMessageHandler: handler); + using var store = CreateStore( + new ElasticsearchProjectionDocumentStoreOptions + { + AutoCreateIndex = false, + }, + handler); _ = await store.QueryAsync(new ProjectionDocumentQuery { @@ -389,44 +378,28 @@ public async Task QueryAsync_WhenFieldHasExplicitKeywordMapping_ShouldNotAppendK ], }); - var searchRequest = handler.CapturedRequests.Should().ContainSingle().Subject; + var searchRequest = handler.CapturedRequests + .Should().ContainSingle(r => r.PathAndQuery.EndsWith("/_search")).Subject; searchRequest.Body.Should().Contain("\"value\":\"v1\""); searchRequest.Body.Should().NotContain("\"value.keyword\""); } [Fact] - public async Task QueryAsync_WhenFieldHasExplicitTextMappingWithoutKeyword_ShouldNotInventKeywordSuffix() + public async Task QueryAsync_WhenLiveIndexMappingIsTextWithoutKeyword_ShouldNotInventKeywordSuffix() { var handler = new ScriptedHttpMessageHandler(); + handler.SetMappingResponse(_ => CreateMappingResponse( + """{"value":{"type":"text"}}""")); handler.EnqueueResponse(_ => CreateJsonResponse( HttpStatusCode.OK, """{"hits":{"hits":[]}}""")); - var options = new ElasticsearchProjectionDocumentStoreOptions - { - AutoCreateIndex = false, - }; - options.Endpoints = ["http://localhost:9200"]; - - using var store = new ElasticsearchProjectionDocumentStore( - options, - new DocumentIndexMetadata( - IndexName: "projection-core-tests", - Mappings: new Dictionary - { - ["properties"] = new Dictionary - { - ["value"] = new Dictionary - { - ["type"] = "text", - }, - }, - }, - Settings: new Dictionary(), - Aliases: new Dictionary()), - keySelector: model => model.Id, - keyFormatter: key => key, - httpMessageHandler: handler); + using var store = CreateStore( + new ElasticsearchProjectionDocumentStoreOptions + { + AutoCreateIndex = false, + }, + handler); _ = await store.QueryAsync(new ProjectionDocumentQuery { @@ -441,7 +414,8 @@ public async Task QueryAsync_WhenFieldHasExplicitTextMappingWithoutKeyword_Shoul ], }); - var searchRequest = handler.CapturedRequests.Should().ContainSingle().Subject; + var searchRequest = handler.CapturedRequests + .Should().ContainSingle(r => r.PathAndQuery.EndsWith("/_search")).Subject; searchRequest.Body.Should().Contain("\"value\":\"v1\""); searchRequest.Body.Should().NotContain("\"value.keyword\""); } @@ -484,10 +458,118 @@ public async Task QueryAsync_WhenDescriptorContainsRecursiveWellKnownType_Should ], }); - var searchRequest = handler.CapturedRequests.Should().ContainSingle().Subject; + var searchRequest = handler.CapturedRequests + .Should().ContainSingle(r => r.PathAndQuery.EndsWith("/_search")).Subject; searchRequest.Body.Should().Contain("\"value.keyword\":\"v1\""); } + [Fact] + public async Task QueryAsync_WhenLiveIndexMapsAugmentedKeywordFieldAsTextMultiField_ShouldTargetKeywordSubfield() + { + // Regression for #743: `actor_id` is an `_id`-suffix field, so descriptor augmentation + // declares it `keyword` in code-side metadata. A projection index created before that + // augmentation shipped still carries ES's dynamic `text` + `.keyword` multi-field mapping. + // The exact-match term filter must target the physical `.keyword` sub-field, otherwise the + // term query hits the analyzed `text` field and never matches a UUID-shaped value. + var handler = new ScriptedHttpMessageHandler(); + handler.SetMappingResponse(_ => CreateMappingResponse( + """{"actor_id":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}}}""")); + handler.EnqueueResponse(_ => CreateJsonResponse( + HttpStatusCode.OK, + """{"hits":{"hits":[]}}""")); + + using var store = CreateStore( + new ElasticsearchProjectionDocumentStoreOptions + { + AutoCreateIndex = false, + }, + handler); + + _ = await store.QueryAsync(new ProjectionDocumentQuery + { + Filters = + [ + new ProjectionDocumentFilter + { + FieldPath = nameof(TestStoreReadModel.ActorId), + Operator = ProjectionDocumentFilterOperator.Eq, + Value = ProjectionDocumentValue.FromString("801667e9-772d-4bdf-8000-717ce331746c"), + }, + ], + }); + + handler.CapturedRequests[0].Method.Should().Be("GET"); + handler.CapturedRequests[0].PathAndQuery.Should().EndWith("/aevatar-projection-core-tests/_mapping"); + var searchRequest = handler.CapturedRequests + .Should().ContainSingle(r => r.PathAndQuery.EndsWith("/_search")).Subject; + searchRequest.Body.Should().Contain( + "\"actor_id.keyword\":\"801667e9-772d-4bdf-8000-717ce331746c\""); + searchRequest.Body.Should().NotContain("\"actor_id\":\"801667e9"); + } + + [Fact] + public async Task QueryAsync_WhenLiveIndexMappingProbeFails_ShouldFallBackToDeclaredMetadata() + { + // When the `_mapping` probe cannot read physical truth (here: an Elasticsearch 500), the + // resolver falls back to declared/augmented metadata — `actor_id` is augmented to `keyword`, + // so the term targets the bare field. This preserves pre-#743 behaviour on probe failure. + var handler = new ScriptedHttpMessageHandler(); + handler.SetMappingResponse(_ => CreateJsonResponse( + HttpStatusCode.InternalServerError, + """{"error":"mapping unavailable"}""")); + handler.EnqueueResponse(_ => CreateJsonResponse( + HttpStatusCode.OK, + """{"hits":{"hits":[]}}""")); + + using var store = CreateStore( + new ElasticsearchProjectionDocumentStoreOptions + { + AutoCreateIndex = false, + }, + handler); + + _ = await store.QueryAsync(new ProjectionDocumentQuery + { + Filters = + [ + new ProjectionDocumentFilter + { + FieldPath = nameof(TestStoreReadModel.ActorId), + Operator = ProjectionDocumentFilterOperator.Eq, + Value = ProjectionDocumentValue.FromString("actor-1"), + }, + ], + }); + + var searchRequest = handler.CapturedRequests + .Should().ContainSingle(r => r.PathAndQuery.EndsWith("/_search")).Subject; + searchRequest.Body.Should().Contain("\"actor_id\":\"actor-1\""); + searchRequest.Body.Should().NotContain("\"actor_id.keyword\""); + } + + [Fact] + public async Task QueryAsync_WhenCalledRepeatedly_ShouldProbeLiveIndexMappingOnce() + { + var handler = new ScriptedHttpMessageHandler(); + handler.SetMappingResponse(_ => CreateMappingResponse( + """{"value":{"type":"keyword"}}""")); + handler.EnqueueResponse(_ => CreateJsonResponse(HttpStatusCode.OK, """{"hits":{"hits":[]}}""")); + handler.EnqueueResponse(_ => CreateJsonResponse(HttpStatusCode.OK, """{"hits":{"hits":[]}}""")); + + using var store = CreateStore( + new ElasticsearchProjectionDocumentStoreOptions + { + AutoCreateIndex = false, + }, + handler); + + _ = await store.QueryAsync(new ProjectionDocumentQuery()); + _ = await store.QueryAsync(new ProjectionDocumentQuery()); + + handler.CapturedRequests.Count(r => r.PathAndQuery.EndsWith("/_mapping")).Should().Be(1); + handler.CapturedRequests.Count(r => r.PathAndQuery.EndsWith("/_search")).Should().Be(2); + } + [Fact] public async Task UpsertAsync_WhenMetadataContainsStructuredObjects_ShouldSendStructuredIndexInitializationPayload() { @@ -908,6 +990,19 @@ private static JsonElement GetFieldMapping(JsonElement indexPayload, string fiel return GetFieldMapping(indexPayload, fieldName).GetProperty("type").GetString(); } + // CreateStore / the explicit-metadata stores all resolve to this concrete index name + // (default "aevatar" prefix + "projection-core-tests" scope). + private const string TestIndexName = "aevatar-projection-core-tests"; + + // Builds an Elasticsearch `GET /_mapping` response body. `propertiesJson` is the raw + // JSON object placed under `mappings.properties` (e.g. {"value":{"type":"keyword"}}). + private static HttpResponseMessage CreateMappingResponse(string propertiesJson) + { + return CreateJsonResponse( + HttpStatusCode.OK, + "{\"" + TestIndexName + "\":{\"mappings\":{\"properties\":" + propertiesJson + "}}}"); + } + private static HttpResponseMessage CreateJsonResponse(HttpStatusCode statusCode, string json) { return new HttpResponseMessage(statusCode) @@ -919,6 +1014,7 @@ private static HttpResponseMessage CreateJsonResponse(HttpStatusCode statusCode, private sealed class ScriptedHttpMessageHandler : HttpMessageHandler { private readonly Queue> _responses = new(); + private Func? _mappingResponseFactory; public List CapturedRequests { get; } = []; @@ -927,6 +1023,15 @@ public void EnqueueResponse(Func respon _responses.Enqueue(responseFactory); } + // `GET /_mapping` is a transparent, idempotent probe issued by the query path to + // resolve keyword/text field paths from physical index truth. It is served from a dedicated + // slot so scripted `_search` sequences stay focused on the operation under test. Tests that + // exercise field-path resolution set an explicit mapping; the rest get an empty mapping. + public void SetMappingResponse(Func responseFactory) + { + _mappingResponseFactory = responseFactory; + } + protected override async Task SendAsync( HttpRequestMessage request, CancellationToken cancellationToken) @@ -940,6 +1045,12 @@ protected override async Task SendAsync( request.RequestUri?.PathAndQuery ?? "", requestBody)); + if (request.Method == HttpMethod.Get && + (request.RequestUri?.PathAndQuery ?? "").EndsWith("/_mapping", StringComparison.Ordinal)) + { + return (_mappingResponseFactory ?? DefaultMappingResponse).Invoke(request); + } + if (_responses.Count == 0) { throw new InvalidOperationException( @@ -948,6 +1059,11 @@ protected override async Task SendAsync( return _responses.Dequeue().Invoke(request); } + + private static HttpResponseMessage DefaultMappingResponse(HttpRequestMessage request) + { + return CreateMappingResponse("{}"); + } } private sealed record CapturedRequest(string Method, string PathAndQuery, string Body);