Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 14 additions & 38 deletions lib/sentry/opentelemetry/span_processor.ex
Original file line number Diff line number Diff line change
Expand Up @@ -37,50 +37,24 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do
end

defp process_span(span_record) do
transaction_root? =
cond do
# No parent = definitely a root
span_record.parent_span_id == nil ->
true

# Has a parent - check if it's local or remote
has_local_parent_span?(span_record.parent_span_id) ->
# Parent exists locally - this is a child span, not a transaction root
false
parent_id = span_record.parent_span_id

true ->
# Parent is remote (distributed tracing) - treat server spans as transaction roots
server_span?(span_record)
end

if transaction_root? do
build_and_send_transaction(span_record)
else
# Only skip spans that have a local parent span, as they will be bundled up when the parent span is processed.
# Spans with remote parents (e.g. from another service) or spans whose parent has already been processed
# (e.g. async continuations) should still be processed as their own transaction segments to avoid dropping them.
if parent_id && SpanStorage.span_exists?(parent_id) do
true
else
build_and_send_transaction(span_record)
end
end

# Answers whether SpanStorage has an entry for the given parent span id.
# Pure delegate: the result is whatever SpanStorage.span_exists?/1 returns.
defp has_local_parent_span?(parent_span_id),
  do: SpanStorage.span_exists?(parent_span_id)

# Decides whether a span record should be treated as a server span.
# Only records with kind: :server can qualify, and they must additionally pass
# either the HTTP server check or the LiveView check. Anything else is false.
defp server_span?(span_record) do
  case span_record do
    %{kind: :server} ->
      http_server_span?(span_record) or liveview_span?(span_record)

    _other ->
      false
  end
end

# True when a kind: :server span record carries the HTTP request method
# attribute. The attribute key comes from HTTPAttributes.http_request_method/0
# and is stringified before the lookup, so attributes are expected to be keyed
# by strings here.
defp http_server_span?(%{kind: :server, attributes: span_attributes}) do
  method_key = to_string(HTTPAttributes.http_request_method())

  Map.has_key?(span_attributes, method_key)
end

# Used as the LiveView check: true only when the span record's :origin is
# exactly "opentelemetry_phoenix". The span name is not inspected; records
# without that origin (or without an :origin at all) yield false.
defp liveview_span?(span_record),
  do: match?(%{origin: "opentelemetry_phoenix"}, span_record)

defp build_and_send_transaction(span_record) do
child_span_records = SpanStorage.get_child_spans(span_record.span_id)
child_span_records =
span_record.span_id
|> SpanStorage.get_child_spans()
|> Enum.filter(&span_complete?/1)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Descendant spans can be sent in duplicate transactions

Medium Severity

The span_complete? filter excludes incomplete children from a transaction but still includes their complete descendants (because get_all_descendants recurses through incomplete nodes). When the incomplete child later finishes and its parent is gone from storage, it becomes its own transaction root and re-collects those same descendants — sending them to Sentry twice. The remove_child_spans cleanup doesn't prevent this because it only removes direct children, not nested descendants.

Additional Locations (1)

Fix in Cursor Fix in Web

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@FugiTech this sounds legit, WDYT? I was wondering if we should just halt pruning of roots for as long as there are in-progress children.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I believe this is a legit problem with the PR as it stands today. Codex came up with FugiTech@16924b4 to solve it; I've been running that commit in production for a bit and it seems to work so far.

But the core issue is that I'm not actually that familiar with the Sentry Elixir SDK, so I don't know what the proper fix is. Delaying emission of the entire transaction until all children are done probably wouldn't be good enough, as children could start again after that point (delayed tasks, Oban, handle_event in LiveViews, etc.). But I don't fully understand the SpanStorage/batching characteristics of this flow.

If you're OK with it, I'd love for you to take over and implement a fix however you see fit. You're welcome to use this PR and the commit I linked above as inspiration, but I just lack the confidence to say that it'll properly fix the issue and not introduce other weird behaviors.


transaction = build_transaction(span_record, child_span_records)

result =
Expand Down Expand Up @@ -108,6 +82,8 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do
result
end

# A span record counts as complete once its end_time is set to anything other
# than nil.
defp span_complete?(span_record) do
  span_record.end_time != nil
end

defp build_transaction(root_span_record, child_span_records) do
root_span = build_span(root_span_record)
child_spans = Enum.map(child_span_records, &build_span(&1))
Expand Down
45 changes: 45 additions & 0 deletions test/sentry/opentelemetry/span_processor_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -820,5 +820,50 @@ defmodule Sentry.Opentelemetry.SpanProcessorTest do
refute SpanStorage.span_exists?("parent_span", table_name: table_name)
refute SpanStorage.span_exists?("completed_child", table_name: table_name)
end

@tag span_storage: true
test "async continuation spans are emitted as their own transaction segments" do
  put_test_config(environment_name: "test", traces_sample_rate: 1.0)
  Sentry.Test.start_collecting_sentry_reports()

  # Grab the OpenTelemetry context from inside the synchronous root span so a
  # later process can continue the trace after that span has already ended.
  root_ctx =
    Tracer.with_span "sync_root" do
      :otel_ctx.get_current()
    end

  continuation =
    Task.async(fn ->
      # NOTE(review): presumably this delay lets the finished root span be
      # processed before the continuation starts - confirm.
      Process.sleep(25)

      ctx_token = :otel_ctx.attach(root_ctx)

      try do
        Tracer.with_span "async_parent" do
          Tracer.with_span "async_child" do
            Process.sleep(1)
          end
        end
      after
        # Always restore the task's previous context, even if a span body raises.
        :otel_ctx.detach(ctx_token)
      end
    end)

  Task.await(continuation)

  reported = Sentry.Test.pop_sentry_transactions()

  # The synchronous root must have been reported as a transaction.
  assert Enum.any?(reported, fn txn -> txn.transaction == "sync_root" end)

  # The async continuation must be reported as a transaction of its own.
  async_parent_txn = Enum.find(reported, &(&1.transaction == "async_parent"))

  refute is_nil(async_parent_txn)
  assert length(async_parent_txn.spans) == 1

  # Its single child span must hang off the async parent's span id.
  [async_child_span] = async_parent_txn.spans
  assert async_child_span.op == "async_child"
  assert async_child_span.parent_span_id == async_parent_txn.contexts.trace.span_id
end
end
end