Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions src/trace_processor/importers/proto/gpu_event_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ using protos::pbzero::VulkanMemoryEvent;
constexpr auto kRenderStageBlueprint = TrackCompressor::SliceBlueprint(
"gpu_render_stage",
tracks::DimensionBlueprints(
tracks::kGpuDimensionBlueprint,
tracks::StringDimensionBlueprint("render_stage_source"),
tracks::UintDimensionBlueprint("hwqueue_id"),
tracks::StringIdDimensionBlueprint("hwqueue_name")),
Expand Down Expand Up @@ -283,6 +284,7 @@ StringId GpuEventParser::GetFullStageName(
}

void GpuEventParser::InsertTrackForUninternedRenderStage(
uint32_t gpu_id,
uint32_t hw_queue_id,
const GpuRenderStageEvent::Specifications::Description::Decoder& hw_queue) {
if (!hw_queue.has_name()) {
Expand Down Expand Up @@ -314,7 +316,7 @@ void GpuEventParser::InsertTrackForUninternedRenderStage(

auto factory = context_->track_compressor->CreateTrackFactory(
kRenderStageBlueprint,
tracks::Dimensions("id", hw_queue_id, kNullStringId),
tracks::Dimensions(gpu_id, "id", hw_queue_id, kNullStringId),
tracks::DynamicName(name),
[&, this](ArgsTracker::BoundInserter& inserter) {
inserter.AddArg(description_id_, Variadic::String(description));
Expand Down Expand Up @@ -379,12 +381,14 @@ void GpuEventParser::ParseGpuRenderStageEvent(
GpuRenderStageEvent::Decoder event(blob);

int32_t pid = 0;
uint32_t gpu_id =
event.has_gpu_id() ? static_cast<uint32_t>(event.gpu_id()) : 0;
if (event.has_specifications()) {
GpuRenderStageEvent::Specifications::Decoder spec(event.specifications());
uint32_t hw_queue_id = 0;
for (auto it = spec.hw_queue(); it; ++it) {
GpuRenderStageEvent::Specifications::Description::Decoder hw_queue(*it);
InsertTrackForUninternedRenderStage(hw_queue_id++, hw_queue);
InsertTrackForUninternedRenderStage(gpu_id, hw_queue_id++, hw_queue);
}
for (auto it = spec.stage(); it; ++it) {
GpuRenderStageEvent::Specifications::Description::Decoder stage(*it);
Expand Down Expand Up @@ -477,7 +481,7 @@ void GpuEventParser::ParseGpuRenderStageEvent(
: kNullStringId;
TrackId track_id = context_->track_compressor->InternScoped(
kRenderStageBlueprint,
tracks::Dimensions(base::StringView(source),
tracks::Dimensions(gpu_id, base::StringView(source),
static_cast<uint32_t>(hw_queue_id), dimension_name),
ts, static_cast<int64_t>(event.duration()),
tracks::DynamicName(track_name),
Expand Down
1 change: 1 addition & 0 deletions src/trace_processor/importers/proto/gpu_event_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ class GpuEventParser {
PacketSequenceStateGeneration* sequence_state,
const protos::pbzero::GpuRenderStageEvent_Decoder& event) const;
// Sets up the render-stage track for one hardware queue description taken
// from a GpuRenderStageEvent's specifications.
//   gpu_id: GPU the queue belongs to; used as the leading track dimension so
//           that queues with the same index on different GPUs do not share a
//           track.
//   id:     index of the hardware queue within the specifications list.
//   (unnamed Decoder): the queue's Description message.
// NOTE(review): the definition lives in gpu_event_parser.cc; confirm the
// exact behaviour for descriptions without a name there.
void InsertTrackForUninternedRenderStage(
uint32_t gpu_id,
uint32_t id,
const protos::pbzero::GpuRenderStageEvent::Specifications::Description::
Decoder&);
Expand Down
54 changes: 49 additions & 5 deletions ui/src/plugins/dev.perfetto.TraceProcessorTrack/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ export default class TraceProcessorTrackPlugin implements PerfettoPlugin {

private groups = new Map<string, TrackNode>();
private store?: Store<TraceProcessorTrackPluginState>;
private gpuCount = 0;

private migrateTraceProcessorTrackPluginState(
init: unknown,
Expand All @@ -104,6 +105,15 @@ export default class TraceProcessorTrackPlugin implements PerfettoPlugin {
this.store = ctx.mountStore(TraceProcessorTrackPlugin.id, (init) =>
this.migrateTraceProcessorTrackPluginState(init),
);

// Query how many distinct GPU IDs exist in the trace.
const gpuCountResult = await ctx.engine.query(`
select count(distinct extract_arg(dimension_arg_set_id, 'gpu')) as cnt
from track
where extract_arg(dimension_arg_set_id, 'gpu') is not null
`);
this.gpuCount = gpuCountResult.firstRow({cnt: NUM}).cnt;

await this.addCounters(ctx);
await this.addSlices(ctx);
this.addAggregations(ctx);
Expand All @@ -124,6 +134,7 @@ export default class TraceProcessorTrackPlugin implements PerfettoPlugin {
ct.machine_id as machine,
extract_arg(ct.dimension_arg_set_id, 'utid') as utid,
extract_arg(ct.dimension_arg_set_id, 'upid') as upid,
extract_arg(ct.dimension_arg_set_id, 'gpu') as gpu_id,
extract_arg(ct.source_arg_set_id, 'description') as description
from counter_track ct
join _counter_track_summary using (id)
Expand Down Expand Up @@ -153,6 +164,7 @@ export default class TraceProcessorTrackPlugin implements PerfettoPlugin {
unit: STR_NULL,
utid: NUM_NULL,
upid: NUM_NULL,
gpu_id: NUM_NULL,
threadName: STR_NULL,
processName: STR_NULL,
tid: LONG_NULL,
Expand All @@ -170,6 +182,7 @@ export default class TraceProcessorTrackPlugin implements PerfettoPlugin {
unit,
utid,
upid,
gpu_id: gpuId,
threadName,
processName,
tid,
Expand Down Expand Up @@ -232,6 +245,7 @@ export default class TraceProcessorTrackPlugin implements PerfettoPlugin {
group,
upid,
utid,
gpuId,
new TrackNode({
uri,
name: trackName,
Expand Down Expand Up @@ -264,6 +278,7 @@ export default class TraceProcessorTrackPlugin implements PerfettoPlugin {
lower(min(t.name)) as lower_name,
extract_arg(t.dimension_arg_set_id, 'utid') as utid,
extract_arg(t.dimension_arg_set_id, 'upid') as upid,
extract_arg(t.dimension_arg_set_id, 'gpu') as gpu_id,
extract_arg(t.source_arg_set_id, 'description') as description,
min(t.id) minTrackId,
group_concat(t.id) as trackIds,
Expand All @@ -277,13 +292,14 @@ export default class TraceProcessorTrackPlugin implements PerfettoPlugin {
from _slice_track_summary s
join track t using (id)
left join _track_event_tracks_with_callstacks cs on cs.track_id = t.id
group by type, upid, utid, t.track_group_id, ifnull(t.track_group_id, t.id)
group by type, upid, utid, gpu_id, t.track_group_id, ifnull(t.track_group_id, t.id)
)
select
s.type,
s.name,
s.utid,
ifnull(s.upid, tp.upid) as upid,
s.gpu_id,
s.minTrackId as minTrackId,
s.trackIds as trackIds,
s.trackCount,
Expand Down Expand Up @@ -330,6 +346,7 @@ export default class TraceProcessorTrackPlugin implements PerfettoPlugin {
name: STR_NULL,
utid: NUM_NULL,
upid: NUM_NULL,
gpu_id: NUM_NULL,
trackIds: STR,
maxDepth: NUM,
tid: LONG_NULL,
Expand All @@ -351,6 +368,7 @@ export default class TraceProcessorTrackPlugin implements PerfettoPlugin {
maxDepth,
utid,
upid,
gpu_id: gpuId,
threadName,
processName,
tid,
Expand Down Expand Up @@ -417,6 +435,7 @@ export default class TraceProcessorTrackPlugin implements PerfettoPlugin {
group,
upid,
utid,
gpuId,

new TrackNode({
uri,
Expand All @@ -436,6 +455,7 @@ export default class TraceProcessorTrackPlugin implements PerfettoPlugin {
group: string | TrackGroupSchema | undefined,
upid: number | null,
utid: number | null,
gpuId: number | null,
track: TrackNode,
) {
switch (topLevelGroup) {
Expand Down Expand Up @@ -466,10 +486,34 @@ export default class TraceProcessorTrackPlugin implements PerfettoPlugin {
break;
}
default: {
const standardGroup = ctx.plugins
.getPlugin(StandardGroupsPlugin)
.getOrCreateStandardGroup(ctx.defaultWorkspace, topLevelGroup);
this.getGroupByName(standardGroup, group, null).addChildInOrder(track);
const standardGroupsPlugin =
ctx.plugins.getPlugin(StandardGroupsPlugin);
const standardGroup = standardGroupsPlugin.getOrCreateStandardGroup(
ctx.defaultWorkspace,
topLevelGroup,
);

// For multi-GPU traces, create per-GPU sub-groups within each
// group (e.g., "GPU 0 Counters" inside "Counters").
if (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would not pollute the trace processor track plugin with custom code like this. If you need this, you should remove it from this plugin and add it to a new plugin (or an existing GPU plugin) instead.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good. We already have a plugin for GPU Frequency. I'll see how things look if I add one for GPU Counters and one for GPU Hardware Queues.

topLevelGroup === 'GPU' &&
gpuId !== null &&
group !== undefined &&
this.gpuCount > 1
) {
const groupName = typeof group === 'string' ? group : group.name;
const parentGroup = this.getGroupByName(standardGroup, group, null);
const gpuSubGroup = this.getGroupByName(
parentGroup,
`GPU ${gpuId} ${groupName}`,
gpuId,
);
gpuSubGroup.addChildInOrder(track);
} else {
this.getGroupByName(standardGroup, group, null).addChildInOrder(
track,
);
}
break;
}
}
Expand Down
Loading