Skip to content

Commit b9e9cfd

Browse files
committed
Reduce allocations for StackLineInfo and StackAddressInfo.
Before: https://share.firefox.dev/3NnthpO After: https://share.firefox.dev/45dMNv1 getStackLineInfo now allocates 46MB instead of 220MB (4.8x reduction), and takes 146ms instead of 338ms (2.3x reduction), when opening the source view for mozjemalloc.cpp on https://share.firefox.dev/3NnzEcF
1 parent 66ea79f commit b9e9cfd

File tree

7 files changed

+716
-154
lines changed

7 files changed

+716
-154
lines changed

src/profile-logic/address-timings.ts

Lines changed: 75 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,9 @@ import type {
7676
StackAddressInfo,
7777
AddressTimings,
7878
Address,
79+
IndexIntoAddressSetTable,
7980
} from 'firefox-profiler/types';
81+
import { SetCollectionBuilder } from 'firefox-profiler/utils/set-collection';
8082

8183
/**
8284
* For each stack in `stackTable`, and one specific native symbol, compute the
@@ -112,67 +114,38 @@ import type {
112114
* If there is recursion, and the same address is present in multiple frames in
113115
* the same stack, the address is only counted once - the addresses are stored
114116
* in a set.
115-
*
116-
* The returned StackAddressInfo is computed as follows:
117-
* selfAddress[stack]:
118-
* For stacks whose stack.frame.nativeSymbol is the given native symbol,
119-
* this is stack.frame.address.
120-
* For all other stacks this is null.
121-
* stackAddresses[stack]:
122-
* For stacks whose stack.frame.nativeSymbol is the given native symbol,
123-
* this is the stackAddresses of its prefix stack, plus stack.frame.address
124-
* added to the set.
125-
* For all other stacks this is the same as the stackAddresses set of the
126-
* stack's prefix.
127117
*/
128118
export function getStackAddressInfo(
129119
stackTable: StackTable,
130120
frameTable: FrameTable,
131121
_funcTable: FuncTable,
132122
nativeSymbol: IndexIntoNativeSymbolTable
133123
): StackAddressInfo {
134-
// "self address" == "the address which a stack's self time is contributed to"
135-
const selfAddressForAllStacks = [];
136-
// "total addresses" == "the set of addresses whose total time this stack contributes to"
137-
const totalAddressesForAllStacks: Array<Set<Address> | null> = [];
124+
const builder = new SetCollectionBuilder<number>();
125+
const stackIndexToAddressSetIndex = new Int32Array(stackTable.length);
138126

139-
// This loop takes advantage of the fact that the stack table is topologically ordered:
140-
// Prefix stacks are always visited before their descendants.
141-
// Each stack inherits the "total" addresses from its parent stack, and then adds its
142-
// self address to that set. If the stack doesn't have a self address in the library, we just
143-
// re-use the prefix's set object without copying it.
144127
for (let stackIndex = 0; stackIndex < stackTable.length; stackIndex++) {
145-
const frame = stackTable.frame[stackIndex];
146128
const prefixStack = stackTable.prefix[stackIndex];
147-
const nativeSymbolOfThisStack = frameTable.nativeSymbol[frame];
129+
const prefixAddressSet: IndexIntoAddressSetTable | -1 =
130+
prefixStack !== null ? stackIndexToAddressSetIndex[prefixStack] : -1;
148131

149-
let selfAddress: Address | null = null;
150-
let totalAddresses: Set<Address> | null =
151-
prefixStack !== null ? totalAddressesForAllStacks[prefixStack] : null;
132+
const frame = stackTable.frame[stackIndex];
133+
const nativeSymbolOfThisStack = frameTable.nativeSymbol[frame];
134+
const matchesNativeSymbol = nativeSymbolOfThisStack === nativeSymbol;
135+
if (prefixAddressSet === -1 && !matchesNativeSymbol) {
136+
stackIndexToAddressSetIndex[stackIndex] = -1;
137+
} else {
138+
const selfAddress = matchesNativeSymbol ? frameTable.address[frame] : -1;
152139

153-
if (nativeSymbolOfThisStack === nativeSymbol) {
154-
selfAddress = frameTable.address[frame];
155-
if (selfAddress !== -1) {
156-
// Add this stack's address to this stack's totalAddresses. The rest of this stack's
157-
// totalAddresses is the same as for the parent stack.
158-
// We avoid creating new Set objects unless the new set is actually
159-
// different.
160-
if (totalAddresses === null) {
161-
// None of the ancestor stack nodes have hit a address in the given library.
162-
totalAddresses = new Set([selfAddress]);
163-
} else if (!totalAddresses.has(selfAddress)) {
164-
totalAddresses = new Set(totalAddresses);
165-
totalAddresses.add(selfAddress);
166-
}
167-
}
140+
stackIndexToAddressSetIndex[stackIndex] = builder.extend(
141+
prefixAddressSet !== -1 ? prefixAddressSet : null,
142+
selfAddress
143+
);
168144
}
169-
170-
selfAddressForAllStacks.push(selfAddress);
171-
totalAddressesForAllStacks.push(totalAddresses);
172145
}
173146
return {
174-
selfAddress: selfAddressForAllStacks,
175-
stackAddresses: totalAddressesForAllStacks,
147+
stackIndexToAddressSetIndex,
148+
addressSetTable: builder.finish(),
176149
};
177150
}
178151

@@ -192,30 +165,70 @@ export function getAddressTimings(
192165
if (stackAddressInfo === null) {
193166
return emptyAddressTimings;
194167
}
195-
const { selfAddress, stackAddresses } = stackAddressInfo;
196-
const totalAddressHits: Map<Address, number> = new Map();
197-
const selfAddressHits: Map<Address, number> = new Map();
198168

199-
// Iterate over all the samples, and aggregate the sample's weight into the
200-
// addresses which are hit by the sample's stack.
201-
// TODO: Maybe aggregate sample count per stack first, and then visit each stack only once?
169+
const { stackIndexToAddressSetIndex, addressSetTable } = stackAddressInfo;
170+
171+
// We do two passes to compute the timings:
172+
// 1. One pass over the samples to accumulate the sample weight onto the
173+
// nodes in the addressSetTable.
174+
// 2. One pass (from back to front) over the addressSetTable, propagating
175+
// values up the tree and, at the same time, accumulating per-address
176+
// totals.
177+
178+
// First, do the pass over the samples to compute the weight per address set.
179+
const selfPerAddressSet = new Float64Array(addressSetTable.length);
202180
for (let sampleIndex = 0; sampleIndex < samples.length; sampleIndex++) {
203181
const stackIndex = samples.stack[sampleIndex];
204182
if (stackIndex === null) {
205183
continue;
206184
}
207-
const weight = samples.weight ? samples.weight[sampleIndex] : 1;
208-
const setOfHitAddresses = stackAddresses[stackIndex];
209-
if (setOfHitAddresses !== null) {
210-
for (const address of setOfHitAddresses) {
211-
const oldHitCount = totalAddressHits.get(address) ?? 0;
212-
totalAddressHits.set(address, oldHitCount + weight);
185+
const addressSetIndex = stackIndexToAddressSetIndex[stackIndex];
186+
if (addressSetIndex !== -1) {
187+
const weight = samples.weight ? samples.weight[sampleIndex] : 1;
188+
selfPerAddressSet[addressSetIndex] += weight;
189+
}
190+
}
191+
192+
// Now, do a pass over the addressSetTable, from back to front.
193+
// This is a similar idea to what we do for the call tree or the function
194+
// list. The upwards propagation of a sample's weight will not contribute
195+
// to the same address multiple times thanks to the guarantees of the
196+
// addressSetTable - there are no duplicate values on a node's path to the
197+
// root.
198+
const totalAddressHits: Map<Address, number> = new Map();
199+
const selfAddressHits: Map<Address, number> = new Map();
200+
const selfSumOfAddressSetDescendants = new Float64Array(
201+
addressSetTable.length
202+
);
203+
for (
204+
let addressSetIndex = addressSetTable.length - 1;
205+
addressSetIndex >= 0;
206+
addressSetIndex--
207+
) {
208+
const selfWeight = selfPerAddressSet[addressSetIndex];
209+
if (selfWeight !== 0) {
210+
const selfAddress = addressSetTable.self[addressSetIndex];
211+
if (selfAddress !== -1) {
212+
const oldHitCount = selfAddressHits.get(selfAddress) ?? 0;
213+
selfAddressHits.set(selfAddress, oldHitCount + selfWeight);
213214
}
214215
}
215-
const address = selfAddress[stackIndex];
216-
if (address !== null) {
217-
const oldHitCount = selfAddressHits.get(address) ?? 0;
218-
selfAddressHits.set(address, oldHitCount + weight);
216+
217+
const selfSumOfThisAddressSetDescendants =
218+
selfSumOfAddressSetDescendants[addressSetIndex];
219+
const thisAddressSetWeight =
220+
selfWeight + selfSumOfThisAddressSetDescendants;
221+
const addressSetParent = addressSetTable.parent[addressSetIndex];
222+
if (addressSetParent !== null) {
223+
selfSumOfAddressSetDescendants[addressSetParent] += thisAddressSetWeight;
224+
}
225+
226+
if (thisAddressSetWeight !== 0) {
227+
const address = addressSetTable.value[addressSetIndex];
228+
if (address !== -1) {
229+
const oldHitCount = totalAddressHits.get(address) ?? 0;
230+
totalAddressHits.set(address, oldHitCount + thisAddressSetWeight);
231+
}
219232
}
220233
}
221234
return { totalAddressHits, selfAddressHits };

src/profile-logic/line-timings.ts

Lines changed: 74 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ import type {
1111
LineTimings,
1212
LineNumber,
1313
IndexIntoSourceTable,
14+
IndexIntoLineSetTable,
1415
} from 'firefox-profiler/types';
16+
import { SetCollectionBuilder } from 'firefox-profiler/utils/set-collection';
1517

1618
/**
1719
* For each stack in `stackTable`, and one specific source file, compute the
@@ -40,69 +42,41 @@ import type {
4042
* This last line is the stack's "self line".
4143
* If there is recursion, and the same line is present in multiple frames in the
4244
* same stack, the line is only counted once - the lines are stored in a set.
43-
*
44-
* The returned StackLineInfo is computed as follows:
45-
* selfLine[stack]:
46-
* For stacks whose stack.frame.func.file is the given file, this is stack.frame.line.
47-
* For all other stacks this is null.
48-
* stackLines[stack]:
49-
* For stacks whose stack.frame.func.file is the given file, this is the stackLines
50-
* of its prefix stack, plus stack.frame.line added to the set.
51-
* For all other stacks this is the same as the stackLines set of the stack's prefix.
5245
*/
5346
export function getStackLineInfo(
5447
stackTable: StackTable,
5548
frameTable: FrameTable,
5649
funcTable: FuncTable,
5750
sourceViewSourceIndex: IndexIntoSourceTable
5851
): StackLineInfo {
59-
// "self line" == "the line which a stack's self time is contributed to"
60-
const selfLineForAllStacks = [];
61-
// "total lines" == "the set of lines whose total time this stack contributes to"
62-
const totalLinesForAllStacks: Array<Set<LineNumber> | null> = [];
63-
64-
// This loop takes advantage of the fact that the stack table is topologically ordered:
65-
// Prefix stacks are always visited before their descendants.
66-
// Each stack inherits the "total" lines from its parent stack, and then adds its
67-
// self line to that set. If the stack doesn't have a self line in the file, we just
68-
// re-use the prefix's set object without copying it.
52+
const builder = new SetCollectionBuilder<number>();
53+
const stackIndexToLineSetIndex = new Int32Array(stackTable.length);
54+
6955
for (let stackIndex = 0; stackIndex < stackTable.length; stackIndex++) {
70-
const frame = stackTable.frame[stackIndex];
7156
const prefixStack = stackTable.prefix[stackIndex];
57+
const prefixLineSet: IndexIntoLineSetTable | -1 =
58+
prefixStack !== null ? stackIndexToLineSetIndex[prefixStack] : -1;
59+
60+
const frame = stackTable.frame[stackIndex];
7261
const func = frameTable.func[frame];
7362
const sourceIndexOfThisStack = funcTable.source[func];
63+
const matchesSource = sourceIndexOfThisStack === sourceViewSourceIndex;
64+
if (prefixLineSet === -1 && !matchesSource) {
65+
stackIndexToLineSetIndex[stackIndex] = -1;
66+
} else {
67+
const selfLineOrNull = matchesSource
68+
? (frameTable.line[frame] ?? funcTable.lineNumber[func])
69+
: null;
7470

75-
let selfLine: LineNumber | null = null;
76-
let totalLines: Set<LineNumber> | null =
77-
prefixStack !== null ? totalLinesForAllStacks[prefixStack] : null;
78-
79-
if (sourceIndexOfThisStack === sourceViewSourceIndex) {
80-
selfLine = frameTable.line[frame];
81-
// Fallback to func line info if frame line info is not available
82-
if (selfLine === null) {
83-
selfLine = funcTable.lineNumber[func];
84-
}
85-
if (selfLine !== null) {
86-
// Add this stack's line to this stack's totalLines. The rest of this stack's
87-
// totalLines is the same as for the parent stack.
88-
// We avoid creating new Set objects unless the new set is actually
89-
// different.
90-
if (totalLines === null) {
91-
// None of the ancestor stack nodes have hit a line in the given file.
92-
totalLines = new Set([selfLine]);
93-
} else if (!totalLines.has(selfLine)) {
94-
totalLines = new Set(totalLines);
95-
totalLines.add(selfLine);
96-
}
97-
}
71+
stackIndexToLineSetIndex[stackIndex] = builder.extend(
72+
prefixLineSet !== -1 ? prefixLineSet : null,
73+
selfLineOrNull !== null ? selfLineOrNull : -1
74+
);
9875
}
99-
100-
selfLineForAllStacks.push(selfLine);
101-
totalLinesForAllStacks.push(totalLines);
10276
}
10377
return {
104-
selfLine: selfLineForAllStacks,
105-
stackLines: totalLinesForAllStacks,
78+
stackIndexToLineSetIndex,
79+
lineSetTable: builder.finish(),
10680
};
10781
}
10882

@@ -122,30 +96,66 @@ export function getLineTimings(
12296
if (stackLineInfo === null) {
12397
return emptyLineTimings;
12498
}
125-
const { selfLine, stackLines } = stackLineInfo;
126-
const totalLineHits: Map<LineNumber, number> = new Map();
127-
const selfLineHits: Map<LineNumber, number> = new Map();
99+
const { stackIndexToLineSetIndex, lineSetTable } = stackLineInfo;
100+
101+
// We do two passes to compute the timings:
102+
// 1. One pass over the samples to accumulate the sample weight onto the
103+
// nodes in the lineSetTable.
104+
// 2. One pass (from back to front) over the lineSetTable, propagating
105+
// values up the tree and, at the same time, accumulating per-line
106+
// totals.
128107

129-
// Iterate over all the samples, and aggregate the sample's weight into the
130-
// lines which are hit by the sample's stack.
131-
// TODO: Maybe aggregate sample count per stack first, and then visit each stack only once?
108+
// First, do the pass over the samples to compute the weight per line set.
109+
const selfPerLineSet = new Float64Array(lineSetTable.length);
132110
for (let sampleIndex = 0; sampleIndex < samples.length; sampleIndex++) {
133111
const stackIndex = samples.stack[sampleIndex];
134112
if (stackIndex === null) {
135113
continue;
136114
}
137-
const weight = samples.weight ? samples.weight[sampleIndex] : 1;
138-
const setOfHitLines = stackLines[stackIndex];
139-
if (setOfHitLines !== null) {
140-
for (const line of setOfHitLines) {
141-
const oldHitCount = totalLineHits.get(line) ?? 0;
142-
totalLineHits.set(line, oldHitCount + weight);
115+
const lineSetIndex = stackIndexToLineSetIndex[stackIndex];
116+
if (lineSetIndex !== -1) {
117+
const weight = samples.weight ? samples.weight[sampleIndex] : 1;
118+
selfPerLineSet[lineSetIndex] += weight;
119+
}
120+
}
121+
122+
// Now, do a pass over the lineSetTable, from back to front.
123+
// This is a similar idea to what we do for the call tree or the function
124+
// list. The upwards propagation of a sample's weight will not contribute
125+
// to the same line multiple times thanks to the guarantees of the
126+
// lineSetTable - there are no duplicate values on a node's path to the
127+
// root.
128+
const totalLineHits: Map<LineNumber, number> = new Map();
129+
const selfLineHits: Map<LineNumber, number> = new Map();
130+
const selfSumOfLineSetDescendants = new Float64Array(lineSetTable.length);
131+
for (
132+
let lineSetIndex = lineSetTable.length - 1;
133+
lineSetIndex >= 0;
134+
lineSetIndex--
135+
) {
136+
const selfWeight = selfPerLineSet[lineSetIndex];
137+
if (selfWeight !== 0) {
138+
const selfLine = lineSetTable.self[lineSetIndex];
139+
if (selfLine !== -1) {
140+
const oldHitCount = selfLineHits.get(selfLine) ?? 0;
141+
selfLineHits.set(selfLine, oldHitCount + selfWeight);
143142
}
144143
}
145-
const line = selfLine[stackIndex];
146-
if (line !== null) {
147-
const oldHitCount = selfLineHits.get(line) ?? 0;
148-
selfLineHits.set(line, oldHitCount + weight);
144+
145+
const selfSumOfThisLineSetDescendants =
146+
selfSumOfLineSetDescendants[lineSetIndex];
147+
const thisLineSetWeight = selfWeight + selfSumOfThisLineSetDescendants;
148+
const lineSetParent = lineSetTable.parent[lineSetIndex];
149+
if (lineSetParent !== null) {
150+
selfSumOfLineSetDescendants[lineSetParent] += thisLineSetWeight;
151+
}
152+
153+
if (thisLineSetWeight !== 0) {
154+
const line = lineSetTable.value[lineSetIndex];
155+
if (line !== -1) {
156+
const oldHitCount = totalLineHits.get(line) ?? 0;
157+
totalLineHits.set(line, oldHitCount + thisLineSetWeight);
158+
}
149159
}
150160
}
151161
return { totalLineHits, selfLineHits };

src/test/unit/address-timings.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ describe('getStackAddressInfo', function () {
4444

4545
// Expect the returned arrays to have the same length as the stackTable.
4646
expect(stackTable.length).toBe(9);
47-
expect(stackLineInfoOne.selfAddress.length).toBe(9);
48-
expect(stackLineInfoOne.stackAddresses.length).toBe(9);
47+
expect(stackLineInfoOne.stackIndexToAddressSetIndex.length).toBe(9);
48+
expect(stackLineInfoOne.addressSetTable.length).toBe(4);
4949
});
5050
});
5151

src/test/unit/line-timings.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ describe('getStackLineInfo', function () {
4444

4545
// Expect the returned arrays to have the same length as the stackTable.
4646
expect(stackTable.length).toBe(9);
47-
expect(stackLineInfoOne.selfLine.length).toBe(9);
48-
expect(stackLineInfoOne.stackLines.length).toBe(9);
47+
expect(stackLineInfoOne.lineSetTable.length).toBe(7);
48+
// expect(stackLineInfoOne.stackLines.length).toBe(9);
4949
});
5050
});
5151

0 commit comments

Comments
 (0)