-
-
Notifications
You must be signed in to change notification settings - Fork 542
Description
Bogus NuGet Package
35.6.5
.NET Version
.NET 9
Visual Studio Version
rider 2025.3
What operating system are you using?
Windows
What locale are you using with Bogus?
default,
Problem Description
I was facing slow generation with a relatively big faker that has a lot of nested entities.
So I tried parallel generation, using both Parallel.ForEach and ParallelEnumerable, but neither seemed to change anything. After finding this issue, I tried changing my code to override the PopulateInternal method, but found no difference at all.
Here are my benchmark results (using BenchmarkDotNet):
| Method | batchSize | Mean |
|---|---|---|
| 'Normal Generation' | 100 | 7.926 s |
| 'Parallel Generation' | 100 | 8.032 s |
| 'Normal Generation' | 1000 | 79.178 s |
| 'Parallel Generation' | 1000 | 79.887 s |
Generally, both methods scale linearly (O(n)) with the generation count.
I discovered this when trying to generate some data for benchmarks, which is why the count was large; for tests, the faker was fast enough. But for benchmarks, generating the data takes more time than doing all the DB operations, and it really struggled for several minutes when generating more than 10K.
Note
My methods, like AddressFaker.Create(), are just static methods that return fakers for each entity with the appropriate rules.
LINQPad Example or Reproduction Steps
/// <summary>
/// Sequential baseline: generates <paramref name="batchSize"/> orders on the calling
/// thread, attaching every child collection inside the order faker's <c>FinishWith</c>.
/// </summary>
/// <remarks>
/// Each order gets a buyer with 2 addresses, 10 events, 10 custom fields and 2 order
/// lines; each order line gets 10 events and 10 custom fields of its own.
/// The fakers are built inside the method, so their construction is part of the
/// measured time.
/// </remarks>
/// <param name="batchSize">Number of orders to generate.</param>
[Benchmark(Description = "Normal Generation")]
[ArgumentsSource(nameof(BatchSizes))]
public void Bench1(int batchSize)
{
    var addresses = AddressFaker.Create();
    var buyers = CompanyFaker.Create()
        .RuleFor(
            company => company.Addresses,
            // Re-registers the CompanyId rule on the shared address faker for every company.
            // NOTE(review): this uses EdcID while Bench2 uses ID — confirm which is intended.
            (_, company) => addresses
                .RuleFor(address => address.CompanyId, company.EdcID)
                .Generate(2));

    var orderEvents = EventFaker.CreateOrderFaker();
    var lineEvents = EventFaker.CreateOrderItemFaker();
    var orderFields = CustomFieldFaker.CreateOrderFaker();
    var lineFields = CustomFieldFaker.CreateOrderItemFaker();
    var lineTemplate = OrderItemFaker.Create();

    var orderGenerator = OrderFaker.Create()
        .RuleFor(order => order.OwnerId, _ => OwnerId)
        .RuleFor(order => order.Buyer, _ => buyers.Generate())
        .RuleFor(order => order.BuyerID, (_, order) => order.Buyer!.ID)
        .FinishWith((_, order) =>
        {
            // Children are generated first, then stamped with the parent's ID.
            order.Events = orderEvents.Generate(10);
            foreach (var evt in order.Events)
            {
                evt.OrderId = order.ID;
            }

            foreach (var packLine in order.PackLines)
            {
                packLine.OrderId = order.ID;
            }

            order.CustomFields = orderFields.Generate(10);
            foreach (var field in order.CustomFields)
            {
                field.OrderId = order.ID;
            }

            var lines = lineTemplate.Generate(2);
            foreach (var line in lines)
            {
                line.OrderId = order.ID;
                line.OwnerId = OwnerId;

                line.Events = lineEvents.Generate(10);
                foreach (var evt in line.Events)
                {
                    evt.OrderItemId = line.ID;
                }

                line.CustomFields = lineFields.Generate(10);
                foreach (var field in line.CustomFields)
                {
                    field.OrderItemId = line.ID;
                }
            }

            order.OrderLines = lines;
        });

    var orders = orderGenerator.Generate(batchSize);

    // Touch the first and last results so the generated list is observably used.
    Console.WriteLine(orders.First().ID);
    Console.WriteLine(orders.Last().ID);
}
/// <summary>
/// Parallel variant: splits <paramref name="batchSize"/> orders across a fixed number of
/// PLINQ workers, each generating its share from its own clone of the order faker.
/// </summary>
/// <remarks>
/// Each order gets a buyer with 2 addresses, 10 events, 10 custom fields and 2 order
/// lines; each order line gets 10 events and 10 custom fields of its own.
/// Only the outer order faker is cloned per worker. The nested fakers captured by the
/// rule lambdas (buyer, address, event, custom-field and order-item fakers) are the same
/// instances for every worker.
/// NOTE(review): Bogus appears to run <c>Faker&lt;T&gt;</c> population under a process-wide
/// lock, which would serialize the workers and explain timings identical to the
/// sequential benchmark — confirm against the Bogus source for the version in use.
/// </remarks>
/// <param name="batchSize">Total number of orders to generate across all workers.</param>
[Benchmark(Description = "Parallel Generation")]
[ArgumentsSource(nameof(BatchSizes))]
public void Bench2(int batchSize)
{
    const int threadCount = 5;

    // Spread the remainder over the first workers so exactly batchSize orders are
    // produced even when batchSize is not a multiple of threadCount; plain integer
    // division would silently generate fewer orders than the sequential benchmark.
    var batchSizePerThread = batchSize / threadCount;
    var remainder = batchSize % threadCount;

    var addressFaker = AddressFaker.Create();
    var buyerFaker = CompanyFaker.Create()
        .RuleFor(c => c.Addresses, (_, c) => addressFaker.RuleFor(a => a.CompanyId, c.ID).Generate(2));
    var orderEventFaker = EventFaker.CreateOrderFaker();
    var orderItemEventFaker = EventFaker.CreateOrderItemFaker();
    var customFieldOrderFaker = CustomFieldFaker.CreateOrderFaker();
    var customFieldOrderItemFaker = CustomFieldFaker.CreateOrderItemFaker();

    var orderItemFaker = OrderItemFaker.Create()
        .RuleFor(it => it.Events, _ => orderItemEventFaker.Generate(10))
        .RuleFor(it => it.CustomFields, _ => customFieldOrderItemFaker.Generate(10));

    var baseOrderFaker = OrderFaker.Create()
        .RuleFor(o => o.OwnerId, _ => OwnerId)
        .RuleFor(o => o.Buyer, _ => buyerFaker.Generate())
        .RuleFor(o => o.BuyerID, (_, o) => o.Buyer!.ID)
        .RuleFor(o => o.Events, _ => orderEventFaker.Generate(10))
        .RuleFor(o => o.CustomFields, _ => customFieldOrderFaker.Generate(10))
        .RuleFor(o => o.OrderLines, _ => orderItemFaker.Generate(2))
        .FinishWith((_, o) =>
        {
            // All children already exist here; only the parent IDs are stamped on them.
            foreach (var ev in o.Events)
            {
                ev.OrderId = o.ID;
            }

            foreach (var pl in o.PackLines)
            {
                pl.OrderId = o.ID;
            }

            foreach (var cf in o.CustomFields)
            {
                cf.OrderId = o.ID;
            }

            foreach (var item in o.OrderLines)
            {
                item.OrderId = o.ID;
                item.OwnerId = OwnerId;

                foreach (var ev in item.Events)
                {
                    ev.OrderItemId = item.ID;
                }

                foreach (var cf in item.CustomFields)
                {
                    cf.OrderItemId = item.ID;
                }
            }
        });

    var orders = ParallelEnumerable.Range(0, threadCount)
        // Request one concurrent task per worker instead of relying on the PLINQ default.
        .WithDegreeOfParallelism(threadCount)
        .SelectMany(worker =>
            baseOrderFaker.Clone().Generate(batchSizePerThread + (worker < remainder ? 1 : 0)))
        .ToList();

    // Result order is not deterministic here: the query is unordered.
    Console.WriteLine(orders.First().ID);
    Console.WriteLine(orders.Last().ID);
}
Expected Behavior
Parallel generation should be faster.
Actual Behavior
Both methods perform the same and scale the same.
Known Workarounds
No response
Could you help with a pull-request?
Yes