Commit

[PROF-10123] Add placeholder for missing allocations in allocation profiler

**What does this PR do?**

This PR fixes a corner-case issue with the allocation profiler: when too
many objects get allocated, we "clamp" the weight of every individual
sample to a maximum value, to avoid the bias a single sample with a very
large weight would introduce.

But previously we did not represent the clamping in the profile itself,
which meant the total number of allocations reported by the profiler was
missing any object counts that got clamped away.

In practice, we don't expect this to happen very often, as we have
a high clamping value `MAX_ALLOC_WEIGHT = 65535` (it's in
`collectors_cpu_and_wall_time_worker.c`).

This PR then introduces an extra placeholder sample, when needed,
to represent allocations that went over the clamped value.

Thus the total number of allocated objects reported by the profiler is now
accurate, even in the presence of clamping.
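
To make the arithmetic concrete, here is a minimal Ruby sketch of the split
(the `MAX_ALLOC_WEIGHT` value comes from the PR itself; the burst of
100_000 allocations is just an illustrative assumption):

```ruby
# Mirrors the clamping logic in collectors_cpu_and_wall_time_worker.c
MAX_ALLOC_WEIGHT = 65535

# Hypothetical burst: 100_000 objects allocated since the last sample
allocations_since_last_sample = 100_000

# The sample itself is attributed at most MAX_ALLOC_WEIGHT...
weight = [allocations_since_last_sample, MAX_ALLOC_WEIGHT].min
# ...and the remainder gets recorded on the placeholder sample
missed_allocations = allocations_since_last_sample - weight

# weight (65535) + missed_allocations (34465) == 100_000, so the profile's
# allocation total stays accurate despite clamping
puts "weight=#{weight} missed=#{missed_allocations}"
```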

**Motivation:**

We're considering lowering the maximum clamping value, which would mean it
gets hit more often, so it's important to fix this issue before we do that.

**Additional Notes:**

N/A

**How to test the change?**

I've added test coverage for this change. I also used the following
snippet to quickly experiment while I was developing this PR:

```ruby
Datadog::Profiling.wait_until_running
allocator = Thread.new { loop { BasicObject.new } }
sleep 10
allocator.kill; allocator.join
puts "Allocated #{GC.stat(:total_allocated_objects)} objects total"
```
ivoanjo committed Jul 5, 2024
1 parent da8b886 commit 67b9633
Showing 5 changed files with 93 additions and 1 deletion.
8 changes: 7 additions & 1 deletion ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c

```diff
@@ -1140,9 +1140,15 @@ static VALUE rescued_sample_allocation(VALUE tracepoint_data) {
     discrete_dynamic_sampler_events_since_last_sample(&state->allocation_sampler) :
     // if we aren't, then we're sampling every event
     1;
-  // TODO: Signal in the profile that clamping happened?
+
+  // To control bias from sampling, we clamp the maximum weight attributed to a single allocation sample. This avoids
+  // assigning a very large number to a sample, if for instance the dynamic sampling mechanism chose a really big interval.
   unsigned int weight = allocations_since_last_sample > MAX_ALLOC_WEIGHT ? MAX_ALLOC_WEIGHT : (unsigned int) allocations_since_last_sample;
   thread_context_collector_sample_allocation(state->thread_context_collector_instance, weight, new_object);
+  // ...but we still represent the missing allocations in the profile, thus the data will account for all allocations.
+  if (weight < allocations_since_last_sample) {
+    thread_context_collector_sample_missed_allocations(state->thread_context_collector_instance, allocations_since_last_sample - weight);
+  }

   // Return a dummy VALUE because we're called from rb_rescue2 which requires it
   return Qnil;
```
31 changes: 31 additions & 0 deletions ext/datadog_profiling_native_extension/collectors_thread_context.c
```diff
@@ -231,6 +231,7 @@ static void ddtrace_otel_trace_identifiers_for(
   VALUE active_span,
   VALUE otel_values
 );
+static VALUE _native_sample_missed_allocations(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE missed_allocations);

 void collectors_thread_context_init(VALUE profiling_module) {
   VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -261,6 +262,7 @@ void collectors_thread_context_init(VALUE profiling_module) {
   rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
   rb_define_singleton_method(testing_module, "_native_gc_tracking", _native_gc_tracking, 1);
   rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0);
+  rb_define_singleton_method(testing_module, "_native_sample_missed_allocations", _native_sample_missed_allocations, 2);

   at_active_span_id = rb_intern_const("@active_span");
   at_active_trace_id = rb_intern_const("@active_trace");
@@ -1400,3 +1402,32 @@ static void ddtrace_otel_trace_identifiers_for(
   *active_trace = current_trace;
   *numeric_span_id = resolved_numeric_span_id;
 }
+
+void thread_context_collector_sample_missed_allocations(VALUE self_instance, unsigned int missed_allocations) {
+  struct thread_context_collector_state *state;
+  TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
+
+  ddog_prof_Label labels[] = {
+    {.key = DDOG_CHARSLICE_C("thread id"), .str = DDOG_CHARSLICE_C("MA")},
+    {.key = DDOG_CHARSLICE_C("thread name"), .str = DDOG_CHARSLICE_C("Missing Allocations")},
+    {.key = DDOG_CHARSLICE_C("allocation class"), .str = DDOG_CHARSLICE_C("(Missing Allocations)")},
+  };
+  ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = sizeof(labels) / sizeof(labels[0])};
+
+  record_placeholder_stack(
+    state->sampling_buffer,
+    state->recorder_instance,
+    (sample_values) {.alloc_samples = missed_allocations},
+    (sample_labels) {
+      .labels = slice_labels,
+      .state_label = NULL,
+      .end_timestamp_ns = 0, // For now we're not collecting timestamps for allocation events
+    },
+    DDOG_CHARSLICE_C("Missing Allocations")
+  );
+}
+
+static VALUE _native_sample_missed_allocations(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE missed_allocations) {
+  thread_context_collector_sample_missed_allocations(collector_instance, NUM2UINT(missed_allocations));
+  return Qtrue;
+}
```
1 change: 1 addition & 0 deletions ext/datadog_profiling_native_extension/collectors_thread_context.h

```diff
@@ -9,6 +9,7 @@ void thread_context_collector_sample(
   VALUE profiler_overhead_stack_thread
 );
 void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object);
+void thread_context_collector_sample_missed_allocations(VALUE self_instance, unsigned int missed_allocations);
 VALUE thread_context_collector_sample_after_gc(VALUE self_instance);
 void thread_context_collector_on_gc_start(VALUE self_instance);
 bool thread_context_collector_on_gc_finish(VALUE self_instance);
```
25 changes: 25 additions & 0 deletions spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb
```diff
@@ -507,6 +507,7 @@

   context 'with dynamic_sampling_rate_enabled' do
     let(:options) { { dynamic_sampling_rate_enabled: true } }
+
     it 'keeps statistics on how allocation sampling is doing' do
       stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))

@@ -536,6 +537,30 @@
       one_second_in_ns = 1_000_000_000
       expect(sampling_time_ns_max).to be < one_second_in_ns, "A single sample should not take longer than 1s, #{stats}"
     end
+
+    # When large numbers of objects are allocated, the dynamic sampling rate kicks in, and we don't sample every
+    # object.
+    # We then assign a weight to every sample to compensate for this; to avoid bias, we have a limit on this weight,
+    # and we clamp it if it goes over the limit.
+    # But the total amount of allocations recorded should match the number we observed, and thus we record the
+    # remainder above the clamped value as a separate "Missing Allocations" step.
+    it 'records missed allocations when weights are clamped' do
+      start
+
+      thread_that_allocates_as_fast_as_possible = Thread.new { loop { BasicObject.new } }
+
+      allocation_samples = try_wait_until do
+        samples = samples_from_pprof(recorder.serialize!).select { |it| it.values[:'alloc-samples'] > 0 }
+        samples if samples.any? { |it| it.labels[:'thread name'] == 'Missing Allocations' }
+      end
+
+      thread_that_allocates_as_fast_as_possible.kill
+      thread_that_allocates_as_fast_as_possible.join
+
+      cpu_and_wall_time_worker.stop
+
+      expect(allocation_samples).to_not be_empty
+    end
   end

   context 'when sampling optimized Ruby strings' do
```
29 changes: 29 additions & 0 deletions spec/datadog/profiling/collectors/thread_context_spec.rb
```diff
@@ -79,6 +79,10 @@ def sample_allocation(weight:, new_object: Object.new)
     described_class::Testing._native_sample_allocation(cpu_and_wall_time_collector, weight, new_object)
   end

+  def sample_missed_allocations(missed_allocations)
+    described_class::Testing._native_sample_missed_allocations(cpu_and_wall_time_collector, missed_allocations)
+  end
+
   def thread_list
     described_class::Testing._native_thread_list
   end
@@ -1210,6 +1214,31 @@ def self.otel_sdk_available?
     end
   end

+  describe '#sample_missed_allocations' do
+    let(:single_sample) do
+      expect(samples.size).to be 1
+      samples.first
+    end
+    before { sample_missed_allocations(123) }
+
+    it 'records the number of missed allocations' do
+      expect(single_sample.values).to include('alloc-samples': 123)
+    end
+
+    it 'attributes the missed allocations to a "Missing Allocations" thread' do
+      expect(single_sample.labels).to include('thread id': 'MA', 'thread name': 'Missing Allocations')
+    end
+
+    it 'attributes the missed allocations to a "(Missing Allocations)" allocation class' do
+      expect(single_sample.labels).to include('allocation class': '(Missing Allocations)')
+    end
+
+    it 'includes a placeholder stack attributed to "Missing Allocations"' do
+      expect(single_sample.locations.size).to be 1
+      expect(single_sample.locations.first.path).to eq 'Missing Allocations'
+    end
+  end
+
   describe '#thread_list' do
     it "returns the same as Ruby's Thread.list" do
       expect(thread_list).to eq Thread.list
```
