diff --git a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index dff70397c16..c132fd2eb40 100644 --- a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -1140,9 +1140,15 @@ static VALUE rescued_sample_allocation(VALUE tracepoint_data) { discrete_dynamic_sampler_events_since_last_sample(&state->allocation_sampler) : // if we aren't, then we're sampling every event 1; - // TODO: Signal in the profile that clamping happened? + + // To control bias from sampling, we clamp the maximum weight attributed to a single allocation sample. This avoids + // assigning a very large number to a sample, if for instance the dynamic sampling mechanism chose a really big interval. unsigned int weight = allocations_since_last_sample > MAX_ALLOC_WEIGHT ? MAX_ALLOC_WEIGHT : (unsigned int) allocations_since_last_sample; thread_context_collector_sample_allocation(state->thread_context_collector_instance, weight, new_object); + // ...but we still represent the missing allocations in the profile, thus the data will account for all allocations. + if (weight < allocations_since_last_sample) { + thread_context_collector_sample_missed_allocations(state->thread_context_collector_instance, allocations_since_last_sample - weight); + } // Return a dummy VALUE because we're called from rb_rescue2 which requires it return Qnil; diff --git a/ext/datadog_profiling_native_extension/collectors_thread_context.c b/ext/datadog_profiling_native_extension/collectors_thread_context.c index 19b9b74109e..dad402523a6 100644 --- a/ext/datadog_profiling_native_extension/collectors_thread_context.c +++ b/ext/datadog_profiling_native_extension/collectors_thread_context.c @@ -231,6 +231,7 @@ static void ddtrace_otel_trace_identifiers_for( VALUE active_span, VALUE otel_values ); +static VALUE _native_sample_missed_allocations(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE missed_allocations); void collectors_thread_context_init(VALUE profiling_module) { VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors"); @@ -261,6 +262,7 @@ void collectors_thread_context_init(VALUE profiling_module) { rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1); rb_define_singleton_method(testing_module, "_native_gc_tracking", _native_gc_tracking, 1); rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0); + rb_define_singleton_method(testing_module, "_native_sample_missed_allocations", _native_sample_missed_allocations, 2); at_active_span_id = rb_intern_const("@active_span"); at_active_trace_id = rb_intern_const("@active_trace"); @@ -1400,3 +1402,32 @@ static void ddtrace_otel_trace_identifiers_for( *active_trace = current_trace; *numeric_span_id = resolved_numeric_span_id; } + +void thread_context_collector_sample_missed_allocations(VALUE self_instance, unsigned int missed_allocations) { + struct thread_context_collector_state *state; + TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state); + + ddog_prof_Label labels[] = { + {.key = DDOG_CHARSLICE_C("thread id"), .str = DDOG_CHARSLICE_C("MA")}, + {.key = DDOG_CHARSLICE_C("thread name"), .str = DDOG_CHARSLICE_C("Missing Allocations")}, + {.key = DDOG_CHARSLICE_C("allocation class"), .str = DDOG_CHARSLICE_C("(Missing Allocations)")}, + }; + ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = sizeof(labels) / sizeof(labels[0])}; + + record_placeholder_stack( + state->sampling_buffer, + state->recorder_instance, + (sample_values) {.alloc_samples = missed_allocations}, + (sample_labels) { + .labels = slice_labels, + .state_label = NULL, + .end_timestamp_ns = 0, // For now we're not collecting timestamps for allocation events + }, + DDOG_CHARSLICE_C("Missing Allocations") + ); +} + +static VALUE _native_sample_missed_allocations(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE missed_allocations) { + thread_context_collector_sample_missed_allocations(collector_instance, NUM2UINT(missed_allocations)); + return Qtrue; +} diff --git a/ext/datadog_profiling_native_extension/collectors_thread_context.h b/ext/datadog_profiling_native_extension/collectors_thread_context.h index 6299d96b43e..8566b16398d 100644 --- a/ext/datadog_profiling_native_extension/collectors_thread_context.h +++ b/ext/datadog_profiling_native_extension/collectors_thread_context.h @@ -9,6 +9,7 @@ void thread_context_collector_sample( VALUE profiler_overhead_stack_thread ); void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object); +void thread_context_collector_sample_missed_allocations(VALUE self_instance, unsigned int missed_allocations); VALUE thread_context_collector_sample_after_gc(VALUE self_instance); void thread_context_collector_on_gc_start(VALUE self_instance); bool thread_context_collector_on_gc_finish(VALUE self_instance); diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index 633e1271ac6..0bfc3c5ff9b 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -507,6 +507,7 @@ context 'with dynamic_sampling_rate_enabled' do let(:options) { { dynamic_sampling_rate_enabled: true } } + it 'keeps statistics on how allocation sampling is doing' do stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo)) @@ -536,6 +537,30 @@ one_second_in_ns = 1_000_000_000 expect(sampling_time_ns_max).to be < one_second_in_ns, "A single sample should not take longer than 1s, #{stats}" end + + # When large numbers of objects are allocated, the dynamic sampling rate kicks in, and we don't sample every + # object. + # We then assign a weight to every sample to compensate for this; to avoid bias, we have a limit on this weight, + # and we clamp it if it goes over the limit. + # But the total amount of allocations recorded should match the number we observed, and thus we record the + # remainder above the clamped value as a separate "Missing Allocations" step. + it 'records missed allocations when weights are clamped' do + start + + thread_that_allocates_as_fast_as_possible = Thread.new { loop { BasicObject.new } } + + allocation_samples = try_wait_until do + samples = samples_from_pprof(recorder.serialize!).select { |it| it.values[:'alloc-samples'] > 0 } + samples if samples.any? { |it| it.labels[:'thread name'] == 'Missing Allocations' } + end + + thread_that_allocates_as_fast_as_possible.kill + thread_that_allocates_as_fast_as_possible.join + + cpu_and_wall_time_worker.stop + + expect(allocation_samples).to_not be_empty + end end context 'when sampling optimized Ruby strings' do diff --git a/spec/datadog/profiling/collectors/thread_context_spec.rb b/spec/datadog/profiling/collectors/thread_context_spec.rb index 44323ce1665..3f76d6f1a4b 100644 --- a/spec/datadog/profiling/collectors/thread_context_spec.rb +++ b/spec/datadog/profiling/collectors/thread_context_spec.rb @@ -79,6 +79,10 @@ def sample_allocation(weight:, new_object: Object.new) described_class::Testing._native_sample_allocation(cpu_and_wall_time_collector, weight, new_object) end + def sample_missed_allocations(missed_allocations) + described_class::Testing._native_sample_missed_allocations(cpu_and_wall_time_collector, missed_allocations) + end + def thread_list described_class::Testing._native_thread_list end @@ -1210,6 +1214,31 @@ def self.otel_sdk_available? end end + describe '#sample_missed_allocations' do + let(:single_sample) do + expect(samples.size).to be 1 + samples.first + end + before { sample_missed_allocations(123) } + + it 'records the number of missed allocations' do + expect(single_sample.values).to include('alloc-samples': 123) + end + + it 'attributes the missed allocations to a "Missing Allocations" thread' do + expect(single_sample.labels).to include('thread id': 'MA', 'thread name': 'Missing Allocations') + end + + it 'attributes the missed allocations to a "(Missing Allocations)" allocation class' do + expect(single_sample.labels).to include('allocation class': '(Missing Allocations)') + end + + it 'includes a placeholder stack attributed to "Missing Allocations"' do + expect(single_sample.locations.size).to be 1 + expect(single_sample.locations.first.path).to eq 'Missing Allocations' + end + end + describe '#thread_list' do it "returns the same as Ruby's Thread.list" do expect(thread_list).to eq Thread.list