From bbbe0191334a5440fa39d471bfaa98fe89c562a8 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sat, 19 Oct 2024 10:40:02 -0700 Subject: [PATCH 1/7] wip: mark alive with list --- Include/internal/pycore_gc.h | 9 +++- Python/gc.c | 88 ++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 357177bcd6fd84..fb4a35f9ba3591 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -145,8 +145,11 @@ static inline void _PyObject_GC_SET_SHARED_INLINE(PyObject *op) { #define _PyGC_PREV_MASK_FINALIZED (1) /* Bit 1 is set when the object is in generation which is GCed currently. */ #define _PyGC_PREV_MASK_COLLECTING (2) -/* The (N-2) most significant bits contain the real address. */ -#define _PyGC_PREV_SHIFT (2) +/* Bit 2 is to mark the object as alive due to being reachable from a root + * object. This is used when the incremental mark process is running. */ +#define _PyGC_PREV_MASK_OLD (4) +/* The number of least least significant bits used for flags. */ +#define _PyGC_PREV_SHIFT (3) #define _PyGC_PREV_MASK (((uintptr_t) -1) << _PyGC_PREV_SHIFT) /* set for debugging information */ @@ -301,6 +304,8 @@ struct _gc_runtime_state { PyObject *garbage; /* a list of callbacks to be invoked when collection is performed */ PyObject *callbacks; + /* Marker object for incremental mark alive process */ + PyObject *thumb; /* This is the number of objects that survived the last full collection. It approximates the number of long lived objects diff --git a/Python/gc.c b/Python/gc.c index 8dbcb340d4089d..c07edc3b795886 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -53,6 +53,18 @@ typedef struct _gc_runtime_state GCState; // Automatically choose the generation that needs collecting. #define GENERATION_AUTO (-1) +static inline int gc_is_old(PyGC_Head *gc) { + return ((gc->_gc_prev & _PyGC_PREV_MASK_OLD) != 0); +} + +static inline void gc_set_old(PyGC_Head *gc) { + gc->_gc_prev |= _PyGC_PREV_MASK_OLD; +} + +static inline void gc_clear_old(PyGC_Head *gc) { + gc->_gc_prev &= ~_PyGC_PREV_MASK_OLD; +} + static inline int gc_is_collecting(PyGC_Head *g) { @@ -142,6 +154,11 @@ _PyGC_Init(PyInterpreterState *interp) return _PyStatus_NO_MEMORY(); } + gcstate->thumb = PyList_New(0); + if (gcstate->thumb == NULL) { + return _PyStatus_NO_MEMORY(); + } + return _PyStatus_OK(); } @@ -1048,6 +1065,74 @@ show_stats_each_generations(GCState *gcstate) buf, gc_list_size(&gcstate->permanent_generation.head)); } +static void +mark_old(PyGC_Head *head) { + for (PyGC_Head *gc = GC_NEXT(head); gc != head; gc = GC_NEXT(gc)) { + gc_set_old(gc); + } +} + +struct mark_state { + PyGC_Head todo; + Py_ssize_t alive; // number of objects found alive +}; + +static int visit_reachable_old(PyObject *op, struct mark_state *state); + +static void +propogate_old_reachable(PyObject *op, struct mark_state *state) +{ + PyGC_Head *gc = AS_GC(op); + /* mark it black as it is proven reachable and we don't have + * to look at it anymore. Note that every loop in + * mark_reachable() must at least turn some GREY to BLACK in + * order for us to make progress. */ + gc_clear_old(gc); + state->alive++; + traverseproc traverse = Py_TYPE(op)->tp_traverse; + traverse(op, (visitproc)visit_reachable_old, state); +} + +/* A traversal callback for visit_reachable_grey. */ +static int +visit_reachable_old(PyObject *op, struct mark_state *state) +{ + if (!(_PyObject_IS_GC(op) && _PyObject_GC_IS_TRACKED(op))) { + return 0; + } + PyGC_Head *gc = AS_GC(op); + if (gc_is_old(gc)) { + gc_list_move(gc, &state->todo); + } + return 0; +} + +static void +deduce_old_alive(PyThreadState *tstate, PyGC_Head *head) { + PyObject *root = tstate->interp->sysdict; + + struct mark_state mstate; + mstate.alive = 0; + gc_list_init(&mstate.todo); + + PyGC_Head old_alive; + gc_list_init(&old_alive); + + unsigned long n = gc_list_size(head); + unsigned long alive = 0; + + propogate_old_reachable(root, &mstate); + while (! gc_list_is_empty(&mstate.todo)) { + PyGC_Head *gc = GC_NEXT(&mstate.todo); + alive++; + propogate_old_reachable(FROM_GC(gc), &mstate); + gc_list_move(gc, &old_alive); + } + fprintf(stderr, "gc mark n=%lu alive=%lu\n", n, alive); + gc_list_merge(&old_alive, head); +} + + /* Deduce which objects among "base" are unreachable from outside the list and move them to 'unreachable'. The process consist in the following steps: @@ -1357,6 +1442,9 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) } validate_list(old, collecting_clear_unreachable_clear); + mark_old(old); + deduce_old_alive(tstate, old); + deduce_unreachable(young, &unreachable); untrack_tuples(young); From 49400976680828e31d5be7bb070d9c100cdd475e Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sat, 19 Oct 2024 17:44:03 -0700 Subject: [PATCH 2/7] wip: progress --- Include/internal/pycore_gc.h | 2 ++ Modules/clinic/gcmodule.c.h | 20 +++++++++++- Modules/gcmodule.c | 19 ++++++++++++ Python/gc.c | 59 ++++++++++++++++++++++++++++-------- Python/pylifecycle.c | 6 ++++ 5 files changed, 92 insertions(+), 14 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index fb4a35f9ba3591..9db76a6a6575fb 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -295,6 +295,8 @@ struct _gc_runtime_state { /* linked lists of container objects */ struct gc_generation generations[NUM_GENERATIONS]; PyGC_Head *generation0; + /* Objects in oldest generation that have be determined to be alive */ + struct gc_generation old_alive; /* a permanent generation which won't be collected */ struct gc_generation permanent_generation; struct gc_generation_stats generation_stats[NUM_GENERATIONS]; diff --git a/Modules/clinic/gcmodule.c.h b/Modules/clinic/gcmodule.c.h index 9fff4da616ba00..27836197a378bd 100644 --- a/Modules/clinic/gcmodule.c.h +++ b/Modules/clinic/gcmodule.c.h @@ -440,6 +440,24 @@ gc_get_objects(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje return return_value; } +PyDoc_STRVAR(gc_get_alive_objects__doc__, +"get_alive_objects($module, /)\n" +"--\n" +"\n" +"Return a list of objects tracked by the collector (excluding the list returned)."); + +#define GC_GET_ALIVE_OBJECTS_METHODDEF \ + {"get_alive_objects", (PyCFunction)gc_get_alive_objects, METH_NOARGS, gc_get_alive_objects__doc__}, + +static PyObject * +gc_get_alive_objects_impl(PyObject *module); + +static PyObject * +gc_get_alive_objects(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return gc_get_alive_objects_impl(module); +} + PyDoc_STRVAR(gc_get_stats__doc__, "get_stats($module, /)\n" "--\n" @@ -585,4 +603,4 @@ gc_get_freeze_count(PyObject *module, PyObject *Py_UNUSED(ignored)) exit: return return_value; } -/*[clinic end generated code: output=0a7e91917adcb937 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=debb8f0db8f3a13e input=a9049054013a1b77]*/ diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index b57a1c90723306..b5013ff03437d0 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -334,6 +334,23 @@ gc_get_objects_impl(PyObject *module, Py_ssize_t generation) return _PyGC_GetObjects(interp, (int)generation); } +extern PyObject *_PyGC_GetOldAliveObjects(PyInterpreterState *interp); + +/*[clinic input] +gc.get_alive_objects + +Return a list of objects tracked by the collector (excluding the list returned). + +[clinic start generated code]*/ + +static PyObject * +gc_get_alive_objects_impl(PyObject *module) +/*[clinic end generated code: output=24e4083e0f1b0ebf input=e421931cb78142b8]*/ +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + return _PyGC_GetOldAliveObjects(interp); +} + /*[clinic input] gc.get_stats @@ -481,6 +498,7 @@ PyDoc_STRVAR(gc__doc__, "set_threshold() -- Set the collection thresholds.\n" "get_threshold() -- Return the current the collection thresholds.\n" "get_objects() -- Return a list of all objects tracked by the collector.\n" +"get_alive_objects() -- Return a list of objects determined to be alive.\n" "is_tracked() -- Returns true if a given object is tracked.\n" "is_finalized() -- Returns true if a given object has been already finalized.\n" "get_referrers() -- Return the list of objects that refer to an object.\n" @@ -500,6 +518,7 @@ static PyMethodDef GcMethods[] = { GC_GET_THRESHOLD_METHODDEF GC_COLLECT_METHODDEF GC_GET_OBJECTS_METHODDEF + GC_GET_ALIVE_OBJECTS_METHODDEF GC_GET_STATS_METHODDEF GC_IS_TRACKED_METHODDEF GC_IS_FINALIZED_METHODDEF diff --git a/Python/gc.c b/Python/gc.c index c07edc3b795886..834275cf2e4c8e 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -133,6 +133,7 @@ _PyGC_InitState(GCState *gcstate) INIT_HEAD(gcstate->generations[i]); }; gcstate->generation0 = GEN_HEAD(gcstate, 0); + INIT_HEAD(gcstate->old_alive); INIT_HEAD(gcstate->permanent_generation); #undef INIT_HEAD @@ -1082,12 +1083,6 @@ static int visit_reachable_old(PyObject *op, struct mark_state *state); static void propogate_old_reachable(PyObject *op, struct mark_state *state) { - PyGC_Head *gc = AS_GC(op); - /* mark it black as it is proven reachable and we don't have - * to look at it anymore. Note that every loop in - * mark_reachable() must at least turn some GREY to BLACK in - * order for us to make progress. */ - gc_clear_old(gc); state->alive++; traverseproc traverse = Py_TYPE(op)->tp_traverse; traverse(op, (visitproc)visit_reachable_old, state); @@ -1102,6 +1097,7 @@ visit_reachable_old(PyObject *op, struct mark_state *state) } PyGC_Head *gc = AS_GC(op); if (gc_is_old(gc)) { + gc_clear_old(gc); gc_list_move(gc, &state->todo); } return 0; @@ -1109,27 +1105,39 @@ visit_reachable_old(PyObject *op, struct mark_state *state) static void deduce_old_alive(PyThreadState *tstate, PyGC_Head *head) { - PyObject *root = tstate->interp->sysdict; + GCState *gcstate = &tstate->interp->gc; struct mark_state mstate; mstate.alive = 0; gc_list_init(&mstate.todo); - PyGC_Head old_alive; - gc_list_init(&old_alive); + gc_list_merge(&gcstate->old_alive.head, head); + + mark_old(head); unsigned long n = gc_list_size(head); unsigned long alive = 0; + PyObject *root = tstate->interp->sysdict; propogate_old_reachable(root, &mstate); + PyObject *modules = _PyImport_GetModules(tstate->interp); + if (modules != NULL) { + propogate_old_reachable(modules, &mstate); + } +#if 0 + _PyInterpreterFrame *frame = tstate->current_frame; + if (frame != NULL) { + propogate_old_reachable(_PyFrame_GetFrameObject(frame), &mstate); + } +#endif + while (! gc_list_is_empty(&mstate.todo)) { PyGC_Head *gc = GC_NEXT(&mstate.todo); alive++; propogate_old_reachable(FROM_GC(gc), &mstate); - gc_list_move(gc, &old_alive); + gc_list_move(gc, &gcstate->old_alive.head); } - fprintf(stderr, "gc mark n=%lu alive=%lu\n", n, alive); - gc_list_merge(&old_alive, head); + fprintf(stderr, "gc mark n=%lu old=%lu alive=%lu\n", n, n-alive, alive); } @@ -1442,7 +1450,6 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) } validate_list(old, collecting_clear_unreachable_clear); - mark_old(old); deduce_old_alive(tstate, old); deduce_unreachable(young, &unreachable); @@ -1650,6 +1657,12 @@ _PyGC_GetObjects(PyInterpreterState *interp, int generation) goto error; } } + if (generation == -1 || generation == (NUM_GENERATIONS - 1)) { + // include the "old_alive" set of objects as well + if (append_objects(result, &gcstate->old_alive.head)) { + goto error; + } + } return result; error: @@ -1657,6 +1670,25 @@ _PyGC_GetObjects(PyInterpreterState *interp, int generation) return NULL; } +PyObject * +_PyGC_GetOldAliveObjects(PyInterpreterState *interp) +{ + GCState *gcstate = &interp->gc; + + PyObject *result = PyList_New(0); + if (result == NULL) { + return NULL; + } + + if (append_objects(result, &gcstate->old_alive.head)) { + Py_DECREF(result); + return NULL; + } + + return result; +} + + void _PyGC_Freeze(PyInterpreterState *interp) { @@ -1819,6 +1851,7 @@ _PyGC_Fini(PyInterpreterState *interp) for (int i = 0; i < NUM_GENERATIONS; i++) { finalize_unlink_gc_head(&gcstate->generations[i].head); } + finalize_unlink_gc_head(&gcstate->old_alive.head); finalize_unlink_gc_head(&gcstate->permanent_generation.head); } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 1701a1cd217440..6b81d621e2434e 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1342,6 +1342,11 @@ init_interp_main(PyThreadState *tstate) return _PyStatus_ERR("failed to set builtin dict watcher"); } +#if 1 + fprintf(stderr, "freezing\n"); + _PyGC_Freeze(interp); +#endif + assert(!_PyErr_Occurred(tstate)); return _PyStatus_OK(); @@ -2212,6 +2217,7 @@ Py_Finalize(void) (void)_Py_Finalize(&_PyRuntime); } +extern void _PyGC_Freeze(PyInterpreterState *interp); /* Create and initialize a new interpreter and thread, and return the new thread. This requires that Py_Initialize() has been called From 84d405374524989ef91328c5aaab5942cc282541 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sun, 20 Oct 2024 12:52:55 -0700 Subject: [PATCH 3/7] wip: working --- Include/internal/pycore_gc.h | 9 +- Python/gc.c | 158 ++++++++++++++++++++++++----------- Python/pylifecycle.c | 2 +- 3 files changed, 119 insertions(+), 50 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 9db76a6a6575fb..a6fd2dddc54cdc 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -296,7 +296,13 @@ struct _gc_runtime_state { struct gc_generation generations[NUM_GENERATIONS]; PyGC_Head *generation0; /* Objects in oldest generation that have be determined to be alive */ - struct gc_generation old_alive; + PyGC_Head old_alive; + /* Size of oldest generation, on start of incremental mark process */ + Py_ssize_t old_size; + /* Number of alive objects found in oldest generation */ + Py_ssize_t old_alive_size; + int mark_steps; + int mark_steps_total; /* a permanent generation which won't be collected */ struct gc_generation permanent_generation; struct gc_generation_stats generation_stats[NUM_GENERATIONS]; @@ -308,6 +314,7 @@ struct _gc_runtime_state { PyObject *callbacks; /* Marker object for incremental mark alive process */ PyObject *thumb; + int mark_phase; /* This is the number of objects that survived the last full collection. It approximates the number of long lived objects diff --git a/Python/gc.c b/Python/gc.c index 834275cf2e4c8e..cbbc4b638e1413 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -53,6 +53,11 @@ typedef struct _gc_runtime_state GCState; // Automatically choose the generation that needs collecting. #define GENERATION_AUTO (-1) +// phases of incremental mark alive process +#define MARK_PHASE_INIT 1 +#define MARK_PHASE_STEP 2 +#define MARK_PHASE_DONE 3 + static inline int gc_is_old(PyGC_Head *gc) { return ((gc->_gc_prev & _PyGC_PREV_MASK_OLD) != 0); } @@ -122,21 +127,21 @@ get_gc_state(void) void _PyGC_InitState(GCState *gcstate) { -#define INIT_HEAD(GEN) \ +#define INIT_GC_LIST(HEAD) \ do { \ - GEN.head._gc_next = (uintptr_t)&GEN.head; \ - GEN.head._gc_prev = (uintptr_t)&GEN.head; \ + HEAD._gc_next = (uintptr_t)&HEAD; \ + HEAD._gc_prev = (uintptr_t)&HEAD; \ } while (0) for (int i = 0; i < NUM_GENERATIONS; i++) { assert(gcstate->generations[i].count == 0); - INIT_HEAD(gcstate->generations[i]); + INIT_GC_LIST(gcstate->generations[i].head); }; gcstate->generation0 = GEN_HEAD(gcstate, 0); - INIT_HEAD(gcstate->old_alive); - INIT_HEAD(gcstate->permanent_generation); + INIT_GC_LIST(gcstate->permanent_generation.head); + INIT_GC_LIST(gcstate->old_alive); -#undef INIT_HEAD +#undef INIT_GC_LIST } @@ -159,6 +164,9 @@ _PyGC_Init(PyInterpreterState *interp) if (gcstate->thumb == NULL) { return _PyStatus_NO_MEMORY(); } + gcstate->mark_phase = MARK_PHASE_DONE; + gcstate->old_alive_size = 0; + gcstate->old_size = 0; return _PyStatus_OK(); } @@ -1066,11 +1074,14 @@ show_stats_each_generations(GCState *gcstate) buf, gc_list_size(&gcstate->permanent_generation.head)); } -static void +static Py_ssize_t mark_old(PyGC_Head *head) { + Py_ssize_t n = 0; for (PyGC_Head *gc = GC_NEXT(head); gc != head; gc = GC_NEXT(gc)) { gc_set_old(gc); + n++; } + return n; } struct mark_state { @@ -1078,19 +1089,18 @@ struct mark_state { Py_ssize_t alive; // number of objects found alive }; -static int visit_reachable_old(PyObject *op, struct mark_state *state); +static int visit_reachable_old(PyObject *op, PyGC_Head *todo); static void -propogate_old_reachable(PyObject *op, struct mark_state *state) +propogate_old_reachable(PyObject *op, PyGC_Head *todo) { - state->alive++; traverseproc traverse = Py_TYPE(op)->tp_traverse; - traverse(op, (visitproc)visit_reachable_old, state); + traverse(op, (visitproc)visit_reachable_old, todo); } -/* A traversal callback for visit_reachable_grey. */ +/* A traversal callback for propogate_old_reachable. */ static int -visit_reachable_old(PyObject *op, struct mark_state *state) +visit_reachable_old(PyObject *op, PyGC_Head *todo) { if (!(_PyObject_IS_GC(op) && _PyObject_GC_IS_TRACKED(op))) { return 0; @@ -1098,49 +1108,101 @@ visit_reachable_old(PyObject *op, struct mark_state *state) PyGC_Head *gc = AS_GC(op); if (gc_is_old(gc)) { gc_clear_old(gc); - gc_list_move(gc, &state->todo); + gc_list_move(gc, todo); } return 0; } static void -deduce_old_alive(PyThreadState *tstate, PyGC_Head *head) { +mark_alive_step(PyThreadState *tstate, PyGC_Head *old_head) +{ GCState *gcstate = &tstate->interp->gc; - struct mark_state mstate; - mstate.alive = 0; - gc_list_init(&mstate.todo); - - gc_list_merge(&gcstate->old_alive.head, head); - - mark_old(head); - - unsigned long n = gc_list_size(head); - unsigned long alive = 0; - - PyObject *root = tstate->interp->sysdict; - propogate_old_reachable(root, &mstate); - PyObject *modules = _PyImport_GetModules(tstate->interp); - if (modules != NULL) { - propogate_old_reachable(modules, &mstate); + PyGC_Head *alive_head = &gcstate->old_alive; + if (gcstate->mark_phase == MARK_PHASE_INIT) { + gc_list_merge(alive_head, old_head); + gcstate->mark_steps = 0; + gcstate->mark_steps_total = 0; + gcstate->old_alive_size = 0; + gcstate->old_size = mark_old(old_head); + fprintf(stderr, "gc init mark head %lu\n", gcstate->old_size); + PyObject *root = tstate->interp->sysdict; + gc_list_move(AS_GC(gcstate->thumb), alive_head); + propogate_old_reachable(root, alive_head); + gcstate->mark_phase = MARK_PHASE_STEP; + } + if (gcstate->mark_phase == MARK_PHASE_STEP) { + Py_ssize_t n = gcstate->old_size; + Py_ssize_t alive = 0; + PyGC_Head *gc = GC_NEXT(AS_GC(gcstate->thumb)); + unsigned long step_count = n / 100; + if (step_count < 10) { + step_count = 10; + } + gcstate->mark_steps++; + unsigned long total_steps = 0; + for (;;) { + total_steps++; + PyGC_Head *next = GC_NEXT(gc); + if (gc == alive_head) { + gcstate->mark_phase = MARK_PHASE_DONE; + alive = gcstate->old_alive_size; + fprintf(stderr, "gc mark done n=%lu old=%lu alive=%lu\n", n, n-alive, alive); + break; + } + if (--step_count == 0) { + alive = gcstate->old_alive_size; + fprintf(stderr, "gc mark step n=%lu old=%lu alive=%lu\n", n, n-alive, alive); + // remember our position in old_alive list with thumb object + gc_list_move(AS_GC(gcstate->thumb), next); + break; + } + alive++; // count for this step + gcstate->old_alive_size++; // count for all steps + propogate_old_reachable(FROM_GC(gc), alive_head); + gc = next; + } } -#if 0 - _PyInterpreterFrame *frame = tstate->current_frame; - if (frame != NULL) { - propogate_old_reachable(_PyFrame_GetFrameObject(frame), &mstate); + else { + //fprintf(stderr, "gc mark is done, nothing to do\n"); } -#endif + gcstate->mark_steps_total++; +} - while (! gc_list_is_empty(&mstate.todo)) { - PyGC_Head *gc = GC_NEXT(&mstate.todo); - alive++; - propogate_old_reachable(FROM_GC(gc), &mstate); - gc_list_move(gc, &gcstate->old_alive.head); +#define OLD_GENERATION (NUM_GENERATIONS-1) + +static void +maybe_mark_alive(PyThreadState *tstate, int generation, _PyGC_Reason reason) +{ + GCState *gcstate = &tstate->interp->gc; + PyGC_Head *old = GEN_HEAD(gcstate, OLD_GENERATION); + if (generation == 0) { + // Do one step of "mark alive" incremental work. This will look at + // a fraction of 'old' to determine what is definitely alive, due + // to being reachable from a known root object. Those objects will + // be moved out of 'old' and into the 'old_alive' list. + mark_alive_step(tstate, old); + } + else { + if (reason != _Py_GC_REASON_HEAP) { + if (generation == OLD_GENERATION) { + // if doing full, non-heap collection, merge old_alive list so + // everything gets examined + gc_list_merge(&gcstate->old_alive, old); + gcstate->mark_phase = MARK_PHASE_INIT; + } + } + else { + if (generation == OLD_GENERATION) { + float f = ((float)gcstate->old_alive_size) / gcstate->old_size; + fprintf(stderr, "doing full auto collection %.2f\n", f); + fprintf(stderr, "steps %d %d\n", gcstate->mark_steps, gcstate->mark_steps_total); + gcstate->mark_phase = MARK_PHASE_INIT; + } + } } - fprintf(stderr, "gc mark n=%lu old=%lu alive=%lu\n", n, n-alive, alive); } - /* Deduce which objects among "base" are unreachable from outside the list and move them to 'unreachable'. The process consist in the following steps: @@ -1450,7 +1512,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) } validate_list(old, collecting_clear_unreachable_clear); - deduce_old_alive(tstate, old); + maybe_mark_alive(tstate, generation, reason); deduce_unreachable(young, &unreachable); @@ -1659,7 +1721,7 @@ _PyGC_GetObjects(PyInterpreterState *interp, int generation) } if (generation == -1 || generation == (NUM_GENERATIONS - 1)) { // include the "old_alive" set of objects as well - if (append_objects(result, &gcstate->old_alive.head)) { + if (append_objects(result, &gcstate->old_alive)) { goto error; } } @@ -1680,7 +1742,7 @@ _PyGC_GetOldAliveObjects(PyInterpreterState *interp) return NULL; } - if (append_objects(result, &gcstate->old_alive.head)) { + if (append_objects(result, &gcstate->old_alive)) { Py_DECREF(result); return NULL; } @@ -1851,8 +1913,8 @@ _PyGC_Fini(PyInterpreterState *interp) for (int i = 0; i < NUM_GENERATIONS; i++) { finalize_unlink_gc_head(&gcstate->generations[i].head); } - finalize_unlink_gc_head(&gcstate->old_alive.head); finalize_unlink_gc_head(&gcstate->permanent_generation.head); + finalize_unlink_gc_head(&gcstate->old_alive); } /* for debugging */ diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 6b81d621e2434e..85920f8a8a25b5 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1342,7 +1342,7 @@ init_interp_main(PyThreadState *tstate) return _PyStatus_ERR("failed to set builtin dict watcher"); } -#if 1 +#if 0 fprintf(stderr, "freezing\n"); _PyGC_Freeze(interp); #endif From 4f69eadbb30122329b857fd7d44ff7307acb5268 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sun, 20 Oct 2024 18:24:28 -0700 Subject: [PATCH 4/7] wip: add timing stats --- Include/internal/pycore_gc.h | 30 ++- Include/internal/pycore_runtime_init.h | 6 +- Python/gc.c | 310 ++++++++++++++++++++++--- 3 files changed, 295 insertions(+), 51 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index a6fd2dddc54cdc..30305d35df24d3 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -282,6 +282,24 @@ struct gc_generation_stats { Py_ssize_t uncollectable; }; +struct gc_mark_state { + /* Objects in oldest generation that have be determined to be alive */ + PyGC_Head old_alive; + /* Marker object for incremental mark alive process */ + PyObject *thumb; + /* Size of oldest generation, on start of incremental mark process */ + Py_ssize_t old_size; + /* Number of alive objects found in oldest generation */ + Py_ssize_t old_alive_size; + int mark_phase; + int mark_steps; + int mark_steps_total; + PyTime_t gc_total_time; + PyTime_t gc_mark_time; + PyTime_t gc_max_pause; + PyTime_t gc_runs; +}; + struct _gc_runtime_state { /* List of objects that still need to be cleaned up, singly linked * via their gc headers' gc_prev pointers. */ @@ -295,14 +313,6 @@ struct _gc_runtime_state { /* linked lists of container objects */ struct gc_generation generations[NUM_GENERATIONS]; PyGC_Head *generation0; - /* Objects in oldest generation that have be determined to be alive */ - PyGC_Head old_alive; - /* Size of oldest generation, on start of incremental mark process */ - Py_ssize_t old_size; - /* Number of alive objects found in oldest generation */ - Py_ssize_t old_alive_size; - int mark_steps; - int mark_steps_total; /* a permanent generation which won't be collected */ struct gc_generation permanent_generation; struct gc_generation_stats generation_stats[NUM_GENERATIONS]; @@ -312,9 +322,7 @@ struct _gc_runtime_state { PyObject *garbage; /* a list of callbacks to be invoked when collection is performed */ PyObject *callbacks; - /* Marker object for incremental mark alive process */ - PyObject *thumb; - int mark_phase; + struct gc_mark_state mark_state; /* This is the number of objects that survived the last full collection. It approximates the number of long lived objects diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 7eef9edc0aa33d..bf5130b5aa9689 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -230,9 +230,9 @@ extern PyTypeObject _PyExc_MemoryError; .enabled = 1, \ .generations = { \ /* .head is set in _PyGC_InitState(). */ \ - { .threshold = 2000, }, \ - { .threshold = 10, }, \ - { .threshold = 10, }, \ + { .threshold = 400, }, \ + { .threshold = 5, }, \ + { .threshold = 5, }, \ }, \ }, \ .qsbr = { \ diff --git a/Python/gc.c b/Python/gc.c index cbbc4b638e1413..d5de3db883124b 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -13,8 +13,11 @@ #include "pycore_pyerrors.h" #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_weakref.h" // _PyWeakref_ClearRef() +#include "pycore_time.h" // _PyTime_AsMicroseconds() #include "pydtrace.h" +static bool gc_mark_disabled; + #ifndef Py_GIL_DISABLED typedef struct _gc_runtime_state GCState; @@ -58,6 +61,193 @@ typedef struct _gc_runtime_state GCState; #define MARK_PHASE_STEP 2 #define MARK_PHASE_DONE 3 +#if 1 // gc timing +// +#define QUANTILE_COUNT 5 +#define MARKER_COUNT (QUANTILE_COUNT * 3 + 2) + +typedef struct { + double q[MARKER_COUNT]; + double dn[MARKER_COUNT]; + double np[MARKER_COUNT]; + int n[MARKER_COUNT]; + int count; + double max; +} P2Engine; + +static void p2engine_init(P2Engine* engine, const double quantiles[QUANTILE_COUNT]) { + engine->count = 0; + engine->max = 0.0; + + engine->dn[0] = 0; + engine->dn[MARKER_COUNT - 1] = 1; + + int pointer = 1; + for (int i = 0; i < QUANTILE_COUNT; i++) { + double quantile = quantiles[i]; + engine->dn[pointer] = quantile; + engine->dn[pointer + 1] = quantile / 2; + engine->dn[pointer + 2] = (1 + quantile) / 2; + pointer += 3; + } + + // Sort the markers + for (int i = 0; i < MARKER_COUNT; i++) { + for (int j = i + 1; j < MARKER_COUNT; j++) { + if (engine->dn[i] > engine->dn[j]) { + double temp = engine->dn[i]; + engine->dn[i] = engine->dn[j]; + engine->dn[j] = temp; + } + } + } + + for (int i = 0; i < MARKER_COUNT; i++) { + engine->np[i] = (MARKER_COUNT - 1) * engine->dn[i] + 1; + } +} + +static int sign(double d) { + return (d >= 0) ? 1 : -1; +} + +static double parabolic(P2Engine* engine, int i, int d) { + return engine->q[i] + d / ((engine->n[i + 1] - engine->n[i - 1]) * + ((engine->n[i] - engine->n[i - 1] + d) * ((engine->q[i + 1] - engine->q[i]) / (engine->n[i + 1] - engine->n[i])) + + (engine->n[i + 1] - (engine->n[i] - d)) * ((engine->q[i] - engine->q[i - 1]) / (engine->n[i] - engine->n[i - 1])))); +} + +static double linear(P2Engine* engine, int i, int d) { + return engine->q[i] + d * ((engine->q[i + d] - engine->q[i]) / (engine->n[i + d] - engine->n[i])); +} + +static void p2engine_add(P2Engine* engine, double data) { + int i, k = 0; + if (data > engine->max) { + engine->max = data; + } + if (engine->count >= MARKER_COUNT) { + engine->count++; + // B1 + if (data < engine->q[0]) { + engine->q[0] = data; + k = 1; + } else if (data >= engine->q[MARKER_COUNT - 1]) { + engine->q[MARKER_COUNT - 1] = data; + k = MARKER_COUNT - 1; + } else { + for (i = 1; i < MARKER_COUNT; i++) { + if (data < engine->q[i]) { + k = i; + break; + } + } + } + + // B2 + for (i = k; i < MARKER_COUNT; i++) { + engine->n[i]++; + engine->np[i] = engine->np[i] + engine->dn[i]; + } + + for (i = 0; i < k; i++) { + engine->np[i] = engine->np[i] + engine->dn[i]; + } + + // B3 + for (i = 1; i < MARKER_COUNT - 1; i++) { + double d = engine->np[i] - engine->n[i]; + if ((d >= 1 && engine->n[i + 1] - engine->n[i] > 1) || + (d <= -1 && engine->n[i - 1] - engine->n[i] < -1)) { + double newq = parabolic(engine, i, sign(d)); + if (engine->q[i - 1] < newq && newq < engine->q[i + 1]) { + engine->q[i] = newq; + } else { + engine->q[i] = linear(engine, i, sign(d)); + } + engine->n[i] = engine->n[i] + sign(d); + } + } + } else { + engine->q[engine->count] = data; + engine->count++; + if (engine->count == MARKER_COUNT) { + // We have enough to start the algorithm, initialize + for (i = 0; i < MARKER_COUNT; i++) { + for (int j = i + 1; j < MARKER_COUNT; j++) { + if (engine->q[i] > engine->q[j]) { + double temp = engine->q[i]; + engine->q[i] = engine->q[j]; + engine->q[j] = temp; + } + } + } + for (i = 0; i < MARKER_COUNT; i++) { + engine->n[i] = i + 1; + } + } + } +} + +static double p2engine_result(P2Engine* engine, double quantile) { + if (engine->count < MARKER_COUNT) { + int closest = 1; + for (int i = 0; i < engine->count; i++) { + for (int j = i + 1; j < engine->count; j++) { + if (engine->q[i] > engine->q[j]) { + double temp = engine->q[i]; + engine->q[i] = engine->q[j]; + engine->q[j] = temp; + } + } + } + for (int i = 2; i < engine->count; i++) { + if (fabs((double)i / engine->count - quantile) < + fabs((double)closest / MARKER_COUNT - quantile)) { + closest = i; + } + } + return engine->q[closest]; + } else { + int closest = 1; + for (int i = 2; i < MARKER_COUNT - 1; i++) { + if (fabs(engine->dn[i] - quantile) < fabs(engine->dn[closest] - quantile)) { + closest = i; + } + } + return engine->q[closest]; + } +} + +static double gc_timing_quantiles[QUANTILE_COUNT] = {0.5, 0.75, 0.9, 0.95, 0.99}; + +#if 0 +// Example usage +int main() { + P2Engine engine; + double quantiles[QUANTILE_COUNT] = {0.25, 0.5, 0.75, 0.9}; // quartiles and 90th percentile + p2engine_init(&engine, quantiles); + + // Generate random data and add to engine + for (int i = 0; i < 1000000; i++) { + double value = (double)rand() / RAND_MAX; + p2engine_add(&engine, value); + } + + printf("Approximate results:\n"); + for (int i = 0; i < QUANTILE_COUNT; i++) { + double result = p2engine_result(&engine, quantiles[i]); + printf("Q%.2f: %f\n", quantiles[i], result); + } + + return 0; +} +#endif + +P2Engine gc_timing; + +#endif // gc timing + static inline int gc_is_old(PyGC_Head *gc) { return ((gc->_gc_prev & _PyGC_PREV_MASK_OLD) != 0); } @@ -139,7 +329,7 @@ _PyGC_InitState(GCState *gcstate) }; gcstate->generation0 = GEN_HEAD(gcstate, 0); INIT_GC_LIST(gcstate->permanent_generation.head); - INIT_GC_LIST(gcstate->old_alive); + INIT_GC_LIST(gcstate->mark_state.old_alive); #undef INIT_GC_LIST } @@ -160,13 +350,18 @@ _PyGC_Init(PyInterpreterState *interp) return _PyStatus_NO_MEMORY(); } - gcstate->thumb = PyList_New(0); - if (gcstate->thumb == NULL) { + gcstate->mark_state.thumb = PyList_New(0); + if (gcstate->mark_state.thumb == NULL) { return _PyStatus_NO_MEMORY(); } - gcstate->mark_phase = MARK_PHASE_DONE; - gcstate->old_alive_size = 0; - gcstate->old_size = 0; + gcstate->mark_state.mark_phase = MARK_PHASE_DONE; + gcstate->mark_state.old_alive_size = 0; + gcstate->mark_state.old_size = 0; + if (getenv("GC_MARK_DISABLED")) { + gc_mark_disabled = 1; + } + + p2engine_init(&gc_timing, gc_timing_quantiles); return _PyStatus_OK(); } @@ -1118,47 +1313,47 @@ mark_alive_step(PyThreadState *tstate, PyGC_Head *old_head) { GCState *gcstate = &tstate->interp->gc; - PyGC_Head *alive_head = &gcstate->old_alive; - if (gcstate->mark_phase == MARK_PHASE_INIT) { + PyGC_Head *alive_head = &gcstate->mark_state.old_alive; + if (gcstate->mark_state.mark_phase == MARK_PHASE_INIT) { gc_list_merge(alive_head, old_head); - gcstate->mark_steps = 0; - gcstate->mark_steps_total = 0; - gcstate->old_alive_size = 0; - gcstate->old_size = mark_old(old_head); - fprintf(stderr, "gc init mark head %lu\n", gcstate->old_size); + gcstate->mark_state.mark_steps = 0; + gcstate->mark_state.mark_steps_total = 0; + gcstate->mark_state.old_alive_size = 0; + gcstate->mark_state.old_size = mark_old(old_head); + fprintf(stderr, "gc init mark head %lu\n", gcstate->mark_state.old_size); PyObject *root = tstate->interp->sysdict; - gc_list_move(AS_GC(gcstate->thumb), alive_head); + gc_list_move(AS_GC(gcstate->mark_state.thumb), alive_head); propogate_old_reachable(root, alive_head); - gcstate->mark_phase = MARK_PHASE_STEP; + gcstate->mark_state.mark_phase = MARK_PHASE_STEP; } - if (gcstate->mark_phase == MARK_PHASE_STEP) { - Py_ssize_t n = gcstate->old_size; + if (gcstate->mark_state.mark_phase == MARK_PHASE_STEP) { + Py_ssize_t n = gcstate->mark_state.old_size; Py_ssize_t alive = 0; - PyGC_Head *gc = GC_NEXT(AS_GC(gcstate->thumb)); - unsigned long step_count = n / 100; + PyGC_Head *gc = GC_NEXT(AS_GC(gcstate->mark_state.thumb)); + unsigned long step_count = n / 25; if (step_count < 10) { step_count = 10; } - gcstate->mark_steps++; + gcstate->mark_state.mark_steps++; unsigned long total_steps = 0; for (;;) { total_steps++; PyGC_Head *next = GC_NEXT(gc); if (gc == alive_head) { - gcstate->mark_phase = MARK_PHASE_DONE; - alive = gcstate->old_alive_size; + gcstate->mark_state.mark_phase = MARK_PHASE_DONE; + alive = gcstate->mark_state.old_alive_size; fprintf(stderr, "gc mark done n=%lu old=%lu alive=%lu\n", n, n-alive, alive); break; } if (--step_count == 0) { - alive = gcstate->old_alive_size; + alive = gcstate->mark_state.old_alive_size; fprintf(stderr, "gc mark step n=%lu old=%lu alive=%lu\n", n, n-alive, alive); // remember our position in old_alive list with thumb object - gc_list_move(AS_GC(gcstate->thumb), next); + gc_list_move(AS_GC(gcstate->mark_state.thumb), next); break; } alive++; // count for this step - gcstate->old_alive_size++; // count for all steps + gcstate->mark_state.old_alive_size++; // count for all steps propogate_old_reachable(FROM_GC(gc), alive_head); gc = next; } @@ -1166,41 +1361,62 @@ mark_alive_step(PyThreadState *tstate, PyGC_Head *old_head) else { //fprintf(stderr, "gc mark is done, nothing to do\n"); } - gcstate->mark_steps_total++; + gcstate->mark_state.mark_steps_total++; } #define OLD_GENERATION (NUM_GENERATIONS-1) +static void +print_gc_times(GCState *gcstate) +{ + fprintf(stderr, "gc times: total %.3fs mark %.3fs max %ldus avg %ldus\n", + PyTime_AsSecondsDouble(gcstate->mark_state.gc_total_time), + PyTime_AsSecondsDouble(gcstate->mark_state.gc_mark_time), + _PyTime_AsMicroseconds(gcstate->mark_state.gc_max_pause, _PyTime_ROUND_HALF_EVEN), + _PyTime_AsMicroseconds(gcstate->mark_state.gc_total_time / gcstate->mark_state.gc_runs, _PyTime_ROUND_HALF_EVEN) + ); +} + static void maybe_mark_alive(PyThreadState *tstate, int generation, _PyGC_Reason reason) { GCState *gcstate = &tstate->interp->gc; PyGC_Head *old = GEN_HEAD(gcstate, OLD_GENERATION); + PyTime_t t1; + (void)PyTime_PerfCounterRaw(&t1); + if (generation == 0) { // Do one step of "mark alive" incremental work. This will look at // a fraction of 'old' to determine what is definitely alive, due // to being reachable from a known root object. Those objects will // be moved out of 'old' and into the 'old_alive' list. - mark_alive_step(tstate, old); + if (!gc_mark_disabled) { + mark_alive_step(tstate, old); + } } else { if (reason != _Py_GC_REASON_HEAP) { if (generation == OLD_GENERATION) { // if doing full, non-heap collection, merge old_alive list so // everything gets examined - gc_list_merge(&gcstate->old_alive, old); - gcstate->mark_phase = MARK_PHASE_INIT; + gc_list_merge(&gcstate->mark_state.old_alive, old); + gcstate->mark_state.mark_phase = MARK_PHASE_INIT; } } else { if (generation == OLD_GENERATION) { - float f = ((float)gcstate->old_alive_size) / gcstate->old_size; + float f = ((float)gcstate->mark_state.old_alive_size) / gcstate->mark_state.old_size; fprintf(stderr, "doing full auto collection %.2f\n", f); - fprintf(stderr, "steps %d %d\n", gcstate->mark_steps, gcstate->mark_steps_total); - gcstate->mark_phase = MARK_PHASE_INIT; + fprintf(stderr, "steps %d %d\n", gcstate->mark_state.mark_steps, gcstate->mark_state.mark_steps_total); + print_gc_times(gcstate); + gcstate->mark_state.mark_phase = MARK_PHASE_INIT; } } } + + PyTime_t t2; + (void)PyTime_PerfCounterRaw(&t2); + gcstate->mark_state.gc_mark_time += t2 - t1; } /* Deduce which objects among "base" are unreachable from outside the list @@ -1481,9 +1697,9 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) if (gcstate->debug & _PyGC_DEBUG_STATS) { PySys_WriteStderr("gc: collecting generation %d...\n", generation); show_stats_each_generations(gcstate); - // ignore error: don't interrupt the GC if reading the clock fails - (void)PyTime_PerfCounterRaw(&t1); } + // ignore error: don't interrupt the GC if reading the clock fails + (void)PyTime_PerfCounterRaw(&t1); if (PyDTrace_GC_START_ENABLED()) { PyDTrace_GC_START(generation); @@ -1630,6 +1846,18 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) } #endif + PyTime_t t3, dt; + (void)PyTime_PerfCounterRaw(&t3); + dt = t3 - t1; + if (reason == _Py_GC_REASON_HEAP) { + p2engine_add(&gc_timing, (double)_PyTime_AsMicroseconds(dt, _PyTime_ROUND_HALF_EVEN)); + if (dt > gcstate->mark_state.gc_max_pause) { + gcstate->mark_state.gc_max_pause = dt; + } + } + gcstate->mark_state.gc_total_time += dt; + gcstate->mark_state.gc_runs++; + if (PyDTrace_GC_DONE_ENABLED()) { PyDTrace_GC_DONE(n + m); } @@ -1721,7 +1949,7 @@ _PyGC_GetObjects(PyInterpreterState *interp, int generation) } if (generation == -1 || generation == (NUM_GENERATIONS - 1)) { // include the "old_alive" set of objects as well - if (append_objects(result, &gcstate->old_alive)) { + if (append_objects(result, &gcstate->mark_state.old_alive)) { goto error; } } @@ -1742,7 +1970,7 @@ _PyGC_GetOldAliveObjects(PyInterpreterState *interp) return NULL; } - if (append_objects(result, &gcstate->old_alive)) { + if (append_objects(result, &gcstate->mark_state.old_alive)) { Py_DECREF(result); return NULL; } @@ -1914,7 +2142,15 @@ _PyGC_Fini(PyInterpreterState *interp) finalize_unlink_gc_head(&gcstate->generations[i].head); } finalize_unlink_gc_head(&gcstate->permanent_generation.head); - finalize_unlink_gc_head(&gcstate->old_alive); + finalize_unlink_gc_head(&gcstate->mark_state.old_alive); + + print_gc_times(gcstate); + for (int i = 0; i < QUANTILE_COUNT; i++) { + double result = p2engine_result(&gc_timing, gc_timing_quantiles[i]); + fprintf(stderr, "gc timing Q%.0f: %.2f\n", gc_timing_quantiles[i]*100, result); + } + fprintf(stderr, "gc timing max: %.2f\n", gc_timing.max); + } /* for debugging */ From 04c2dbfc39ca972e50f99e03934b89cccb4c36bd Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sun, 20 Oct 2024 19:09:31 -0700 Subject: [PATCH 5/7] wip: improve comments --- Include/internal/pycore_gc.h | 8 ++++++++ Python/gc.c | 24 +++++++++++++----------- Python/pylifecycle.c | 3 +-- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 30305d35df24d3..48410adbe1f315 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -291,12 +291,19 @@ struct gc_mark_state { Py_ssize_t old_size; /* Number of alive objects found in oldest generation */ Py_ssize_t old_alive_size; + /* The phase of the mark process */ int mark_phase; + /* Number of incremental mark steps done */ int mark_steps; + /* Number of steps available before full collection */ int mark_steps_total; + /* Total time spent inside cyclic GC */ PyTime_t gc_total_time; + /* Time spent inside incremental mark part of cyclic GC */ PyTime_t gc_mark_time; + /* Maximum GC pause time */ PyTime_t gc_max_pause; + /* Total number of times GC was run */ PyTime_t gc_runs; }; @@ -322,6 +329,7 @@ struct _gc_runtime_state { PyObject *garbage; /* a list of callbacks to be invoked when collection is performed */ PyObject *callbacks; + /* state for the incremental "mark alive" logic */ struct gc_mark_state mark_state; /* This is the number of objects that survived the last full diff --git a/Python/gc.c b/Python/gc.c index d5de3db883124b..d7c7efe3ae7529 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -57,11 +57,11 @@ typedef struct _gc_runtime_state GCState; #define GENERATION_AUTO (-1) // phases of incremental mark alive process -#define MARK_PHASE_INIT 1 -#define MARK_PHASE_STEP 2 -#define MARK_PHASE_DONE 3 +#define MARK_PHASE_INIT 1 // start of new marking pass +#define MARK_PHASE_STEP 2 // incremental marking steps (done on each gen 0 collection) +#define MARK_PHASE_DONE 3 // marking has finished, wait for next full collection -#if 1 // gc timing +#if 1 // gc timing statistics (needs cleanup) // #define QUANTILE_COUNT 5 #define MARKER_COUNT (QUANTILE_COUNT * 3 + 2) @@ -248,6 +248,7 @@ P2Engine gc_timing; #endif // gc timing +// return True if object is part of the old (gen2) set static inline int gc_is_old(PyGC_Head *gc) { return ((gc->_gc_prev & _PyGC_PREV_MASK_OLD) != 0); } @@ -1269,6 +1270,7 @@ show_stats_each_generations(GCState *gcstate) buf, gc_list_size(&gcstate->permanent_generation.head)); } +// set the "old" flag on all objects in the old (gen 2) list static Py_ssize_t mark_old(PyGC_Head *head) { Py_ssize_t n = 0; @@ -1279,13 +1281,11 @@ mark_old(PyGC_Head *head) { return n; } -struct mark_state { - PyGC_Head todo; - Py_ssize_t alive; // number of objects found alive -}; - static int visit_reachable_old(PyObject *op, PyGC_Head *todo); +// Called on objects known to be alive (due to being reachable from known +// roots). Move objects to the 'old_alive' list. They will be excluded +// from the next full collection of old. static void propogate_old_reachable(PyObject *op, PyGC_Head *todo) { @@ -1308,6 +1308,7 @@ visit_reachable_old(PyObject *op, PyGC_Head *todo) return 0; } +// do one step of incremental "mark alive" processing static void mark_alive_step(PyThreadState *tstate, PyGC_Head *old_head) { @@ -1377,6 +1378,7 @@ print_gc_times(GCState *gcstate) ); } +// Run incremental "mark alive" process, as required. static void maybe_mark_alive(PyThreadState *tstate, int generation, _PyGC_Reason reason) { @@ -1406,8 +1408,8 @@ maybe_mark_alive(PyThreadState *tstate, int generation, _PyGC_Reason reason) else { if (generation == OLD_GENERATION) { float f = ((float)gcstate->mark_state.old_alive_size) / gcstate->mark_state.old_size; - fprintf(stderr, "doing full auto collection %.2f\n", f); - fprintf(stderr, "steps %d %d\n", gcstate->mark_state.mark_steps, gcstate->mark_state.mark_steps_total); + fprintf(stderr, "gc full auto collection %.2f\n", f); + fprintf(stderr, "gc steps needed=%d available=%d\n", gcstate->mark_state.mark_steps, gcstate->mark_state.mark_steps_total); print_gc_times(gcstate); gcstate->mark_state.mark_phase = MARK_PHASE_INIT; } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 85920f8a8a25b5..645d5ed35820d5 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1151,6 +1151,7 @@ run_presite(PyThreadState *tstate) } #endif +extern void _PyGC_Freeze(PyInterpreterState *interp); static PyStatus init_interp_main(PyThreadState *tstate) @@ -2217,8 +2218,6 @@ Py_Finalize(void) (void)_Py_Finalize(&_PyRuntime); } -extern void _PyGC_Freeze(PyInterpreterState *interp); - /* Create and initialize a new interpreter and thread, and return the new thread. This requires that Py_Initialize() has been called first. From ec98e5785969e69c365b394d04622b48540d11c1 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sun, 20 Oct 2024 19:46:39 -0700 Subject: [PATCH 6/7] wip: add timing stats for full collections --- Python/gc.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index d7c7efe3ae7529..1ec20bfd8c4548 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -244,7 +244,9 @@ int main() { } #endif -P2Engine gc_timing; +// timings for automatic collections +P2Engine gc_timing; // all automatic collections +P2Engine gc_timing_full; // full (gen2) automatic collections #endif // gc timing @@ -363,6 +365,7 @@ _PyGC_Init(PyInterpreterState *interp) } p2engine_init(&gc_timing, gc_timing_quantiles); + p2engine_init(&gc_timing_full, gc_timing_quantiles); return _PyStatus_OK(); } @@ -1408,7 +1411,7 @@ maybe_mark_alive(PyThreadState *tstate, int generation, _PyGC_Reason reason) else { if (generation == OLD_GENERATION) { float f = ((float)gcstate->mark_state.old_alive_size) / gcstate->mark_state.old_size; - fprintf(stderr, "gc full auto collection %.2f\n", f); + fprintf(stderr, "gc full auto collection, alive fraction=%.2f\n", f); fprintf(stderr, "gc steps needed=%d available=%d\n", gcstate->mark_state.mark_steps, gcstate->mark_state.mark_steps_total); print_gc_times(gcstate); gcstate->mark_state.mark_phase = MARK_PHASE_INIT; @@ -1853,6 +1856,9 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) dt = t3 - t1; if (reason == _Py_GC_REASON_HEAP) { p2engine_add(&gc_timing, (double)_PyTime_AsMicroseconds(dt, _PyTime_ROUND_HALF_EVEN)); + if (generation == OLD_GENERATION) { + p2engine_add(&gc_timing_full, (double)_PyTime_AsMicroseconds(dt, _PyTime_ROUND_HALF_EVEN)); + } if (dt > gcstate->mark_state.gc_max_pause) { gcstate->mark_state.gc_max_pause = dt; } @@ -2149,9 +2155,12 @@ _PyGC_Fini(PyInterpreterState *interp) print_gc_times(gcstate); for (int i = 0; i < QUANTILE_COUNT; i++) { double result = p2engine_result(&gc_timing, gc_timing_quantiles[i]); - fprintf(stderr, "gc timing Q%.0f: %.2f\n", gc_timing_quantiles[i]*100, result); + fprintf(stderr, "gc timing all Q%.0f: %.2f\n", gc_timing_quantiles[i]*100, result); + } + for (int i = 0; i < QUANTILE_COUNT; i++) { + double result = p2engine_result(&gc_timing_full, gc_timing_quantiles[i]); + fprintf(stderr, "gc timing full Q%.0f: %.2f\n", gc_timing_quantiles[i]*100, result); } - fprintf(stderr, "gc timing max: %.2f\n", gc_timing.max); } From 12ceba2e6ed365b1204141a9121b4adb6232fcd8 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 23 Oct 2024 11:42:32 -0700 Subject: [PATCH 7/7] Add WITH_GC_MARK_ALIVE and WITH_GC_TIMING_STATS --- Include/internal/pycore_gc.h | 53 +++++++++++-- Python/gc.c | 147 +++++++++++++++++------------------ 2 files changed, 115 insertions(+), 85 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 48410adbe1f315..6f0faf6aa1b86f 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -282,6 +282,43 @@ struct gc_generation_stats { Py_ssize_t uncollectable; }; +// if true, enable GC timing statistics +#define WITH_GC_TIMING_STATS 1 + +#if WITH_GC_TIMING_STATS + +#define QUANTILE_COUNT 5 +#define MARKER_COUNT (QUANTILE_COUNT * 3 + 2) + +typedef struct { + double q[MARKER_COUNT]; + double dn[MARKER_COUNT]; + double np[MARKER_COUNT]; + int n[MARKER_COUNT]; + int count; + double max; +} p2_engine; + +struct gc_timing_state { + /* timing statistics computed by P^2 algorithm */ + p2_engine auto_all; // timing for all automatic collections + p2_engine auto_full; // timing for full (gen2) automatic collections + /* Total time spent inside cyclic GC */ + PyTime_t gc_total_time; + /* Time spent inside incremental mark part of cyclic GC */ + PyTime_t gc_mark_time; + /* Maximum GC pause time */ + PyTime_t gc_max_pause; + /* Total number of times GC was run */ + PyTime_t gc_runs; +}; +#endif // WITH_GC_TIMING_STATS + + +// if true, enable the GC mark alive feature +#define WITH_GC_MARK_ALIVE 1 + +#if WITH_GC_MARK_ALIVE struct gc_mark_state { /* Objects in oldest generation that have be determined to be alive */ PyGC_Head old_alive; @@ -297,15 +334,9 @@ struct gc_mark_state { int mark_steps; /* Number of steps available before full collection */ int mark_steps_total; - /* Total time spent inside cyclic GC */ - PyTime_t gc_total_time; - /* Time spent inside incremental mark part of cyclic GC */ - PyTime_t gc_mark_time; - /* Maximum GC pause time */ - PyTime_t gc_max_pause; - /* Total number of times GC was run */ - PyTime_t gc_runs; }; +#endif // WITH_GC_MARK_ALIVE + struct _gc_runtime_state { /* List of objects that still need to be cleaned up, singly linked @@ -329,8 +360,14 @@ struct _gc_runtime_state { PyObject *garbage; /* a list of callbacks to be invoked when collection is performed */ PyObject *callbacks; +#if WITH_GC_MARK_ALIVE /* state for the incremental "mark alive" logic */ struct gc_mark_state mark_state; +#endif +#if WITH_GC_TIMING_STATS + /* state for GC timing statistics */ + struct gc_timing_state timing_state; +#endif /* This is the number of objects that survived the last full collection. It approximates the number of long lived objects diff --git a/Python/gc.c b/Python/gc.c index 1ec20bfd8c4548..c1eeea34715f95 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -16,8 +16,6 @@ #include "pycore_time.h" // _PyTime_AsMicroseconds() #include "pydtrace.h" -static bool gc_mark_disabled; - #ifndef Py_GIL_DISABLED typedef struct _gc_runtime_state GCState; @@ -56,26 +54,9 @@ typedef struct _gc_runtime_state GCState; // Automatically choose the generation that needs collecting. #define GENERATION_AUTO (-1) -// phases of incremental mark alive process -#define MARK_PHASE_INIT 1 // start of new marking pass -#define MARK_PHASE_STEP 2 // incremental marking steps (done on each gen 0 collection) -#define MARK_PHASE_DONE 3 // marking has finished, wait for next full collection +#if WITH_GC_TIMING_STATS -#if 1 // gc timing statistics (needs cleanup) -// -#define QUANTILE_COUNT 5 -#define MARKER_COUNT (QUANTILE_COUNT * 3 + 2) - -typedef struct { - double q[MARKER_COUNT]; - double dn[MARKER_COUNT]; - double np[MARKER_COUNT]; - int n[MARKER_COUNT]; - int count; - double max; -} P2Engine; - -static void p2engine_init(P2Engine* engine, const double quantiles[QUANTILE_COUNT]) { +static void p2engine_init(p2_engine* engine, const double quantiles[QUANTILE_COUNT]) { engine->count = 0; engine->max = 0.0; @@ -111,17 +92,17 @@ static int sign(double d) { return (d >= 0) ? 1 : -1; } -static double parabolic(P2Engine* engine, int i, int d) { +static double parabolic(p2_engine* engine, int i, int d) { return engine->q[i] + d / ((engine->n[i + 1] - engine->n[i - 1]) * ((engine->n[i] - engine->n[i - 1] + d) * ((engine->q[i + 1] - engine->q[i]) / (engine->n[i + 1] - engine->n[i])) + (engine->n[i + 1] - (engine->n[i] - d)) * ((engine->q[i] - engine->q[i - 1]) / (engine->n[i] - engine->n[i - 1])))); } -static double linear(P2Engine* engine, int i, int d) { +static double linear(p2_engine* engine, int i, int d) { return engine->q[i] + d * ((engine->q[i + d] - engine->q[i]) / (engine->n[i + d] - engine->n[i])); } -static void p2engine_add(P2Engine* engine, double data) { +static void p2engine_add(p2_engine* engine, double data) { int i, k = 0; if (data > engine->max) { engine->max = data; @@ -189,7 +170,7 @@ static void p2engine_add(P2Engine* engine, double data) { } } -static double p2engine_result(P2Engine* engine, double quantile) { +static double p2engine_result(p2_engine* engine, double quantile) { if (engine->count < MARKER_COUNT) { int closest = 1; for (int i = 0; i < engine->count; i++) { @@ -221,34 +202,16 @@ static double p2engine_result(P2Engine* engine, double quantile) { static double gc_timing_quantiles[QUANTILE_COUNT] = {0.5, 0.75, 0.9, 0.95, 0.99}; -#if 0 -// Example usage -int main() { - P2Engine engine; - double quantiles[QUANTILE_COUNT] = {0.25, 0.5, 0.75, 0.9}; // quartiles and 90th percentile - p2engine_init(&engine, quantiles); - - // Generate random data and add to engine - for (int i = 0; i < 1000000; i++) { - double value = (double)rand() / RAND_MAX; - p2engine_add(&engine, value); - } - - printf("Approximate results:\n"); - for (int i = 0; i < QUANTILE_COUNT; i++) { - double result = p2engine_result(&engine, quantiles[i]); - printf("Q%.2f: %f\n", quantiles[i], result); - } +#endif // WITH_GC_TIMING_STATS - return 0; -} -#endif +#if WITH_GC_MARK_ALIVE -// timings for automatic collections -P2Engine gc_timing; // all automatic collections -P2Engine gc_timing_full; // full (gen2) automatic collections +// phases of incremental mark alive process +#define MARK_PHASE_INIT 0 // start of new marking pass +#define MARK_PHASE_STEP 1 // incremental marking steps (done on each gen 0 collection) +#define MARK_PHASE_DONE 2 // marking has finished, wait for next full collection -#endif // gc timing +static bool gc_mark_disabled; // return True if object is part of the old (gen2) set static inline int gc_is_old(PyGC_Head *gc) { @@ -263,6 +226,8 @@ static inline void gc_clear_old(PyGC_Head *gc) { gc->_gc_prev &= ~_PyGC_PREV_MASK_OLD; } +#endif // WITH_GC_MARK_ALIVE + static inline int gc_is_collecting(PyGC_Head *g) { @@ -332,7 +297,9 @@ _PyGC_InitState(GCState *gcstate) }; gcstate->generation0 = GEN_HEAD(gcstate, 0); INIT_GC_LIST(gcstate->permanent_generation.head); +#if WITH_GC_MARK_ALIVE INIT_GC_LIST(gcstate->mark_state.old_alive); +#endif #undef INIT_GC_LIST } @@ -353,6 +320,7 @@ _PyGC_Init(PyInterpreterState *interp) return _PyStatus_NO_MEMORY(); } +#if WITH_GC_MARK_ALIVE gcstate->mark_state.thumb = PyList_New(0); if (gcstate->mark_state.thumb == NULL) { return _PyStatus_NO_MEMORY(); @@ -363,9 +331,12 @@ _PyGC_Init(PyInterpreterState *interp) if (getenv("GC_MARK_DISABLED")) { gc_mark_disabled = 1; } +#endif // WITH_GC_MARK_ALIVE - p2engine_init(&gc_timing, gc_timing_quantiles); - p2engine_init(&gc_timing_full, gc_timing_quantiles); +#if WITH_GC_TIMING_STATS + p2engine_init(&gcstate->timing_state.auto_all, gc_timing_quantiles); + p2engine_init(&gcstate->timing_state.auto_full, gc_timing_quantiles); +#endif return _PyStatus_OK(); } @@ -1273,6 +1244,8 @@ show_stats_each_generations(GCState *gcstate) buf, gc_list_size(&gcstate->permanent_generation.head)); } +#if WITH_GC_MARK_ALIVE + // set the "old" flag on all objects in the old (gen 2) list static Py_ssize_t mark_old(PyGC_Head *head) { @@ -1339,9 +1312,7 @@ mark_alive_step(PyThreadState *tstate, PyGC_Head *old_head) step_count = 10; } gcstate->mark_state.mark_steps++; - unsigned long total_steps = 0; for (;;) { - total_steps++; PyGC_Head *next = GC_NEXT(gc); if (gc == alive_head) { gcstate->mark_state.mark_phase = MARK_PHASE_DONE; @@ -1368,23 +1339,28 @@ mark_alive_step(PyThreadState *tstate, PyGC_Head *old_head) gcstate->mark_state.mark_steps_total++; } +#endif // WITH_GC_MARK_ALIVE + #define OLD_GENERATION (NUM_GENERATIONS-1) static void print_gc_times(GCState *gcstate) { +#if WITH_GC_TIMING_STATS fprintf(stderr, "gc times: total %.3fs mark %.3fs max %ldus avg %ldus\n", - PyTime_AsSecondsDouble(gcstate->mark_state.gc_total_time), - PyTime_AsSecondsDouble(gcstate->mark_state.gc_mark_time), - _PyTime_AsMicroseconds(gcstate->mark_state.gc_max_pause, _PyTime_ROUND_HALF_EVEN), - _PyTime_AsMicroseconds(gcstate->mark_state.gc_total_time / gcstate->mark_state.gc_runs, _PyTime_ROUND_HALF_EVEN) + PyTime_AsSecondsDouble(gcstate->timing_state.gc_total_time), + PyTime_AsSecondsDouble(gcstate->timing_state.gc_mark_time), + (long)_PyTime_AsMicroseconds(gcstate->timing_state.gc_max_pause, _PyTime_ROUND_HALF_EVEN), + (long)_PyTime_AsMicroseconds(gcstate->timing_state.gc_total_time / gcstate->timing_state.gc_runs, _PyTime_ROUND_HALF_EVEN) ); +#endif // WITH_GC_TIMING_STATS } // Run incremental "mark alive" process, as required. static void maybe_mark_alive(PyThreadState *tstate, int generation, _PyGC_Reason reason) { +#if WITH_GC_MARK_ALIVE GCState *gcstate = &tstate->interp->gc; PyGC_Head *old = GEN_HEAD(gcstate, OLD_GENERATION); PyTime_t t1; @@ -1419,9 +1395,13 @@ maybe_mark_alive(PyThreadState *tstate, int generation, _PyGC_Reason reason) } } +#if WITH_GC_TIMING_STATS PyTime_t t2; (void)PyTime_PerfCounterRaw(&t2); - gcstate->mark_state.gc_mark_time += t2 - t1; + gcstate->timing_state.gc_mark_time += t2 - t1; +#endif + +#endif // WITH_GC_MARK_ALIVE } /* Deduce which objects among "base" are unreachable from outside the list @@ -1598,7 +1578,7 @@ invoke_gc_callback(PyThreadState *tstate, const char *phase, static int gc_select_generation(GCState *gcstate) { - for (int i = NUM_GENERATIONS-1; i >= 0; i--) { + for (int i = OLD_GENERATION; i >= 0; i--) { if (gcstate->generations[i].count > gcstate->generations[i].threshold) { /* Avoid quadratic performance degradation in number of tracked objects (see also issue #4074): @@ -1636,7 +1616,7 @@ gc_select_generation(GCState *gcstate) June 2008. His original analysis and proposal can be found at: http://mail.python.org/pipermail/python-dev/2008-June/080579.html */ - if (i == NUM_GENERATIONS - 1 + if (i == OLD_GENERATION && gcstate->long_lived_pending < gcstate->long_lived_total / 4) { continue; @@ -1725,7 +1705,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) /* handy references */ young = GEN_HEAD(gcstate, generation); - if (generation < NUM_GENERATIONS-1) { + if (generation < OLD_GENERATION) { old = GEN_HEAD(gcstate, generation+1); } else { @@ -1823,7 +1803,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) /* Clear free list only during the collection of the highest * generation */ - if (generation == NUM_GENERATIONS-1) { + if (generation == OLD_GENERATION) { _PyGC_ClearAllFreeLists(tstate->interp); } @@ -1851,20 +1831,26 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) } #endif +#if WITH_GC_TIMING_STATS PyTime_t t3, dt; (void)PyTime_PerfCounterRaw(&t3); dt = t3 - t1; if (reason == _Py_GC_REASON_HEAP) { - p2engine_add(&gc_timing, (double)_PyTime_AsMicroseconds(dt, _PyTime_ROUND_HALF_EVEN)); + p2engine_add(&gcstate->timing_state.auto_all, + (double)_PyTime_AsMicroseconds(dt, + _PyTime_ROUND_HALF_EVEN)); if (generation == OLD_GENERATION) { - p2engine_add(&gc_timing_full, (double)_PyTime_AsMicroseconds(dt, _PyTime_ROUND_HALF_EVEN)); + p2engine_add(&gcstate->timing_state.auto_full, + (double)_PyTime_AsMicroseconds(dt, + _PyTime_ROUND_HALF_EVEN)); } - if (dt > gcstate->mark_state.gc_max_pause) { - gcstate->mark_state.gc_max_pause = dt; + if (dt > gcstate->timing_state.gc_max_pause) { + gcstate->timing_state.gc_max_pause = dt; } } - gcstate->mark_state.gc_total_time += dt; - gcstate->mark_state.gc_runs++; + gcstate->timing_state.gc_total_time += dt; + gcstate->timing_state.gc_runs++; +#endif // WITH_GC_TIMING_STATS if (PyDTrace_GC_DONE_ENABLED()) { PyDTrace_GC_DONE(n + m); @@ -1955,12 +1941,14 @@ _PyGC_GetObjects(PyInterpreterState *interp, int generation) goto error; } } - if (generation == -1 || generation == (NUM_GENERATIONS - 1)) { +#if WITH_GC_MARK_ALIVE + if (generation == -1 || generation == (OLD_GENERATION)) { // include the "old_alive" set of objects as well if (append_objects(result, &gcstate->mark_state.old_alive)) { goto error; } } +#endif return result; error: @@ -1971,17 +1959,18 @@ _PyGC_GetObjects(PyInterpreterState *interp, int generation) PyObject * _PyGC_GetOldAliveObjects(PyInterpreterState *interp) { - GCState *gcstate = &interp->gc; - PyObject *result = PyList_New(0); if (result == NULL) { return NULL; } +#if WITH_GC_MARK_ALIVE + GCState *gcstate = &interp->gc; if (append_objects(result, &gcstate->mark_state.old_alive)) { Py_DECREF(result); return NULL; } +#endif return result; } @@ -2002,7 +1991,7 @@ _PyGC_Unfreeze(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; gc_list_merge(&gcstate->permanent_generation.head, - GEN_HEAD(gcstate, NUM_GENERATIONS-1)); + GEN_HEAD(gcstate, OLD_GENERATION)); } Py_ssize_t @@ -2051,7 +2040,7 @@ PyGC_Collect(void) Py_ssize_t n; PyObject *exc = _PyErr_GetRaisedException(tstate); - n = gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_MANUAL); + n = gc_collect_main(tstate, OLD_GENERATION, _Py_GC_REASON_MANUAL); _PyErr_SetRaisedException(tstate, exc); return n; @@ -2072,7 +2061,7 @@ _PyGC_CollectNoFail(PyThreadState *tstate) during interpreter shutdown (and then never finish it). See http://bugs.python.org/issue8713#msg195178 for an example. */ - gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN); + gc_collect_main(tstate, OLD_GENERATION, _Py_GC_REASON_SHUTDOWN); } void @@ -2150,17 +2139,21 @@ _PyGC_Fini(PyInterpreterState *interp) finalize_unlink_gc_head(&gcstate->generations[i].head); } finalize_unlink_gc_head(&gcstate->permanent_generation.head); +#if WITH_GC_MARK_ALIVE finalize_unlink_gc_head(&gcstate->mark_state.old_alive); +#endif print_gc_times(gcstate); +#if WITH_GC_TIMING_STATS for (int i = 0; i < QUANTILE_COUNT; i++) { - double result = p2engine_result(&gc_timing, gc_timing_quantiles[i]); + double result = p2engine_result(&gcstate->timing_state.auto_all, gc_timing_quantiles[i]); fprintf(stderr, "gc timing all Q%.0f: %.2f\n", gc_timing_quantiles[i]*100, result); } for (int i = 0; i < QUANTILE_COUNT; i++) { - double result = p2engine_result(&gc_timing_full, gc_timing_quantiles[i]); + double result = p2engine_result(&gcstate->timing_state.auto_full, gc_timing_quantiles[i]); fprintf(stderr, "gc timing full Q%.0f: %.2f\n", gc_timing_quantiles[i]*100, result); } +#endif // WITH_GC_TIMING_STATS }