diff --git a/HACKING.rst b/HACKING.rst
index d6a756b..c8ccebf 100644
--- a/HACKING.rst
+++ b/HACKING.rst
@@ -3,7 +3,7 @@ Hacking on objgraph
 ===================

-Start by geting the latest source with ::
+Start by getting the latest source with ::

     git clone https://github.com/mgedmin/objgraph

@@ -65,7 +65,7 @@ to see how incomplete they are with ::
     make coverage

 I use a `vim plugin `_
-to higlight lines not covered by tests while I edit ::
+to highlight lines not covered by tests while I edit ::

     make coverage
     vim objgraph.py
diff --git a/docs/index.txt b/docs/index.txt
index e022ade..2c09df1 100644
--- a/docs/index.txt
+++ b/docs/index.txt
@@ -123,6 +123,20 @@
 It is perhaps surprising to find :mod:`linecache` at the end of that chain
 (apparently :mod:`doctest` monkey-patches it), but the important things --
 :func:`computate_something` and its cache dictionary -- are in there.

+In some cases, something might look like a memory leak but actually isn't.
+The case I'm interested in here is a manifestation of `python issue 39061
+<https://bugs.python.org/issue39061>`_: objects that end up in garbage
+collector generation 2 are only rarely collected and can keep using more
+and more memory.
+
+Using `objgraph` in such a scenario actually hides the issue, because a full
+garbage collection is run first to cut the noise as much as possible.
+
+To keep a full collection from hiding such a memory-leak look-alike, most of
+the API functions now accept an option to collect only up to a specific
+generation. The option is `gc_collect_gen`; it is set to 2 by default,
+which gives the default gc behavior.
+
 There are other tools, perhaps better suited for memory leak hunting: `heapy
 `_, `Dozer `_.
diff --git a/objgraph.py b/objgraph.py
index 6c2c941..3039611 100755
--- a/objgraph.py
+++ b/objgraph.py
@@ -276,7 +276,8 @@ def show_most_common_types(
         file.write('%-*s %i\n' % (width, name, count))


-def growth(limit=10, peak_stats={}, shortnames=True, filter=None):
+def growth(limit=10, peak_stats={}, shortnames=True, filter=None,
+           gc_collect_gen=2):
     """Count the increase in peak object since last call.

     Returns a list of (type_name, total_count, increase_delta),
@@ -293,6 +294,9 @@ def growth(limit=10, peak_stats={}, shortnames=True, filter=None):
     returning a boolean. Objects for which ``filter(obj)`` returns ``False``
     will be ignored.

+    gc.collect() is called with ``gc_collect_gen``. The default is ``2``, thus
+    running a full collection.
+
     The caveats documented in :func:`typestats` apply.

     Example:
@@ -302,8 +306,11 @@ def growth(limit=10, peak_stats={}, shortnames=True, filter=None):

     .. versionadded:: 3.3.0

+    .. versionchanged:: 3.6
+       New parameter: ``gc_collect_gen``.
+
     """
-    gc.collect()
+    gc.collect(gc_collect_gen)
     stats = typestats(shortnames=shortnames, filter=filter)
     deltas = {}
     for name, count in iteritems(stats):
@@ -320,7 +327,7 @@ def growth(limit=10, peak_stats={}, shortnames=True, filter=None):


 def show_growth(limit=10, peak_stats=None, shortnames=True, file=None,
-                filter=None):
+                filter=None, gc_collect_gen=2):
     """Show the increase in peak object counts since last call.

     if ``peak_stats`` is None, peak object counts will recorded in
@@ -348,11 +355,15 @@ def show_growth(limit=10, peak_stats=None, shortnames=True, file=None,
     .. versionchanged:: 3.1.3
        New parameter: ``filter``.

+    .. versionchanged:: 3.6
+       New parameter: ``gc_collect_gen``.
+ """ if peak_stats is None: - result = growth(limit, shortnames=shortnames, filter=filter) + result = growth(limit, shortnames=shortnames, filter=filter, + gc_collect_gen=gc_collect_gen) else: - result = growth(limit, peak_stats, shortnames, filter) + result = growth(limit, peak_stats, shortnames, filter, gc_collect_gen) if result: if file is None: file = sys.stdout @@ -362,7 +373,7 @@ def show_growth(limit=10, peak_stats=None, shortnames=True, file=None, def get_new_ids(skip_update=False, limit=10, sortby='deltas', - shortnames=None, file=None, _state={}): + shortnames=None, file=None, _state={}, gc_collect_gen=2): """Find and display new objects allocated since last call. Shows the increase in object counts since last call to this @@ -392,6 +403,9 @@ def get_new_ids(skip_update=False, limit=10, sortby='deltas', It is used by the function to store the internal state between calls. Never pass in this argument unless you know what you're doing. + ``gc_collect_gen`` (int from 0 to 2): used in the call to gc.collect() to + limit the collection to given generation and lower. + The caveats documented in :func:`growth` apply. When one gets new_ids from :func:`get_new_ids`, one can use @@ -420,6 +434,10 @@ def get_new_ids(skip_update=False, limit=10, sortby='deltas', True .. versionadded:: 3.4 + + .. versionchanged:: 3.6 + New parameter: ``gc_collect_gen``. + """ if not _state: _state['old'] = collections.defaultdict(set) @@ -435,7 +453,7 @@ def get_new_ids(skip_update=False, limit=10, sortby='deltas', shortnames = _state['shortnames'] else: _state['shortnames'] = shortnames - gc.collect() + gc.collect(gc_collect_gen) objects = gc.get_objects() for class_name in old_ids: old_ids[class_name].clear() @@ -494,7 +512,7 @@ def get_new_ids(skip_update=False, limit=10, sortby='deltas', return new_ids -def get_leaking_objects(objects=None): +def get_leaking_objects(objects=None, gc_collect_gen=2): """Return objects that do not have any referents. These could indicate reference-counting bugs in C code. Or they could @@ -503,9 +521,13 @@ def get_leaking_objects(objects=None): Note that the GC does not track simple objects like int or str. .. versionadded:: 1.7 + + .. versionchanged:: 3.6 + New parameter: ``gc_collect_gen``. + """ if objects is None: - gc.collect() + gc.collect(gc_collect_gen) objects = gc.get_objects() try: ids = set(id(i) for i in objects) @@ -592,7 +614,8 @@ def at_addrs(address_set): return res -def find_ref_chain(obj, predicate, max_depth=20, extra_ignore=()): +def find_ref_chain(obj, predicate, max_depth=20, extra_ignore=(), + gc_collect_gen=2): """Find a shortest chain of references leading from obj. The end of the chain will be some object that matches your predicate. @@ -604,6 +627,9 @@ def find_ref_chain(obj, predicate, max_depth=20, extra_ignore=()): ``extra_ignore`` can be a list of object IDs to exclude those objects from your search. + ``gc_collect_gen`` specifies the generation to be collected in the call to + gc.collect(). The default is to run a full collection. + Example: >>> find_ref_chain(obj, lambda x: isinstance(x, MyClass)) @@ -612,12 +638,18 @@ def find_ref_chain(obj, predicate, max_depth=20, extra_ignore=()): Returns ``[obj]`` if such a chain could not be found. .. versionadded:: 1.7 + + .. versionchanged:: 3.6 + New parameter: ``gc_collect_gen``. 
+ """ return _find_chain(obj, predicate, gc.get_referents, - max_depth=max_depth, extra_ignore=extra_ignore)[::-1] + max_depth=max_depth, extra_ignore=extra_ignore, + gc_collect_gen=gc_collect_gen)[::-1] -def find_backref_chain(obj, predicate, max_depth=20, extra_ignore=()): +def find_backref_chain(obj, predicate, max_depth=20, extra_ignore=(), + gc_collect_gen=2): """Find a shortest chain of references leading to obj. The start of the chain will be some object that matches your predicate. @@ -629,6 +661,9 @@ def find_backref_chain(obj, predicate, max_depth=20, extra_ignore=()): ``extra_ignore`` can be a list of object IDs to exclude those objects from your search. + ``gc_collect_gen`` specifies the generation to be collected in the call to + gc.collect(). The default is to run a full collection. + Example: >>> find_backref_chain(obj, is_proper_module) @@ -639,15 +674,19 @@ def find_backref_chain(obj, predicate, max_depth=20, extra_ignore=()): .. versionchanged:: 1.5 Returns ``obj`` instead of ``None`` when a chain could not be found. + .. versionchanged:: 3.6 + New parameter: ``gc_collect_gen``. + """ return _find_chain(obj, predicate, gc.get_referrers, - max_depth=max_depth, extra_ignore=extra_ignore) + max_depth=max_depth, extra_ignore=extra_ignore, + gc_collect_gen=gc_collect_gen) def show_backrefs(objs, max_depth=3, extra_ignore=(), filter=None, too_many=10, highlight=None, filename=None, extra_info=None, refcounts=False, shortnames=True, output=None, - extra_node_attrs=None): + extra_node_attrs=None, gc_collect_gen=2): """Generate an object reference graph ending at ``objs``. The graph will show you what objects refer to ``objs``, directly and @@ -693,6 +732,9 @@ def show_backrefs(objs, max_depth=3, extra_ignore=(), filter=None, too_many=10, names ('package.module.ClassName'). By default you get to see only the class name part. + ``gc_collect_gen`` specifies the generation to be collected in the call to + gc.collect(). The default is to run a full collection. + Examples: >>> show_backrefs(obj) @@ -717,6 +759,9 @@ class name part. .. versionchanged:: 3.5 New parameter: ``extra_node_attrs``. + + .. versionchanged:: 3.6 + New parameter: ``gc_collect_gen``. """ # For show_backrefs(), it makes sense to stop when reaching a # module because you'll end up in sys.modules and explode the @@ -728,13 +773,14 @@ class name part. filename=filename, output=output, extra_info=extra_info, refcounts=refcounts, shortnames=shortnames, cull_func=is_proper_module, - extra_node_attrs=extra_node_attrs) + extra_node_attrs=extra_node_attrs, + gc_collect_gen=gc_collect_gen) def show_refs(objs, max_depth=3, extra_ignore=(), filter=None, too_many=10, highlight=None, filename=None, extra_info=None, refcounts=False, shortnames=True, output=None, - extra_node_attrs=None): + extra_node_attrs=None, gc_collect_gen=2): """Generate an object reference graph starting at ``objs``. The graph will show you what objects are reachable from ``objs``, directly @@ -774,6 +820,9 @@ def show_refs(objs, max_depth=3, extra_ignore=(), filter=None, too_many=10, Specify ``refcounts=True`` if you want to see reference counts. + ``gc_collect_gen`` specifies the generation to be collected in the call to + gc.collect(). The default is to run a full collection. + Examples: >>> show_refs(obj) @@ -801,13 +850,17 @@ def show_refs(objs, max_depth=3, extra_ignore=(), filter=None, too_many=10, .. versionchanged:: 3.5 New parameter: ``extra_node_attrs``. + + .. versionchanged:: 3.6 + New parameter: ``gc_collect_gen``. 
""" return _show_graph(objs, max_depth=max_depth, extra_ignore=extra_ignore, filter=filter, too_many=too_many, highlight=highlight, edge_func=gc.get_referents, swap_source_target=True, filename=filename, extra_info=extra_info, refcounts=refcounts, shortnames=shortnames, - output=output, extra_node_attrs=extra_node_attrs) + output=output, extra_node_attrs=extra_node_attrs, + gc_collect_gen=gc_collect_gen) def show_chain(*chains, **kw): @@ -881,7 +934,8 @@ def is_proper_module(obj): # Internal helpers # -def _find_chain(obj, predicate, edge_func, max_depth=20, extra_ignore=()): +def _find_chain(obj, predicate, edge_func, max_depth=20, extra_ignore=(), + gc_collect_gen=2): queue = [obj] depth = {id(obj): 0} parent = {id(obj): None} @@ -893,7 +947,7 @@ def _find_chain(obj, predicate, edge_func, max_depth=20, extra_ignore=()): ignore.add(id(ignore)) ignore.add(id(sys._getframe())) # this function ignore.add(id(sys._getframe(1))) # find_chain/find_backref_chain - gc.collect() + gc.collect(gc_collect_gen) while queue: target = queue.pop(0) if predicate(target): @@ -920,7 +974,8 @@ def _show_graph(objs, edge_func, swap_source_target, max_depth=3, extra_ignore=(), filter=None, too_many=10, highlight=None, filename=None, extra_info=None, refcounts=False, shortnames=True, output=None, - cull_func=None, extra_node_attrs=None): + cull_func=None, extra_node_attrs=None, + gc_collect_gen=2): if not _isinstance(objs, (list, tuple)): objs = [objs] @@ -963,7 +1018,7 @@ def _show_graph(objs, edge_func, swap_source_target, depth[id(obj)] = 0 queue.append(obj) del obj - gc.collect() + gc.collect(gc_collect_gen) nodes = 0 while queue: nodes += 1 diff --git a/tests.py b/tests.py index 27a325a..1831811 100755 --- a/tests.py +++ b/tests.py @@ -339,6 +339,47 @@ def test_growth(self): self.assertEqual(1, len(cared)) self.assertEqual(1, cared[0][2]) + def test_growth_override_gc_collect_gen(self): + + """ + Inspiration taken from https://bugs.python.org/issue39061, attachment + late_gc.py + """ + class ApparentlyLeakingObj: + """Object keeping references to itself""" + def __init__(self): + self.create_cycle = self + + def trigger_memory_leak_look_alike(): + for i in range(1000): + apparently_leaking = ApparentlyLeakingObj() + # Instantiate n objects to free via garbage collection while + # "working" on that heavy object + for i in range(90): + light_cyclical_object = list() + light_cyclical_object.append(light_cyclical_object) + del apparently_leaking + + # First, make sure that when using the default garbage collection + # generation parameter, there is no memory leak look alike: there + # should not be any ApparentlyLeakingObj in the growth info + objgraph.growth(limit=None) + trigger_memory_leak_look_alike() + growth_info = objgraph.growth(limit=None) + + assert not any(record[0] == 'ApparentlyLeakingObj' + for record in growth_info) + + # Now, only collect up to generation 1, keeping the objects in + # generation 2 intact. There should be at least one + # ApparentlyLeakingObj in the growth info + objgraph.growth(limit=None, gc_collect_gen=1) + trigger_memory_leak_look_alike() + growth_info = objgraph.growth(limit=None, gc_collect_gen=1) + + assert any(record[0] == 'ApparentlyLeakingObj' + for record in growth_info) + def test_show_growth_custom_peak_stats(self): ps = {} objgraph.show_growth(peak_stats=ps, file=StringIO())