diff --git a/HACKING.rst b/HACKING.rst
index d6a756b..c8ccebf 100644
--- a/HACKING.rst
+++ b/HACKING.rst
@@ -3,7 +3,7 @@
Hacking on objgraph
===================
-Start by geting the latest source with ::
+Start by getting the latest source with ::
git clone https://github.com/mgedmin/objgraph
@@ -65,7 +65,7 @@ to see how incomplete they are with ::
make coverage
I use a `vim plugin `_
-to higlight lines not covered by tests while I edit ::
+to highlight lines not covered by tests while I edit ::
make coverage
vim objgraph.py
diff --git a/docs/index.txt b/docs/index.txt
index e022ade..2c09df1 100644
--- a/docs/index.txt
+++ b/docs/index.txt
@@ -123,6 +123,20 @@ It is perhaps surprising to find :mod:`linecache` at the end of that chain
(apparently :mod:`doctest` monkey-patches it), but the important things --
:func:`computate_something` and its cache dictionary -- are in there.
+In some cases, something might look like a memory leak but actually isn't.
+The case I'm interested in here is a manifestation of `Python issue 39061
+<https://bugs.python.org/issue39061>`_.  Objects that land in garbage
+collector generation 2 are only rarely collected and can end up using more
+and more memory.
+
+Using `objgraph` in such a scenario actually hides the issue, because a full
+garbage collection is run to cut down the noise as much as possible.
+
+To avoid a full collection hiding such a memory-leak look-alike, most of the
+API functions now accept an option to collect only up to a specific
+generation.  The option is `gc_collect_gen`; it defaults to 2, which keeps
+the default gc behavior of a full collection.
+
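+For example, to keep generation 2 untouched while still reducing the noise
+from the younger generations, pass ``gc_collect_gen=1``.  A minimal sketch
+(the surrounding code is only illustrative)::
+
+    import objgraph
+
+    objgraph.show_growth(gc_collect_gen=1)  # record baseline counts
+    # ... run the code suspected of leaking ...
+    objgraph.show_growth(gc_collect_gen=1)  # growth; generation 2 untouched
+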
There are other tools, perhaps better suited for memory leak hunting:
`heapy `_,
`Dozer `_.
diff --git a/objgraph.py b/objgraph.py
index 6c2c941..3039611 100755
--- a/objgraph.py
+++ b/objgraph.py
@@ -276,7 +276,8 @@ def show_most_common_types(
file.write('%-*s %i\n' % (width, name, count))
-def growth(limit=10, peak_stats={}, shortnames=True, filter=None):
+def growth(limit=10, peak_stats={}, shortnames=True, filter=None,
+ gc_collect_gen=2):
"""Count the increase in peak object since last call.
Returns a list of (type_name, total_count, increase_delta),
@@ -293,6 +294,9 @@ def growth(limit=10, peak_stats={}, shortnames=True, filter=None):
returning a boolean. Objects for which ``filter(obj)`` returns ``False``
will be ignored.
+ gc.collect() is called with ``gc_collect_gen``. The default is ``2``, thus
+ running a full collection.
+
The caveats documented in :func:`typestats` apply.
Example:
@@ -302,8 +306,11 @@ def growth(limit=10, peak_stats={}, shortnames=True, filter=None):
.. versionadded:: 3.3.0
+ .. versionchanged:: 3.6
+ New parameter: ``gc_collect_gen``.
+
"""
- gc.collect()
+ gc.collect(gc_collect_gen)
stats = typestats(shortnames=shortnames, filter=filter)
deltas = {}
for name, count in iteritems(stats):
@@ -320,7 +327,7 @@ def growth(limit=10, peak_stats={}, shortnames=True, filter=None):
def show_growth(limit=10, peak_stats=None, shortnames=True, file=None,
- filter=None):
+ filter=None, gc_collect_gen=2):
"""Show the increase in peak object counts since last call.
if ``peak_stats`` is None, peak object counts will recorded in
@@ -348,11 +355,15 @@ def show_growth(limit=10, peak_stats=None, shortnames=True, file=None,
.. versionchanged:: 3.1.3
New parameter: ``filter``.
+ .. versionchanged:: 3.6
+ New parameter: ``gc_collect_gen``.
+
"""
if peak_stats is None:
- result = growth(limit, shortnames=shortnames, filter=filter)
+ result = growth(limit, shortnames=shortnames, filter=filter,
+ gc_collect_gen=gc_collect_gen)
else:
- result = growth(limit, peak_stats, shortnames, filter)
+ result = growth(limit, peak_stats, shortnames, filter, gc_collect_gen)
if result:
if file is None:
file = sys.stdout
@@ -362,7 +373,7 @@ def show_growth(limit=10, peak_stats=None, shortnames=True, file=None,
def get_new_ids(skip_update=False, limit=10, sortby='deltas',
- shortnames=None, file=None, _state={}):
+ shortnames=None, file=None, _state={}, gc_collect_gen=2):
"""Find and display new objects allocated since last call.
Shows the increase in object counts since last call to this
@@ -392,6 +403,9 @@ def get_new_ids(skip_update=False, limit=10, sortby='deltas',
It is used by the function to store the internal state between calls.
Never pass in this argument unless you know what you're doing.
+ ``gc_collect_gen`` (int from 0 to 2): used in the call to gc.collect() to
+ limit the collection to the given generation and lower.
+
The caveats documented in :func:`growth` apply.
When one gets new_ids from :func:`get_new_ids`, one can use
@@ -420,6 +434,10 @@ def get_new_ids(skip_update=False, limit=10, sortby='deltas',
True
.. versionadded:: 3.4
+
+ .. versionchanged:: 3.6
+ New parameter: ``gc_collect_gen``.
+
"""
if not _state:
_state['old'] = collections.defaultdict(set)
@@ -435,7 +453,7 @@ def get_new_ids(skip_update=False, limit=10, sortby='deltas',
shortnames = _state['shortnames']
else:
_state['shortnames'] = shortnames
- gc.collect()
+ gc.collect(gc_collect_gen)
objects = gc.get_objects()
for class_name in old_ids:
old_ids[class_name].clear()
@@ -494,7 +512,7 @@ def get_new_ids(skip_update=False, limit=10, sortby='deltas',
return new_ids
-def get_leaking_objects(objects=None):
+def get_leaking_objects(objects=None, gc_collect_gen=2):
"""Return objects that do not have any referents.
These could indicate reference-counting bugs in C code. Or they could
@@ -503,9 +521,13 @@ def get_leaking_objects(objects=None):
Note that the GC does not track simple objects like int or str.
.. versionadded:: 1.7
+
+ .. versionchanged:: 3.6
+ New parameter: ``gc_collect_gen``.
+
"""
if objects is None:
- gc.collect()
+ gc.collect(gc_collect_gen)
objects = gc.get_objects()
try:
ids = set(id(i) for i in objects)
@@ -592,7 +614,8 @@ def at_addrs(address_set):
return res
-def find_ref_chain(obj, predicate, max_depth=20, extra_ignore=()):
+def find_ref_chain(obj, predicate, max_depth=20, extra_ignore=(),
+ gc_collect_gen=2):
"""Find a shortest chain of references leading from obj.
The end of the chain will be some object that matches your predicate.
@@ -604,6 +627,9 @@ def find_ref_chain(obj, predicate, max_depth=20, extra_ignore=()):
``extra_ignore`` can be a list of object IDs to exclude those objects from
your search.
+ ``gc_collect_gen`` specifies the generation to be collected in the call to
+ gc.collect(). The default is to run a full collection.
+
Example:
>>> find_ref_chain(obj, lambda x: isinstance(x, MyClass))
@@ -612,12 +638,18 @@ def find_ref_chain(obj, predicate, max_depth=20, extra_ignore=()):
Returns ``[obj]`` if such a chain could not be found.
.. versionadded:: 1.7
+
+ .. versionchanged:: 3.6
+ New parameter: ``gc_collect_gen``.
+
"""
return _find_chain(obj, predicate, gc.get_referents,
- max_depth=max_depth, extra_ignore=extra_ignore)[::-1]
+ max_depth=max_depth, extra_ignore=extra_ignore,
+ gc_collect_gen=gc_collect_gen)[::-1]
-def find_backref_chain(obj, predicate, max_depth=20, extra_ignore=()):
+def find_backref_chain(obj, predicate, max_depth=20, extra_ignore=(),
+ gc_collect_gen=2):
"""Find a shortest chain of references leading to obj.
The start of the chain will be some object that matches your predicate.
@@ -629,6 +661,9 @@ def find_backref_chain(obj, predicate, max_depth=20, extra_ignore=()):
``extra_ignore`` can be a list of object IDs to exclude those objects from
your search.
+ ``gc_collect_gen`` specifies the generation to be collected in the call to
+ gc.collect(). The default is to run a full collection.
+
Example:
>>> find_backref_chain(obj, is_proper_module)
@@ -639,15 +674,19 @@ def find_backref_chain(obj, predicate, max_depth=20, extra_ignore=()):
.. versionchanged:: 1.5
Returns ``obj`` instead of ``None`` when a chain could not be found.
+ .. versionchanged:: 3.6
+ New parameter: ``gc_collect_gen``.
+
"""
return _find_chain(obj, predicate, gc.get_referrers,
- max_depth=max_depth, extra_ignore=extra_ignore)
+ max_depth=max_depth, extra_ignore=extra_ignore,
+ gc_collect_gen=gc_collect_gen)
def show_backrefs(objs, max_depth=3, extra_ignore=(), filter=None, too_many=10,
highlight=None, filename=None, extra_info=None,
refcounts=False, shortnames=True, output=None,
- extra_node_attrs=None):
+ extra_node_attrs=None, gc_collect_gen=2):
"""Generate an object reference graph ending at ``objs``.
The graph will show you what objects refer to ``objs``, directly and
@@ -693,6 +732,9 @@ def show_backrefs(objs, max_depth=3, extra_ignore=(), filter=None, too_many=10,
names ('package.module.ClassName'). By default you get to see only the
class name part.
+ ``gc_collect_gen`` specifies the generation to be collected in the call to
+ gc.collect(). The default is to run a full collection.
+
Examples:
>>> show_backrefs(obj)
@@ -717,6 +759,9 @@ class name part.
.. versionchanged:: 3.5
New parameter: ``extra_node_attrs``.
+
+ .. versionchanged:: 3.6
+ New parameter: ``gc_collect_gen``.
"""
# For show_backrefs(), it makes sense to stop when reaching a
# module because you'll end up in sys.modules and explode the
@@ -728,13 +773,14 @@ class name part.
filename=filename, output=output, extra_info=extra_info,
refcounts=refcounts, shortnames=shortnames,
cull_func=is_proper_module,
- extra_node_attrs=extra_node_attrs)
+ extra_node_attrs=extra_node_attrs,
+ gc_collect_gen=gc_collect_gen)
def show_refs(objs, max_depth=3, extra_ignore=(), filter=None, too_many=10,
highlight=None, filename=None, extra_info=None,
refcounts=False, shortnames=True, output=None,
- extra_node_attrs=None):
+ extra_node_attrs=None, gc_collect_gen=2):
"""Generate an object reference graph starting at ``objs``.
The graph will show you what objects are reachable from ``objs``, directly
@@ -774,6 +820,9 @@ def show_refs(objs, max_depth=3, extra_ignore=(), filter=None, too_many=10,
Specify ``refcounts=True`` if you want to see reference counts.
+ ``gc_collect_gen`` specifies the generation to be collected in the call to
+ gc.collect(). The default is to run a full collection.
+
Examples:
>>> show_refs(obj)
@@ -801,13 +850,17 @@ def show_refs(objs, max_depth=3, extra_ignore=(), filter=None, too_many=10,
.. versionchanged:: 3.5
New parameter: ``extra_node_attrs``.
+
+ .. versionchanged:: 3.6
+ New parameter: ``gc_collect_gen``.
"""
return _show_graph(objs, max_depth=max_depth, extra_ignore=extra_ignore,
filter=filter, too_many=too_many, highlight=highlight,
edge_func=gc.get_referents, swap_source_target=True,
filename=filename, extra_info=extra_info,
refcounts=refcounts, shortnames=shortnames,
- output=output, extra_node_attrs=extra_node_attrs)
+ output=output, extra_node_attrs=extra_node_attrs,
+ gc_collect_gen=gc_collect_gen)
def show_chain(*chains, **kw):
@@ -881,7 +934,8 @@ def is_proper_module(obj):
# Internal helpers
#
-def _find_chain(obj, predicate, edge_func, max_depth=20, extra_ignore=()):
+def _find_chain(obj, predicate, edge_func, max_depth=20, extra_ignore=(),
+ gc_collect_gen=2):
queue = [obj]
depth = {id(obj): 0}
parent = {id(obj): None}
@@ -893,7 +947,7 @@ def _find_chain(obj, predicate, edge_func, max_depth=20, extra_ignore=()):
ignore.add(id(ignore))
ignore.add(id(sys._getframe())) # this function
ignore.add(id(sys._getframe(1))) # find_chain/find_backref_chain
- gc.collect()
+ gc.collect(gc_collect_gen)
while queue:
target = queue.pop(0)
if predicate(target):
@@ -920,7 +974,8 @@ def _show_graph(objs, edge_func, swap_source_target,
max_depth=3, extra_ignore=(), filter=None, too_many=10,
highlight=None, filename=None, extra_info=None,
refcounts=False, shortnames=True, output=None,
- cull_func=None, extra_node_attrs=None):
+ cull_func=None, extra_node_attrs=None,
+ gc_collect_gen=2):
if not _isinstance(objs, (list, tuple)):
objs = [objs]
@@ -963,7 +1018,7 @@ def _show_graph(objs, edge_func, swap_source_target,
depth[id(obj)] = 0
queue.append(obj)
del obj
- gc.collect()
+ gc.collect(gc_collect_gen)
nodes = 0
while queue:
nodes += 1
diff --git a/tests.py b/tests.py
index 27a325a..1831811 100755
--- a/tests.py
+++ b/tests.py
@@ -339,6 +339,47 @@ def test_growth(self):
self.assertEqual(1, len(cared))
self.assertEqual(1, cared[0][2])
+ def test_growth_override_gc_collect_gen(self):
+
+ """
+ Inspiration taken from https://bugs.python.org/issue39061, attachment
+ late_gc.py
+ """
+ class ApparentlyLeakingObj:
+ """Object keeping references to itself"""
+ def __init__(self):
+ self.create_cycle = self
+
+ def trigger_memory_leak_look_alike():
+ for i in range(1000):
+ apparently_leaking = ApparentlyLeakingObj()
+ # Create a batch of small cyclical objects for the garbage
+ # collector to free while "working" on the apparently leaking object
+ for _ in range(90):
+ light_cyclical_object = list()
+ light_cyclical_object.append(light_cyclical_object)
+ del apparently_leaking
+
+ # First, make sure that when using the default garbage collection
+ # generation parameter, there is no memory leak look alike: there
+ # should not be any ApparentlyLeakingObj in the growth info
+ objgraph.growth(limit=None)
+ trigger_memory_leak_look_alike()
+ growth_info = objgraph.growth(limit=None)
+
+ assert not any(record[0] == 'ApparentlyLeakingObj'
+ for record in growth_info)
+
+ # Now, only collect up to generation 1, keeping the objects in
+ # generation 2 intact. There should be at least one
+ # ApparentlyLeakingObj in the growth info
+ objgraph.growth(limit=None, gc_collect_gen=1)
+ trigger_memory_leak_look_alike()
+ growth_info = objgraph.growth(limit=None, gc_collect_gen=1)
+
+ assert any(record[0] == 'ApparentlyLeakingObj'
+ for record in growth_info)
+
def test_show_growth_custom_peak_stats(self):
ps = {}
objgraph.show_growth(peak_stats=ps, file=StringIO())