Author: Armin Rigo <[email protected]>
Branch: boehm-rawrefcount
Changeset: r86921:5706f541d3da
Date: 2016-09-07 11:00 +0200
http://bitbucket.org/pypy/pypy/changeset/5706f541d3da/
Log: Import the C code from
https://bitbucket.org/arigo/arigo/src/default/hack/pypy-hack/boehm-
rawrefcount/
diff --git a/rpython/rlib/src/boehm-rawrefcount.c
b/rpython/rlib/src/boehm-rawrefcount.c
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/src/boehm-rawrefcount.c
@@ -0,0 +1,258 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+#include <gc/gc.h>
+#include <gc/gc_mark.h>
+
+
+#define REFCNT_FROM_PYPY (LONG_MAX / 4 + 1)
+
+typedef struct pypy_header0 gcobj_t; /* opaque here */
+
+#ifndef _WIN32
+typedef intptr_t Py_ssize_t;
+#else
+typedef long Py_ssize_t;
+#endif
+
+/* this is the first two words of the PyObject structure used in
+ pypy/module/cpyext */
+typedef struct {
+ Py_ssize_t ob_refcnt;
+ Py_ssize_t ob_pypy_link;
+} pyobj_t;
+
+struct link_s {
+ pyobj_t *pyobj; /* NULL if entry unused */
+ uintptr_t gcenc;
+ struct link_s *next_in_bucket;
+};
+
+#define MARKER_LIST_START ((pyobj_t *)-1)
+
+static struct link_s **hash_buckets, *hash_list, *hash_free_list;
+static uintptr_t hash_mask_bucket;
+static intptr_t hash_list_walk_next = -1;
+
+static uintptr_t hash_get_hash(gcobj_t *gcobj)
+{
+ assert(gcobj != NULL);
+ uintptr_t h = (uintptr_t)gcobj;
+ assert((h & 1) == 0);
+ h -= (h >> 6);
+ return h & hash_mask_bucket;
+}
+
+static gcobj_t *decode_gcenc(uintptr_t gcenc)
+{
+ if (gcenc & 1)
+ gcenc = ~gcenc;
+ return (gcobj_t *)gcenc;
+}
+
+static void hash_link(struct link_s *lnk)
+{
+ uintptr_t h = hash_get_hash(decode_gcenc(lnk->gcenc));
+ lnk->next_in_bucket = hash_buckets[h];
+ hash_buckets[h] = lnk;
+}
+
+static void boehm_is_about_to_collect(void);
+
+static void hash_grow_table(void)
+{
+ static int rec = 0;
+ assert(!rec); /* recursive hash_grow_table() */
+ rec = 1;
+
+ if (hash_buckets == NULL)
+ GC_set_start_callback(boehm_is_about_to_collect);
+
+ uintptr_t i, num_buckets = (hash_mask_bucket + 1) * 2;
+ if (num_buckets < 16) num_buckets = 16;
+ assert((num_buckets & (num_buckets - 1)) == 0); /* power of two */
+
+ /* The new hash_buckets: an array of pointers to struct link_s, of
+ length a power of two, used as a dictionary hash table. It is
+ not allocated with Boehm because there is no point in Boehm looking
+ in it.
+ */
+ struct link_s **new_buckets = calloc(num_buckets, sizeof(struct link_s *));
+ assert(new_buckets);
+
+ /* The new hash_list: the array of all struct link_s. Their order
+ is irrelevant. There is a GC_register_finalizer() on the 'gcenc'
+ field, so we don't move the array; instead we allocate a new array
+ to use in addition to the old one. There are a total of 2 to 4
+ times as many 'struct link_s' as the length of 'buckets'.
+ */
+ uintptr_t num_list = num_buckets * 2;
+ struct link_s *new_list = GC_MALLOC(num_list * sizeof(struct link_s));
+ for (i = num_list; i-- > 1; ) {
+ new_list[i].next_in_bucket = hash_free_list;
+ hash_free_list = &new_list[i];
+ }
+ /* list[0] is abused to store a pointer to the previous list and
+ the length of the current list */
+ struct link_s *old_list = hash_list;
+ new_list[0].next_in_bucket = old_list;
+ new_list[0].gcenc = num_list;
+ new_list[0].pyobj = MARKER_LIST_START;
+
+ hash_list = new_list;
+ free(hash_buckets);
+ hash_buckets = new_buckets;
+ hash_mask_bucket = num_buckets - 1;
+ hash_list_walk_next = hash_mask_bucket;
+
+ /* re-add all old 'struct link_s' to the hash_buckets */
+ struct link_s *plist = old_list;
+ while (plist != NULL) {
+ uintptr_t count = plist[0].gcenc;
+ for (i = 1; i < count; i++) {
+ if (plist[i].gcenc != 0)
+ hash_link(&plist[i]);
+ }
+ plist = plist[0].next_in_bucket;
+ }
+ GC_reachable_here(old_list);
+
+ rec = 0;
+}
+
+static void hash_add_entry(gcobj_t *gcobj, pyobj_t *pyobj)
+{
+ if (hash_free_list == NULL) {
+ hash_grow_table();
+ }
+ assert(pyobj->ob_pypy_link == 0);
+
+ struct link_s *lnk = hash_free_list;
+ hash_free_list = lnk->next_in_bucket;
+ lnk->pyobj = pyobj;
+ lnk->gcenc = (uintptr_t)gcobj;
+ pyobj->ob_pypy_link = (Py_ssize_t)lnk;
+
+ hash_link(lnk);
+
+ int j = GC_general_register_disappearing_link((void **)&lnk->gcenc, gcobj);
+ assert(j == GC_SUCCESS);
+}
+
+static pyobj_t *hash_get_entry(gcobj_t *gcobj)
+{
+ if (hash_buckets == NULL)
+ return NULL;
+ uintptr_t h = hash_get_hash(gcobj);
+ struct link_s *lnk = hash_buckets[h];
+ while (lnk != NULL) {
+ assert(lnk->pyobj != NULL);
+ if (decode_gcenc(lnk->gcenc) == gcobj)
+ return lnk->pyobj;
+ lnk = lnk->next_in_bucket;
+ }
+ return NULL;
+}
+
+
+RPY_EXTERN
+/*pyobj_t*/void *gc_rawrefcount_next_dead(void)
+{
+ while (hash_list_walk_next >= 0) {
+ struct link_s *p, **pp = &hash_buckets[hash_list_walk_next];
+ while (1) {
+ p = *pp;
+ if (p == NULL)
+ break;
+ assert(p->pyobj != NULL);
+ if (p->gcenc == 0) {
+ /* quadratic time on the number of links from the same
+ bucket chain, but it should be small with very high
+ probability */
+ pyobj_t *result = p->pyobj;
+ printf("next_dead: %p\n", result);
+ assert(result->ob_refcnt == REFCNT_FROM_PYPY);
+ p->pyobj = NULL;
+ *pp = p->next_in_bucket;
+ p->next_in_bucket = hash_free_list;
+ hash_free_list = p;
+ return result;
+ }
+ else {
+ assert(p->gcenc != ~(uintptr_t)0);
+ pp = &p->next_in_bucket;
+ }
+ }
+ hash_list_walk_next--;
+ }
+ return NULL;
+}
+
+RPY_EXTERN
+void gc_rawrefcount_create_link_pypy(/*gcobj_t*/void *gcobj,
+ /*pyobj_t*/void *pyobj)
+{
+ gcobj_t *gcobj1 = (gcobj_t *)gcobj;
+ pyobj_t *pyobj1 = (pyobj_t *)pyobj;
+
+ assert(pyobj1->ob_pypy_link == 0);
+ assert(pyobj1->ob_refcnt >= REFCNT_FROM_PYPY);
+
+ hash_add_entry(gcobj1, pyobj1);
+}
+
+RPY_EXTERN
+/*pyobj_t*/void *gc_rawrefcount_from_obj(/*gcobj_t*/void *gcobj)
+{
+ return hash_get_entry((gcobj_t *)gcobj);
+}
+
+RPY_EXTERN
+/*gcobj_t*/void *gc_rawrefcount_to_obj(/*pyobj_t*/void *pyobj)
+{
+ pyobj_t *pyobj1 = (pyobj_t *)pyobj;
+
+ if (pyobj1->ob_pypy_link == 0)
+ return NULL;
+
+ struct link_s *lnk = (struct link_s *)pyobj1->ob_pypy_link;
+ assert(lnk->pyobj == pyobj1);
+
+ gcobj_t *g = decode_gcenc(lnk->gcenc);
+ assert(g != NULL);
+ return g;
+}
+
+static void boehm_is_about_to_collect(void)
+{
+ struct link_s *plist = hash_list;
+ while (plist != NULL) {
+ uintptr_t i, count = plist[0].gcenc;
+ for (i = 1; i < count; i++) {
+ if (plist[i].gcenc == 0)
+ continue;
+
+ pyobj_t *p = plist[i].pyobj;
+ assert(p != NULL);
+ assert(p->ob_refcnt >= REFCNT_FROM_PYPY);
+
+ printf("plist[%d].gcenc: %p ", (int)i, plist[i].gcenc);
+
+ if ((plist[i].gcenc & 1) ^ (p->ob_refcnt == REFCNT_FROM_PYPY)) {
+ /* ob_refcnt > FROM_PYPY: non-zero regular refcnt,
+ the gc obj must stay alive. decode gcenc.
+ ---OR---
+ ob_refcnt == FROM_PYPY: no refs from C code, the
+ gc obj must not (necessarily) stay alive. encode gcenc.
+ */
+ plist[i].gcenc = ~plist[i].gcenc;
+ }
+ printf("-> %p\n", plist[i].gcenc);
+ }
+ plist = plist[0].next_in_bucket;
+ }
+ if (hash_mask_bucket > 0)
+ hash_list_walk_next = hash_mask_bucket;
+}
diff --git a/rpython/rlib/src/boehm-rawrefcount.h
b/rpython/rlib/src/boehm-rawrefcount.h
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/src/boehm-rawrefcount.h
@@ -0,0 +1,22 @@
+#include "common_header.h"
+
+#define OP_GC_RAWREFCOUNT_INIT(callback, r) /* nothing */
+
+#define OP_GC_RAWREFCOUNT_CREATE_LINK_PYPY(gcobj, pyobj, r) \
+ gc_rawrefcount_create_link_pypy(gcobj, pyobj)
+
+#define OP_GC_RAWREFCOUNT_FROM_OBJ(gcobj, r) \
+ r = gc_rawrefcount_from_obj(gcobj)
+
+#define OP_GC_RAWREFCOUNT_TO_OBJ(pyobj, r) \
+ r = gc_rawrefcount_to_obj(pyobj)
+
+#define OP_GC_RAWREFCOUNT_NEXT_DEAD(r) \
+ r = gc_rawrefcount_next()
+
+
+RPY_EXTERN void gc_rawrefcount_create_link_pypy(/*gcobj_t*/void *gcobj,
+ /*pyobj_t*/void *pyobj);
+RPY_EXTERN /*pyobj_t*/void *gc_rawrefcount_from_obj(/*gcobj_t*/void *gcobj);
+RPY_EXTERN /*gcobj_t*/void *gc_rawrefcount_to_obj(/*pyobj_t*/void *pyobj);
+RPY_EXTERN /*pyobj_t*/void *gc_rawrefcount_next_dead(void);
diff --git a/rpython/rlib/test/test_rawrefcount_boehm.py
b/rpython/rlib/test/test_rawrefcount_boehm.py
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/test/test_rawrefcount_boehm.py
@@ -0,0 +1,230 @@
+import itertools, os, subprocess
+from hypothesis import given, strategies
+from rpython.tool.udir import udir
+
+
+TEST_CODE = r"""
+#define RPY_EXTERN /* nothing */
+#include "boehm-rawrefcount.c"
+
+static gcobj_t *alloc_gcobj(void) /* for tests */
+{
+ gcobj_t *g = GC_MALLOC(1000);
+ printf("gc obj: %p\n", g);
+ return g;
+}
+
+static pyobj_t *alloc_pyobj(void) /* for tests */
+{
+ pyobj_t *p = malloc(1000);
+ p->ob_refcnt = 1;
+ p->ob_pypy_link = 0;
+ printf("py obj: %p\n", p);
+ return p;
+}
+
+static void decref(pyobj_t *p) /* for tests */
+{
+ p->ob_refcnt--;
+ if (p->ob_refcnt == 0) {
+ printf("decref to zero: %p\n", p);
+ free(p);
+ }
+ assert(p->ob_refcnt >= REFCNT_FROM_PYPY ||
+ p->ob_refcnt < REFCNT_FROM_PYPY * 0.99);
+}
+
+void run_test(void); /* forward declaration, produced by the test */
+
+int main(void)
+{
+ run_test();
+ while (gc_rawrefcount_next_dead() != NULL)
+ ;
+ return 0;
+}
+"""
+
+
+operations = strategies.sampled_from([
+ 'new_pyobj',
+ 'new_gcobj',
+ 'create_link',
+ 'from_obj',
+ 'to_obj',
+ 'forget_pyobj',
+ 'forget_gcobj',
+ 'collect',
+ 'dead',
+ ])
+
+
[email protected]
+def make_code(draw):
+ code = []
+ pyobjs = []
+ gcobjs = []
+ num_gcobj = itertools.count()
+ num_pyobj = itertools.count()
+ links_g2p = {}
+ links_p2g = {}
+
+ def new_gcobj():
+ varname = 'g%d' % next(num_gcobj)
+ code.append('gcobj_t *volatile %s = alloc_gcobj();' % varname)
+ gcobjs.append(varname)
+ return varname
+
+ def new_pyobj():
+ varname = 'p%d' % next(num_pyobj)
+ code.append('pyobj_t *%s = alloc_pyobj();' % varname)
+ pyobjs.append(varname)
+ return varname
+
+ for op in draw(strategies.lists(operations, average_size=250)):
+ if op == 'new_gcobj':
+ new_gcobj()
+ elif op == 'new_pyobj':
+ new_pyobj()
+ elif op == 'create_link':
+ gvars = [varname for varname in gcobjs if varname not in links_g2p]
+ if gvars == []:
+ gvars.append(new_gcobj())
+ pvars = [varname for varname in pyobjs if varname not in links_p2g]
+ if pvars == []:
+ pvars.append(new_pyobj())
+ gvar = draw(strategies.sampled_from(gvars))
+ pvar = draw(strategies.sampled_from(pvars))
+ code.append(r'printf("create_link %%p-%%p\n", %s, %s); '
+ % (gvar, pvar) +
+ "%s->ob_refcnt += REFCNT_FROM_PYPY; " % pvar +
+ "gc_rawrefcount_create_link_pypy(%s, %s);"
+ % (gvar, pvar))
+ links_g2p[gvar] = pvar
+ links_p2g[pvar] = gvar
+ elif op == 'from_obj':
+ if gcobjs:
+ prnt = False
+ gvar = draw(strategies.sampled_from(gcobjs))
+ if gvar not in links_g2p:
+ check = "== NULL"
+ elif links_g2p[gvar] in pyobjs:
+ check = "== %s" % (links_g2p[gvar],)
+ else:
+ check = "!= NULL"
+ prnt = True
+ code.append("assert(gc_rawrefcount_from_obj(%s) %s);"
+ % (gvar, check))
+ if prnt:
+ code.append(r'printf("link %%p-%%p\n", %s, '
+ 'gc_rawrefcount_from_obj(%s));' % (gvar, gvar))
+ elif op == 'to_obj':
+ if pyobjs:
+ prnt = False
+ pvar = draw(strategies.sampled_from(pyobjs))
+ if pvar not in links_p2g:
+ check = "== NULL"
+ elif links_p2g[pvar] in gcobjs:
+ check = "== %s" % (links_p2g[pvar],)
+ else:
+ check = "!= NULL"
+ prnt = True
+ code.append("assert(gc_rawrefcount_to_obj(%s) %s);"
+ % (pvar, check))
+ if prnt:
+ code.append(r'printf("link %%p-%%p\n", '
+ 'gc_rawrefcount_to_obj(%s), %s);' % (pvar, pvar))
+ elif op == 'forget_pyobj':
+ if pyobjs:
+ index = draw(strategies.sampled_from(range(len(pyobjs))))
+ pvar = pyobjs.pop(index)
+ code.append(r'printf("-p%%p\n", %s); ' % pvar +
+ "decref(%s); %s = NULL;" % (pvar, pvar))
+ elif op == 'forget_gcobj':
+ if gcobjs:
+ index = draw(strategies.sampled_from(range(len(gcobjs))))
+ gvar = gcobjs.pop(index)
+ code.append(r'printf("-g%%p\n", %s); ' % gvar +
+ "%s = NULL;" % (gvar,))
+ elif op == 'collect':
+ code.append("GC_gcollect();")
+ elif op == 'dead':
+ code.append('gc_rawrefcount_next_dead();')
+ else:
+ assert False, op
+
+ return '\n'.join(code)
+
+
+@given(make_code())
+def test_random(code):
+ filename = str(udir.join("test-rawrefcount-boehm.c"))
+ with open(filename, "w") as f:
+ print >> f, TEST_CODE
+ print >> f, 'void run_test(void) {'
+ print >> f, code
+ print >> f, '}'
+
+ srcdir = os.path.dirname(os.path.dirname(
+ os.path.abspath(os.path.join(__file__))))
+ srcdir = os.path.join(srcdir, 'src')
+
+ err = os.system("cd '%s' && gcc -Werror -lgc -I%s -o
test-rawrefcount-boehm"
+ " test-rawrefcount-boehm.c" % (udir, srcdir))
+ assert err == 0
+ p = subprocess.Popen("./test-rawrefcount-boehm", stdout=subprocess.PIPE,
+ cwd=str(udir))
+ stdout, _ = p.communicate()
+ assert p.wait() == 0
+
+ gcobjs = {}
+ pyobjs = {}
+ links_p2g = {}
+ links_g2p = {}
+ for line in stdout.splitlines():
+ if line.startswith('py obj: '):
+ p = line[8:]
+ assert not pyobjs.get(p)
+ pyobjs[p] = True
+ assert p not in links_p2g
+ elif line.startswith('gc obj: '):
+ g = line[8:]
+ assert not gcobjs.get(g)
+ gcobjs[g] = True
+ if g in links_g2p: del links_g2p[g]
+ elif line.startswith('-p'):
+ p = line[2:]
+ assert pyobjs[p] == True
+ pyobjs[p] = False
+ elif line.startswith('-g'):
+ g = line[2:]
+ assert gcobjs[g] == True
+ gcobjs[g] = False
+ elif line.startswith('decref to zero: '):
+ p = line[16:]
+ assert pyobjs[p] == False
+ assert p not in links_p2g
+ del pyobjs[p]
+ elif line.startswith('create_link '):
+ g, p = line[12:].split('-')
+ assert g in gcobjs
+ assert p in pyobjs
+ assert g not in links_g2p
+ assert p not in links_p2g
+ links_g2p[g] = p
+ links_p2g[p] = g
+ elif line.startswith('link '):
+ g, p = line[5:].split('-')
+ assert g in gcobjs
+ assert p in pyobjs
+ assert links_g2p[g] == p
+ assert links_p2g[p] == g
+ elif line.startswith('plist['):
+ pass
+ elif line.startswith('next_dead: '):
+ p = line[11:]
+ assert pyobjs[p] == False
+ del pyobjs[p]
+ del links_p2g[p]
+ else:
+ assert False, repr(line)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit