Module Name: src
Committed By: joerg
Date: Fri Mar 19 16:48:55 UTC 2010
Modified Files:
src/sys/sys: bitops.h
Log Message:
Add functions for replacing runtime invariant 32bit unsigned divisions
with simpler full width multiplications and shifts + adds. The main
operations are generally at least 50% faster when serialised and often
better for parallelism as well.
To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/sys/bitops.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/sys/bitops.h
diff -u src/sys/sys/bitops.h:1.2 src/sys/sys/bitops.h:1.3
--- src/sys/sys/bitops.h:1.2 Mon Apr 28 20:24:10 2008
+++ src/sys/sys/bitops.h Fri Mar 19 16:48:55 2010
@@ -1,11 +1,11 @@
-/* $NetBSD: bitops.h,v 1.2 2008/04/28 20:24:10 martin Exp $ */
+/* $NetBSD: bitops.h,v 1.3 2010/03/19 16:48:55 joerg Exp $ */
/*-
- * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * Copyright (c) 2007, 2010 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
- * by Christos Zoulas.
+ * by Christos Zoulas and Joerg Sonnenberger.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -31,6 +31,8 @@
#ifndef _SYS_BITOPS_H_
#define _SYS_BITOPS_H_
+#include <sys/stdint.h>
+
/*
* Find First Set functions
*/
@@ -256,4 +258,36 @@
-1) : ((sizeof(_n) >= 4 ? fls64(_n) : fls32(_n)) - 1) \
)
+static inline void
+fast_divide32_prepare(uint32_t _div, uint32_t * __restrict _m,
+ uint8_t *__restrict _s1, uint8_t *__restrict _s2)
+{
+ uint64_t _mt;
+ int _l;
+ /*
+  * Precompute, for the divisor _div, the multiplier *_m and the shift
+  * counts *_s1 and *_s2 that fast_divide32() and fast_remainder32()
+  * take in place of performing a real division.  The three outputs
+  * depend only on _div, so they can be computed once and reused for
+  * every dividend.
+  *
+  * _div must not be zero: it is used as a divisor below.
+  *
+  * fls32() is defined earlier in this header; presumably it returns
+  * the 1-based index of the highest set bit and 0 for 0, which would
+  * make _l the ceiling of log2(_div) -- confirm against its
+  * definition.  The multiplier stored in *_m is
+  * floor(2^32 * (2^_l - _div) / _div) + 1.
+  */
+ _l = fls32(_div - 1);
+ _mt = 0x100000000ULL * ((1ULL << _l) - _div);
+ *_m = _mt / _div + 1;
+ *_s1 = (_l > 1) ? 1 : _l; /* min(_l, 1) */
+ *_s2 = (_l == 0) ? 0 : _l - 1; /* max(_l - 1, 0) */
+}
+
+static inline uint32_t
+fast_divide32(uint32_t _v, uint32_t _div, uint32_t _m, uint8_t _s1,
+ uint8_t _s2)
+{
+ uint32_t _t;
+ /*
+  * Compute the quotient of _v and a divisor using one widening
+  * multiplication, shifts and adds instead of a division.  _m, _s1
+  * and _s2 are expected to be the values fast_divide32_prepare()
+  * produced for that divisor.  _div itself is not used in this
+  * function.
+  *
+  * _t is the upper 32 bits of the 64-bit product _v * _m.  Because
+  * _m is below 2^32, _t never exceeds _v, so _v - _t cannot wrap.
+  * With _s1 == 1, _t + ((_v - _t) >> 1) equals (_v + _t) / 2
+  * without the sum _v + _t ever having to fit in 32 bits.
+  */
+ _t = ((uint64_t)_v * _m) >> 32;
+ return (_t + ((_v - _t) >> _s1)) >> _s2;
+}
+
+static inline uint32_t
+fast_remainder32(uint32_t _v, uint32_t _div, uint32_t _m, uint8_t _s1,
+ uint8_t _s2)
+{
+ /*
+  * Remainder of _v divided by _div, obtained as _v minus _div times
+  * the quotient returned by fast_divide32().  _m, _s1 and _s2 are
+  * passed through to fast_divide32() unchanged and are expected to
+  * be the values fast_divide32_prepare() produced for _div.
+  */
+ return _v - _div * fast_divide32(_v, _div, _m, _s1, _s2);
+}
+
#endif /* _SYS_BITOPS_H_ */