On 11/06/2013 01:06 AM, Dmitry V. Levin wrote:
> Looks like all FD_ISSET implementations just test the n-th bit in the
> array of long ints.  I've pushed a commit with yet another FD_ISSET
> implementation that hopefully does the same.
> 
>> On the related note, how are we doing in "stracing 32-bit app
>> with 64-bit strace on a big-endian machine" case?
>> In that case, sizeof(long) is important...
>> I dread to think about that:(
> 
> We cannot make things worse than they were since the beginning. :)

Testing each bit with FD_ISSET is silly in any case, so I attempted to fix
both "bit-endian wordsize bug" and performance problem in one go,
by introducing a next_set_bit() function which just gives us
a next bit's position.

It skips over entire zero bytes. By using bytes instead of words,
it can walk bytes in big-endian sequence suitable for current_wordsize
without the need of awkward code to access a runtime-variable-width word.

It also skips zero bytes even in not-fully zeroed words,
which may be a win on speed.

Run-tested, although I do not have the environment to reproduce
the bit-endian wordsize bug.

Dmitry, please review.

Patch #2 just syncs up select decoding in pathtrace.c with the main one.

-- 
vda

diff -d -urpN strace.0/defs.h strace.1/defs.h
--- strace.0/defs.h	2013-09-22 10:55:43.000000000 +0200
+++ strace.1/defs.h	2013-11-08 11:33:10.086820385 +0100
@@ -648,6 +648,7 @@ extern const char *xlookup(const struct
 
 extern int string_to_uint(const char *str);
 extern int string_quote(const char *, char *, long, int);
+extern int next_set_bit(const void *bit_array, unsigned cur_bit, unsigned size_bits);
 
 /* a refers to the lower numbered u_arg,
  * b refers to the higher numbered u_arg
diff -d -urpN strace.0/desc.c strace.1/desc.c
--- strace.0/desc.c	2013-11-06 11:33:14.000000000 +0100
+++ strace.1/desc.c	2013-11-08 11:55:15.583465898 +0100
@@ -477,22 +477,6 @@ sys_getdtablesize(struct tcb *tcp)
 }
 #endif
 
-/* FD_ISSET from libc would abort for large fd if built with
- * debug flags/library hacks which enforce array bound checks
- * (fd_set contains a fixed-size array of longs).
- * We need to use a homegrown replacement.
- */
-static inline int
-fd_isset(unsigned fd, fd_set *fds)
-{
-	/* Using unsigned types to avoid signed divisions and shifts,
-	 * which are slow(er) on many CPUs.
-	 */
-	const unsigned bpl = 8 * sizeof(long);
-	unsigned long *s = (unsigned long *) fds;
-	return s[fd / bpl] & (1UL << (fd % bpl));
-}
-
 static int
 decode_select(struct tcb *tcp, long *args, enum bitness_t bitness)
 {
@@ -518,7 +502,7 @@ decode_select(struct tcb *tcp, long *arg
 	 * We had bugs a-la "while (j < args[0])" and "umoven(args[0])" below.
 	 * Instead of args[0], use nfds for fd count, fdsize for array lengths.
 	 */
-	fdsize = (((nfds + 7) / 8) + sizeof(long)-1) & -sizeof(long);
+	fdsize = (((nfds + 7) / 8) + current_wordsize-1) & -current_wordsize;
 
 	if (entering(tcp)) {
 		tprintf("%d", (int) args[0]);
@@ -543,12 +527,13 @@ decode_select(struct tcb *tcp, long *arg
 				continue;
 			}
 			tprints(", [");
-			for (j = 0, sep = ""; j < nfds; j++) {
-				if (fd_isset(j, fds)) {
-					tprints(sep);
-					printfd(tcp, j);
-					sep = " ";
-				}
+			for (j = 0, sep = "";; j++) {
+				j = next_set_bit(fds, j, nfds);
+				if (j < 0)
+					break;
+				tprints(sep);
+				printfd(tcp, j);
+				sep = " ";
 			}
 			tprints("]");
 		}
@@ -583,26 +568,27 @@ decode_select(struct tcb *tcp, long *arg
 			arg = args[i+1];
 			if (!arg || umoven(tcp, arg, fdsize, (char *) fds) < 0)
 				continue;
-			for (j = 0; j < nfds; j++) {
-				if (fd_isset(j, fds)) {
-					/* +2 chars needed at the end: ']',NUL */
-					if (outptr < end_outstr - (sizeof(", except [") + sizeof(int)*3 + 2)) {
-						if (first) {
-							outptr += sprintf(outptr, "%s%s [%u",
-								sep,
-								i == 0 ? "in" : i == 1 ? "out" : "except",
-								j
-							);
-							first = 0;
-							sep = ", ";
-						}
-						else {
-							outptr += sprintf(outptr, " %u", j);
-						}
+			for (j = 0;; j++) {
+				j = next_set_bit(fds, j, nfds);
+				if (j < 0)
+					break;
+				/* +2 chars needed at the end: ']',NUL */
+				if (outptr < end_outstr - (sizeof(", except [") + sizeof(int)*3 + 2)) {
+					if (first) {
+						outptr += sprintf(outptr, "%s%s [%u",
+							sep,
+							i == 0 ? "in" : i == 1 ? "out" : "except",
+							j
+						);
+						first = 0;
+						sep = ", ";
+					}
+					else {
+						outptr += sprintf(outptr, " %u", j);
 					}
-					if (--ready_fds == 0)
-						break;
 				}
+				if (--ready_fds == 0)
+					break;
 			}
 			if (outptr != outstr)
 				*outptr++ = ']';
diff -d -urpN strace.0/util.c strace.1/util.c
--- strace.0/util.c	2013-11-05 11:37:22.000000000 +0100
+++ strace.1/util.c	2013-11-08 12:18:17.794530414 +0100
@@ -160,6 +160,53 @@ stpcpy(char *dst, const char *src)
 }
 #endif
 
+/* Find a next bit which is set.
+ * Starts testing at cur_bit.
+ * Returns -1 if no more bits are set.
+ *
+ * We never touch bytes we don't need to.
+ * On big-endian, array is assumed to consist of
+ * current_wordsize wide words: for example, is current_wordsize is 4,
+ * the bytes are walked in 3,2,1,0, 7,6,5,4, 11,10,9,8 ... sequence.
+ * On little-endian machines, word size is immaterial.
+ */
+int
+next_set_bit(const void *bit_array, unsigned cur_bit, unsigned size_bits)
+{
+	const unsigned endian = 1;
+	int little_endian = *(char*)&endian;
+
+	const uint8_t *array = bit_array;
+	unsigned pos = cur_bit / 8;
+	unsigned pos_xor_mask = little_endian ? 0 : current_wordsize-1;
+
+	for (;;) {
+		uint8_t bitmask;
+		uint8_t cur_byte;
+
+		if (cur_bit >= size_bits)
+			return -1;
+		cur_byte = array[pos ^ pos_xor_mask];
+		if (cur_byte == 0) {
+			cur_bit = (cur_bit + 8) & (-8);
+			pos++;
+			continue;
+		}
+		bitmask = 1 << (cur_bit & 7);
+		for (;;) {
+			if (cur_byte & bitmask)
+				return cur_bit;
+			cur_bit++;
+			if (cur_bit >= size_bits)
+				return -1;
+			bitmask <<= 1;
+			/* This check *can't be* optimized out: */
+			if (bitmask == 0)
+				break;
+		}
+		pos++;
+	}
+}
 /*
  * Print entry in struct xlat table, if there.
  */
diff -d -urpN strace.1/pathtrace.c strace.2/pathtrace.c
--- strace.1/pathtrace.c	2013-09-22 10:55:43.000000000 +0200
+++ strace.2/pathtrace.c	2013-11-08 11:53:46.047213903 +0100
@@ -257,11 +257,12 @@ pathtrace_match(struct tcb *tcp)
 	    s->sys_func == sys_pselect6)
 	{
 		int     i, j;
-		unsigned nfds;
+		int     nfds;
 		long   *args, oldargs[5];
 		unsigned fdsize;
 		fd_set *fds;
 
+		args = tcp->u_arg;
 		if (s->sys_func == sys_oldselect) {
 			if (umoven(tcp, tcp->u_arg[0], sizeof oldargs,
 				   (char*) oldargs) < 0)
@@ -270,17 +271,17 @@ pathtrace_match(struct tcb *tcp)
 				return 0;
 			}
 			args = oldargs;
-		} else
-			args = tcp->u_arg;
+		}
 
-		nfds = args[0];
+		/* Kernel truncates arg[0] to int, we do the same. */
+		nfds = (int) args[0];
+		/* Kernel rejects negative nfds, so we don't parse it either. */
+		if (nfds <= 0)
+			return 0;
 		/* Beware of select(2^31-1, NULL, NULL, NULL) and similar... */
-		if (args[0] > 1024*1024)
+		if (nfds > 1024*1024)
 			nfds = 1024*1024;
-		if (args[0] < 0)
-			nfds = 0;
-		fdsize = ((((nfds + 7) / 8) + sizeof(long) - 1)
-			  & -sizeof(long));
+		fdsize = (((nfds + 7) / 8) + current_wordsize-1) & -current_wordsize;
 		fds = malloc(fdsize);
 		if (!fds)
 			die_out_of_memory();
@@ -288,17 +289,19 @@ pathtrace_match(struct tcb *tcp)
 		for (i = 1; i <= 3; ++i) {
 			if (args[i] == 0)
 				continue;
-
 			if (umoven(tcp, args[i], fdsize, (char *) fds) < 0) {
 				fprintf(stderr, "umoven() failed\n");
 				continue;
 			}
-
-			for (j = 0; j < nfds; ++j)
-				if (FD_ISSET(j, fds) && fdmatch(tcp, j)) {
+			for (j = 0;; j++) {
+				j = next_set_bit(fds, j, nfds);
+				if (j < 0)
+					break;
+				if (fdmatch(tcp, j)) {
 					free(fds);
 					return 1;
 				}
+			}
 		}
 		free(fds);
 		return 0;
------------------------------------------------------------------------------
November Webinars for C, C++, Fortran Developers
Accelerate application performance with scalable programming models. Explore
techniques for threading, error checking, porting, and tuning. Get the most 
from the latest Intel processors and coprocessors. See abstracts and register
http://pubads.g.doubleclick.net/gampad/clk?id=60136231&iu=/4140/ostg.clktrk
_______________________________________________
Strace-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/strace-devel

Reply via email to