reassign 563882 libc6.1 2.10.2-5
severity 563882 critical
retitle 563882 ia64: mmap reading null bytes that should not be there
thanks
Hi libc maintainers,

mmap() on ia64 seems to be totally broken. git does something like the following to detect binary files:

	struct stat st;
	lstat(path, &st);
	int fd = open(path, O_RDONLY);
	void *data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	close(fd);
	binary = !!memchr(data, 0, st.st_size);
	munmap(data, st.st_size);

That is, it maps the file into memory and looks for null bytes. Unfortunately, the test suite when run on merulo and mundy revealed that this was detecting various text files as binary. When mmapping two files in sequence, only the first seems to have this problem.

Test case attached. Usage: compile with

	gcc -Wall -W -O -o generic-is-binary generic-is-binary.c

Take your favorite text file M.out (see <http://bugs.debian.org/563882> and search for "-- %< -- M.out" for the example Andreas used to reproduce this) and run

	<M.out ./generic-is-binary M.out

Then this program would lie to you and say “M.out is binary”

| ametz...@merulo:/tmp$ uname -r
| 2.6.32.2-dsa-ia64-mckinley
| ametz...@merulo:/tmp$ dpkg -l libc6
[...]
| in libc6 <none> (no description available)
| ametz...@merulo:/tmp$ dpkg -l libc6.1
[...]
| ii libc6.1 2.10.2-5 Embedded GNU C Library: Shared libraries
| ametz...@merulo:/tmp$ gcc -Wall -W -O -o generic-is-binary generic-is-binary.c
| ametz...@merulo:/tmp$ popd
| /tmp/GIT/git-core-1.6.6-debug/t/trash directory.t1001-read-tree-m-2way
| ametz...@merulo:/tmp/GIT/git-core-1.6.6-debug/t/trash directory.t1001-read-tree-m-2way$ <M.out /tmp/generic-is-binary M.out
| stdin is not binary
| M.out is binary

See http://bugs.debian.org/563882 for the full story. This regression came in October of last year on caballero. [1]

Any ideas?
Andreas Metzler wrote:

> okay:
> ametz...@merulo:/tmp/GIT/git-core-1.6.6-debug/t/trash
> directory.t1001-read-tree-m-2way$ <4.out
> /tmp/GIT/git-core-1.6.6-debug/git-is-binary 4.out M.out
> static buffer is not binary
> stdin is not binary
> 4.out is binary
> M.out is not binary
> ametz...@merulo:/tmp/GIT/git-core-1.6.6-debug/t/trash
> directory.t1001-read-tree-m-2way$ cp M.out M2.out
> ametz...@merulo:/tmp/GIT/git-core-1.6.6-debug/t/trash
> directory.t1001-read-tree-m-2way$ <4.out
> /tmp/GIT/git-core-1.6.6-debug/git-is-binary M.out M2.out
> static buffer is not binary
> stdin is not binary
> M.out is binary
> M2.out is not binary
> ametz...@merulo:/tmp/GIT/git-core-1.6.6-debug/t/trash
> directory.t1001-read-tree-m-2way$ <M2.out
> /tmp/GIT/git-core-1.6.6-debug/git-is-binary M.out M2.out
> static buffer is not binary
> stdin is not binary
> M.out is binary
> M2.out is not binary

Ugh, so it’s always the first mmap...

>> If M.out (but not stdin) is reported to be binary, great: git is
>> exonerated, and we have an independent test case.
>
> You win. ;-)

Thank you! Reassigning to libc. I will leave the rest of the debugging to someone more knowledgeable about ia64/libc/linux-2.6. ;-)

Thank you for your help tracking this down. You’ve had the patience of a saint.

Regards,
Jonathan

[1] https://buildd.debian.org/build.php?&pkg=git-core&arch=ia64
/*
 * generic-is-binary: report whether standard input and a named file
 * contain NUL bytes.
 *
 * Standard input is consumed with read(2) into a fixed buffer; the file
 * named on the command line is inspected through a private read-only
 * mmap(2), using the lstat/open/mmap/close/memchr/munmap sequence.
 * Feeding the same file through both paths lets the two results be
 * compared.
 *
 * Usage: generic-is-binary <path> < <path>
 */
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <stdio.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>

/* Let GCC-compatible compilers type-check xprintf() arguments. */
#if defined(__GNUC__)
#define PRINTF_LIKE(fmt_idx, arg_idx) \
	__attribute__((format(printf, fmt_idx, arg_idx)))
#else
#define PRINTF_LIKE(fmt_idx, arg_idx)
#endif

static int xprintf(const char *fmt, ...) PRINTF_LIKE(1, 2);
static int buffer_is_binary(const char *ptr, size_t size);
static int check_stdin(void);
static int check_file(const char *path);

/*
 * Check stdin first, then the file named by argv[1].
 *
 * Returns the bitwise OR of both check results: 0 when both checks ran
 * and printed their verdict, -1 when either one failed.  Exits with
 * status 1 on a usage error.
 */
int main(int argc, const char * const argv[])
{
	int result = 0;

	if (argc != 2) {
		fprintf(stderr, "usage: generic-is-binary <path> < <path>\n");
		exit(1);
	}

	result |= check_stdin();
	result |= check_file(argv[1]);
	return result;
}

/* Return 1 if the sz bytes starting at ptr contain a NUL byte, else 0. */
static int buffer_is_binary(const char *ptr, size_t sz)
{
	return !!memchr(ptr, 0, sz);
}

/*
 * Read standard input into a fixed 8000-byte buffer and print whether
 * the bytes read contain a NUL.  Input beyond the buffer capacity is
 * not examined.
 *
 * Returns 0 on success, -1 if reading or printing fails.
 */
static int check_stdin(void)
{
	static char in_buf[8000];
	char *bufp = in_buf;
	char *buf_end = in_buf + sizeof(in_buf);

	/* Stop at end of input or once the buffer is full. */
	while (bufp < buf_end) {
		ssize_t n = read(0, bufp, (size_t)(buf_end - bufp));

		if (n == 0)
			break;
		if (n < 0) {
			/* An interrupted read is not a failure; retry it. */
			if (errno == EINTR)
				continue;
			perror("stdin: read");
			return -1;
		}
		bufp += n;
	}

	return xprintf("stdin is%s binary\n",
		       buffer_is_binary(in_buf, (size_t)(bufp - in_buf))
		       ? "" : " not");
}

/*
 * Return the st_size that lstat() reports for path, or -1 after printing
 * a diagnostic to stderr if lstat() fails.
 */
static ssize_t size(const char *path)
{
	struct stat st;

	if (lstat(path, &st) < 0) {
		fprintf(stderr, "%s: lstat: %s\n", path, strerror(errno));
		return -1;
	}
	return st.st_size;
}

/*
 * Map the file at path read-only and print whether the mapping contains
 * a NUL byte.  The descriptor is closed before the mapping is scanned.
 *
 * Returns 0 on success, -1 (after a diagnostic on stderr) if any system
 * call or the final print fails.
 */
static int check_file(const char *path)
{
	ssize_t sz;
	int fd, result;
	void *data;

	if ((sz = size(path)) < 0)
		return -1;

	if ((fd = open(path, O_RDONLY)) < 0) {
		fprintf(stderr, "%s: open: %s\n", path, strerror(errno));
		return -1;
	}

	if (sz == 0) {
		/*
		 * mmap() rejects a zero-length mapping, and an empty file
		 * cannot contain a NUL byte, so answer without mapping.
		 */
		if (close(fd)) {
			fprintf(stderr, "%s: close: %s\n", path,
				strerror(errno));
			return -1;
		}
		return xprintf("%s is%s binary\n", path, " not");
	}

	data = mmap(NULL, (size_t)sz, PROT_READ, MAP_PRIVATE, fd, 0);
	if (data == MAP_FAILED) {
		fprintf(stderr, "%s: mmap: %s\n", path, strerror(errno));
		close(fd);
		return -1;
	}

	if (close(fd)) {
		fprintf(stderr, "%s: close: %s\n", path, strerror(errno));
		/* Do not leak the mapping on this error path. */
		munmap(data, (size_t)sz);
		return -1;
	}

	result = xprintf("%s is%s binary\n", path,
			 buffer_is_binary(data, (size_t)sz) ? "" : " not");

	if (munmap(data, (size_t)sz)) {
		fprintf(stderr, "%s: munmap: %s\n", path, strerror(errno));
		return -1;
	}
	return result;
}

/*
 * printf() to stdout, collapsing the result to a status code:
 * 0 on success, -1 if vprintf() reports an output error.
 */
static int xprintf(const char *fmt, ...)
{
	int result = 0;
	va_list ap;

	va_start(ap, fmt);
	if (vprintf(fmt, ap) < 0)
		result = -1;
	va_end(ap);
	return result;
}