The diff below adds ELF support to (BSD) strings(1) and makes it usable on architectures with ELF_TOOLCHAIN=Yes.
I've been working on cross-architecture support for various build utilities (ar, ranlib, nm...) and this diff is the first of a series. Tested on amd64, macppc and vax. It also gives a correct offset on my macppc machine, where the current (binutils) strings(1) always gives a negative offset. The ELF bits are taken from nm(1)'s sources, and I followed the same logic to process a.out/elf{32,64} files. I didn't reuse nm's elf.c because of its global variables. Some love may be needed to prevent code duplication. This diff also includes some cosmetic modifications. If there is interest, I can submit more organic patches. Here is the commit log, which can be found in my git repo [0]: Uninitialized variable, found by the LLVM static analyzer. Improve error handling. Correct format strings now that we use an off_t for the offset. Cosmetic, no functional change. File offset is no longer a global variable. Small changes when printing filenames and offsets to behave like GNU strings(1). Add support for the ELF format using OpenBSD's nm(1) ELF functions. Rewrite find_strings() to use a `size` parameter instead of the global read_len variable; it will be used later to specify an ELF section's size. Use a union for file headers in order to support both ELF and a.out formats. While here, clean up the header selection function. Move the string search into a separate function, no functional change. Move the logic into a separate function, no functional change. Comments?
Martin [0] https://gitorious.org/buildutils/strings Index: Makefile =================================================================== RCS file: /cvs/src/usr.bin/strings/Makefile,v retrieving revision 1.3 diff -u -p -r1.3 Makefile --- Makefile 21 Sep 1997 11:51:00 -0000 1.3 +++ Makefile 5 Apr 2011 06:19:56 -0000 @@ -1,5 +1,14 @@ # $OpenBSD: Makefile,v 1.3 1997/09/21 11:51:00 deraadt Exp $ PROG= strings +SRCS= strings.c elf32.c elf64.c + +CLEANFILES+= elf32.c elf64.c + +elf32.c: ${.CURDIR}/elf.c + echo '#define ELFSIZE 32' | cat - $> > ${.TARGET} + +elf64.c: ${.CURDIR}/elf.c + echo '#define ELFSIZE 64' | cat - $> > ${.TARGET} .include <bsd.prog.mk> Index: elf.c =================================================================== RCS file: elf.c diff -N elf.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ elf.c 5 Apr 2011 06:19:56 -0000 @@ -0,0 +1,172 @@ +/* $OpenBSD: elf.c,v 1.19 2009/10/27 23:59:41 deraadt Exp $ */ + +/* + * Copyright (c) 2003 Michael Shalayeff + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/mman.h> + +#include <a.out.h> +#include <ctype.h> +#include <elf_abi.h> +#include <errno.h> +#include <err.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "elfuncs.h" + +#if ELFSIZE == 32 +#define swap_addr swap32 +#define swap_off swap32 +#define swap_sword swap32 +#define swap_word swap32 +#define swap_sxword swap32 +#define swap_xword swap32 +#define swap_half swap16 +#define swap_quarter swap16 +#define elf_fix_header elf32_fix_header +#define elf_load_shdrs elf32_load_shdrs +#define elf_fix_shdrs elf32_fix_shdrs +#elif ELFSIZE == 64 +#define swap_addr swap64 +#define swap_off swap64 +#ifdef __alpha__ +#define swap_sword swap64 +#define swap_word swap64 +#else +#define swap_sword swap32 +#define swap_word swap32 +#endif +#define swap_sxword swap64 +#define swap_xword swap64 +#define swap_half swap64 +#define swap_quarter swap16 +#define elf_fix_header elf64_fix_header +#define elf_load_shdrs elf64_load_shdrs +#define elf_fix_shdrs elf64_fix_shdrs +#else +#error "Unsupported ELF class" +#endif + +#ifndef SHN_MIPS_ACOMMON +#define SHN_MIPS_ACOMMON SHN_LOPROC + 0 +#endif +#ifndef SHN_MIPS_TEXT +#define SHN_MIPS_TEXT SHN_LOPROC + 1 +#endif +#ifndef SHN_MIPS_DATA +#define SHN_MIPS_DATA SHN_LOPROC + 2 +#endif +#ifndef SHN_MIPS_SUNDEFINED +#define SHN_MIPS_SUNDEFINED SHN_LOPROC + 4 +#endif +#ifndef SHN_MIPS_SCOMMON +#define SHN_MIPS_SCOMMON 
SHN_LOPROC + 3 +#endif + +#ifndef STT_PARISC_MILLI +#define STT_PARISC_MILLI STT_LOPROC + 0 +#endif + +int +elf_fix_header(Elf_Ehdr *eh) +{ + /* nothing to do */ + if (eh->e_ident[EI_DATA] == ELF_TARG_DATA) + return (0); + + eh->e_type = swap16(eh->e_type); + eh->e_machine = swap16(eh->e_machine); + eh->e_version = swap32(eh->e_version); + eh->e_entry = swap_addr(eh->e_entry); + eh->e_phoff = swap_off(eh->e_phoff); + eh->e_shoff = swap_off(eh->e_shoff); + eh->e_flags = swap32(eh->e_flags); + eh->e_ehsize = swap16(eh->e_ehsize); + eh->e_phentsize = swap16(eh->e_phentsize); + eh->e_phnum = swap16(eh->e_phnum); + eh->e_shentsize = swap16(eh->e_shentsize); + eh->e_shnum = swap16(eh->e_shnum); + eh->e_shstrndx = swap16(eh->e_shstrndx); + + return (1); +} + +Elf_Shdr * +elf_load_shdrs(const char *name, FILE *fp, off_t foff, Elf_Ehdr *head) +{ + Elf_Shdr *shdr = NULL; + + elf_fix_header(head); + + if ((shdr = calloc(head->e_shentsize, head->e_shnum)) == NULL) { + warn("%s: malloc shdr", name); + return (NULL); + } + + if (fseeko(fp, foff + head->e_shoff, SEEK_SET)) { + warn("%s: fseeko", name); + free(shdr); + return (NULL); + } + + if (fread(shdr, head->e_shentsize, head->e_shnum, fp) != head->e_shnum) { + warnx("%s: premature EOF", name); + free(shdr); + return (NULL); + } + + elf_fix_shdrs(head, shdr); + return (shdr); +} + +int +elf_fix_shdrs(Elf_Ehdr *eh, Elf_Shdr *shdr) +{ + int i; + + /* nothing to do */ + if (eh->e_ident[EI_DATA] == ELF_TARG_DATA) + return (0); + + for (i = eh->e_shnum; i--; shdr++) { + shdr->sh_name = swap32(shdr->sh_name); + shdr->sh_type = swap32(shdr->sh_type); + shdr->sh_flags = swap_xword(shdr->sh_flags); + shdr->sh_addr = swap_addr(shdr->sh_addr); + shdr->sh_offset = swap_off(shdr->sh_offset); + shdr->sh_size = swap_xword(shdr->sh_size); + shdr->sh_link = swap32(shdr->sh_link); + shdr->sh_info = swap32(shdr->sh_info); + shdr->sh_addralign = swap_xword(shdr->sh_addralign); + shdr->sh_entsize = swap_xword(shdr->sh_entsize); + } + + return 
(1); +} Index: elfuncs.h =================================================================== RCS file: elfuncs.h diff -N elfuncs.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ elfuncs.h 5 Apr 2011 06:19:56 -0000 @@ -0,0 +1,35 @@ +/* $OpenBSD: elfuncs.h,v 1.3 2006/09/30 14:34:13 kettenis Exp $ */ + +/* + * Copyright (c) 2004 Michael Shalayeff + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +int elf32_fix_header(Elf32_Ehdr *eh); +Elf32_Shdr*elf32_load_shdrs(const char *, FILE *, off_t, Elf32_Ehdr *); +int elf32_fix_shdrs(Elf32_Ehdr *eh, Elf32_Shdr *shdr); + +int elf64_fix_header(Elf64_Ehdr *eh); +Elf64_Shdr*elf64_load_shdrs(const char *, FILE *, off_t, Elf64_Ehdr *); +int elf64_fix_shdrs(Elf64_Ehdr *eh, Elf64_Shdr *shdr); Index: strings.c =================================================================== RCS file: /cvs/src/usr.bin/strings/strings.c,v retrieving revision 1.15 diff -u -p -r1.15 strings.c --- strings.c 27 Oct 2009 23:59:43 -0000 1.15 +++ strings.c 5 Apr 2011 06:20:04 -0000 @@ -34,46 +34,53 @@ #include <a.out.h> #include <ctype.h> +#include <elf_abi.h> +#include <err.h> #include <errno.h> #include <fcntl.h> +#include <locale.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <locale.h> #include <unistd.h> -#include <err.h> -#define FORMAT_DEC "%07ld " -#define FORMAT_OCT "%07lo " -#define FORMAT_HEX "%07lx " +#include "elfuncs.h" + +#define FORMAT_DEC "%7lld " +#define FORMAT_OCT "%7llo " +#define FORMAT_HEX "%7llx " #define DEF_LEN 4 /* default minimum string length */ #define ISSTR(ch) (isascii(ch) && (isprint(ch) || ch == '\t')) -typedef struct exec EXEC; /* struct exec cast */ - -static long foff; /* offset in the file */ -static int hcnt, /* head count */ - head_len, /* length of header */ - read_len; /* length to read */ -static u_char hbfr[sizeof(EXEC)]; /* buffer for struct exec */ - -static void usage(void); -int getch(void); +union hdr { + struct exec aout; + Elf32_Ehdr elf32; + Elf64_Ehdr elf64; +}; + +static int hcnt, /* head count */ + head_len; /* length of header */ +static u_char hbuf[sizeof(union hdr)]; /* buffer for header */ + +short scan_entirely; +short print_name; +char *offset_format; +int minlen, maxlen, buflen; + +static void usage(void); +int getch(void); +int process_file(const char *, u_char *); +void find_strings(const char *, u_char *, off_t, off_t); int main(int argc, char 
*argv[]) { - extern char *optarg; - extern int optind; - int ch, cnt; - u_char *C; - EXEC *head; - int exitcode, minlen, maxlen, bfrlen; - short asdata, fflg; - u_char *bfr; - char *file, *p; - char *offset_format; + extern char *optarg; + extern int optind; + int ch, rval = 0; + u_char *buf; + char *file, *p; setlocale(LC_ALL, ""); @@ -81,7 +88,7 @@ main(int argc, char *argv[]) * for backward compatibility, allow '-' to specify 'a' flag; no * longer documented in the man page or usage string. */ - asdata = exitcode = fflg = 0; + scan_entirely = print_name = 0; offset_format = NULL; minlen = -1; maxlen = -1; @@ -103,10 +110,10 @@ main(int argc, char *argv[]) break; case '-': case 'a': - asdata = 1; + scan_entirely = 1; break; case 'f': - fflg = 1; + print_name = 1; break; case 'n': minlen = atoi(optarg); @@ -136,6 +143,7 @@ main(int argc, char *argv[]) case '?': default: usage(); + /* NOTREACHED */ } argc -= optind; argv += optind; @@ -146,85 +154,163 @@ main(int argc, char *argv[]) errx(1, "length less than 1"); if (maxlen != -1 && maxlen < minlen) errx(1, "max length less than min"); - bfrlen = maxlen == -1 ? minlen : maxlen; - bfr = malloc(bfrlen + 1); - if (!bfr) + + buflen = (maxlen == -1) ? 
minlen : maxlen; + if ((buf = malloc(buflen + 1)) == NULL) err(1, "malloc"); - bfr[bfrlen] = '\0'; + buf[buflen] = '\0'; + file = "stdin"; do { if (*argv) { file = *argv++; if (!freopen(file, "r", stdin)) { warn("%s", file); - exitcode = 1; - goto nextfile; + rval = 1; + continue; } } - foff = 0; -#define DO_EVERYTHING() {read_len = -1; head_len = 0; goto start;} - read_len = -1; - if (asdata) - DO_EVERYTHING() - else { - head = (EXEC *)hbfr; - if ((head_len = - read(fileno(stdin), head, sizeof(EXEC))) == -1) - DO_EVERYTHING() - if (head_len == sizeof(EXEC) && !N_BADMAG(*head)) { - foff = N_TXTOFF(*head); - if (fseek(stdin, foff, SEEK_SET) == -1) - DO_EVERYTHING() - read_len = head->a_text + head->a_data; - head_len = 0; + rval |= process_file(file, buf); + } while (*argv); + + exit(rval); +} + +int +process_file(const char *file, u_char *buf) +{ + union hdr *head; + size_t bytes; + off_t foff = 0, offset = 0; + int i, rval = 0; + + head_len = 0; + + if (scan_entirely) { + find_strings(file, buf, 0, 0); + return (0); + } + + head = (union hdr*)hbuf; + bzero(head, sizeof(*head)); + + bytes = fread((char *)head, 1, sizeof(*head), stdin); + if (bytes == -1) { + find_strings(file, buf, 0, 0); + return (1); + } + + if (bytes >= sizeof(head->aout) && !N_BADMAG(head->aout)) { + foff = N_TXTOFF(head->aout); + if (fseeko(stdin, foff, SEEK_SET)) { + warn("%s: fseeko", file); + rval = errno; + find_strings(file, buf, 0, 0); + return (rval); + } + offset = head->aout.a_text + head->aout.a_data; + } else if (IS_ELF(head->elf32) && + head->elf32.e_ident[EI_CLASS] == ELFCLASS32 && + head->elf32.e_ident[EI_VERSION] == ELF_TARG_VER) { + Elf32_Shdr *shdrs; + Elf32_Ehdr *h = &head->elf32; + + if ((shdrs = elf32_load_shdrs(file, stdin, 0, h)) == NULL) + return (1); + + for (i = 1; i < h->e_shnum; i++) { + if (shdrs[i].sh_type != SHT_NOBITS && + (shdrs[i].sh_flags & SHF_ALLOC)) { + foff = shdrs[i].sh_offset; + offset = foff + shdrs[i].sh_size; + if (fseeko(stdin, foff, SEEK_SET)) { + 
warn("%s: fseeko", file); + rval = errno; + break; + } + find_strings(file, buf, foff, offset); } - else - hcnt = 0; } -start: - for (cnt = 0, C = bfr; (ch = getch()) != EOF;) { - if (ISSTR(ch)) { - *C++ = ch; - if (++cnt < minlen) - continue; - if (maxlen != -1) { - while ((ch = getch()) != EOF && - ISSTR(ch) && cnt++ < maxlen) - *C++ = ch; - if (ch == EOF || - (ch != 0 && ch != '\n')) { - /* get all of too big string */ - while ((ch = getch()) != EOF && - ISSTR(ch)) - ; - ungetc(ch, stdin); - goto out; - } - *C = 0; + + free(shdrs); + return (rval); + } else if (IS_ELF(head->elf64) && + head->elf64.e_ident[EI_CLASS] == ELFCLASS64 && + head->elf64.e_ident[EI_VERSION] == ELF_TARG_VER) { + Elf64_Shdr *shdrs; + Elf64_Ehdr *h = &head->elf64; + + if ((shdrs = elf64_load_shdrs(file, stdin, 0, h)) == NULL) + return (1); + + for (i = 1; i < h->e_shnum; i++) { + if (shdrs[i].sh_type != SHT_NOBITS && + (shdrs[i].sh_flags & SHF_ALLOC)) { + foff = shdrs[i].sh_offset; + offset = foff + shdrs[i].sh_size; + if (fseeko(stdin, foff, SEEK_SET)) { + warn("%s: fseeko", file); + rval = errno; + break; } + find_strings(file, buf, foff, offset); + } + } + + free(shdrs); + return (rval); + } else { + head_len = bytes; + hcnt = 0; + } - if (fflg) - printf("%s:", file); + find_strings(file, buf, foff, offset); - if (offset_format) - printf(offset_format, foff - minlen); + return (0); +} - printf("%s", bfr); - - if (maxlen == -1) - while ((ch = getch()) != EOF && - ISSTR(ch)) - putchar((char)ch); - putchar('\n'); - out: - ; - } - cnt = 0; - C = bfr; +void +find_strings(const char *filename, u_char *buf, off_t foff, off_t offset) +{ + int i, c = 0; + + while (c != EOF && (foff < offset || !offset)) { + for (i = 0; i < buflen; i++) { + c = getch(); + foff++; + if (!ISSTR(c) || foff == offset) + break; + buf[i] = c; + buf[i + 1] = '\0'; } -nextfile: ; - } while (*argv); - exit(exitcode); + + if (i >= minlen) { + if (print_name) + printf("%s: ", filename); + + if (offset_format) + 
printf(offset_format, (foff - i)); + + printf("%s", buf); + + if (maxlen == -1) + while (c != EOF && (foff < offset || !offset)) { + c = getch(); + foff++; + if (!ISSTR(c) || foff == offset) + break; + putchar(c); + } + putchar('\n'); + + /* Eat all the string if it's too big */ + if (i == maxlen && (c != '\n' || c != '\0')) + while ((foff < offset || !offset) && ISSTR(c)) { + c = getch(); + foff++; + } + } + } } /* @@ -234,21 +320,23 @@ nextfile: ; int getch(void) { - ++foff; if (head_len) { if (hcnt < head_len) - return((int)hbfr[hcnt++]); + return ((int)hbuf[hcnt++]); head_len = 0; } - if (read_len == -1 || read_len-- > 0) - return(getchar()); - return(EOF); + + return (getchar()); } static void usage(void) { + extern char *__progname; + (void)fprintf(stderr, - "usage: strings [-afo] [-m number] [-n number] [-t radix] [file ...]\n"); + "usage: %s [-afo] [-m number] [-n number] [-t radix] [file ...]\n", + __progname); + exit(1); }