The diff below adds ELF support to (BSD) strings(1) and makes it usable on
architectures with ELF_TOOLCHAIN=Yes.

I've been working on cross-architecture support for various build utilities
(ar, ranlib, nm...), and this diff is the first of a series.

Tested on amd64, macppc and vax. It also reports correct offsets on my
macppc machine, where the current (binutils) strings(1) always prints a
negative offset.

The ELF bits are taken from nm(1)'s sources, and I followed the same logic
to process a.out/elf{32,64} files. I didn't reuse nm's elf.c because of
its global variables. Some more work may be needed to avoid duplicating
code.

This diff also includes some cosmetic modifications. If there is interest, I
can submit the changes as smaller, separate patches. Here is the commit log
from my git repo [0]:

 Uninitialized variable, found by llvm static analyzer.
 Improve error handling
 Correct format strings now that we use an off_t for the offset.
 Cosmetic, no functional change.
 File offset is no longer a global variable
 Small changes when printing filenames and offsets to behave like GNU
   strings(1).
 Add support for ELF format using OpenBSD's nm(1) elf functions.
 Rewrite find_strings() to use a `size` parameter instead of the global
   read_len variable. It will be used later to specify elf's section size.
 Use a union for file headers in order to support both elf and a.out
   formats. While here clean the header selection function.
 Move the strings search in a separate function, no functional change.
 Move the logic in a separate function, no functional change.


Comments?

Martin

[0] https://gitorious.org/buildutils/strings


Index: Makefile
===================================================================
RCS file: /cvs/src/usr.bin/strings/Makefile,v
retrieving revision 1.3
diff -u -p -r1.3 Makefile
--- Makefile    21 Sep 1997 11:51:00 -0000      1.3
+++ Makefile    5 Apr 2011 06:19:56 -0000
@@ -1,5 +1,14 @@
 #      $OpenBSD: Makefile,v 1.3 1997/09/21 11:51:00 deraadt Exp $
 
 PROG=  strings
+SRCS=  strings.c elf32.c elf64.c
+
+CLEANFILES+=   elf32.c elf64.c
+
+elf32.c: ${.CURDIR}/elf.c
+       echo '#define ELFSIZE 32' | cat - $> > ${.TARGET}
+
+elf64.c: ${.CURDIR}/elf.c
+       echo '#define ELFSIZE 64' | cat - $> > ${.TARGET}
 
 .include <bsd.prog.mk>
Index: elf.c
===================================================================
RCS file: elf.c
diff -N elf.c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ elf.c       5 Apr 2011 06:19:56 -0000
@@ -0,0 +1,172 @@
+/*     $OpenBSD: elf.c,v 1.19 2009/10/27 23:59:41 deraadt Exp $        */
+
+/*
+ * Copyright (c) 2003 Michael Shalayeff
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/mman.h>
+
+#include <a.out.h>
+#include <ctype.h>
+#include <elf_abi.h>
+#include <errno.h>
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "elfuncs.h"
+
+#if ELFSIZE == 32
+#define        swap_addr       swap32
+#define        swap_off        swap32
+#define        swap_sword      swap32
+#define        swap_word       swap32
+#define        swap_sxword     swap32
+#define        swap_xword      swap32
+#define        swap_half       swap16
+#define        swap_quarter    swap16
+#define        elf_fix_header  elf32_fix_header
+#define        elf_load_shdrs  elf32_load_shdrs
+#define        elf_fix_shdrs   elf32_fix_shdrs
+#elif ELFSIZE == 64
+#define        swap_addr       swap64
+#define        swap_off        swap64
+#ifdef __alpha__
+#define        swap_sword      swap64
+#define        swap_word       swap64
+#else
+#define        swap_sword      swap32
+#define        swap_word       swap32
+#endif
+#define        swap_sxword     swap64
+#define        swap_xword      swap64
+#define        swap_half       swap64
+#define        swap_quarter    swap16
+#define        elf_fix_header  elf64_fix_header
+#define        elf_load_shdrs  elf64_load_shdrs
+#define        elf_fix_shdrs   elf64_fix_shdrs
+#else
+#error "Unsupported ELF class"
+#endif
+
+#ifndef        SHN_MIPS_ACOMMON
+#define        SHN_MIPS_ACOMMON        SHN_LOPROC + 0
+#endif
+#ifndef        SHN_MIPS_TEXT
+#define        SHN_MIPS_TEXT           SHN_LOPROC + 1
+#endif
+#ifndef        SHN_MIPS_DATA
+#define        SHN_MIPS_DATA           SHN_LOPROC + 2
+#endif
+#ifndef        SHN_MIPS_SUNDEFINED
+#define        SHN_MIPS_SUNDEFINED     SHN_LOPROC + 4
+#endif
+#ifndef        SHN_MIPS_SCOMMON
+#define        SHN_MIPS_SCOMMON        SHN_LOPROC + 3
+#endif
+
+#ifndef        STT_PARISC_MILLI
+#define        STT_PARISC_MILLI        STT_LOPROC + 0
+#endif
+
+int
+elf_fix_header(Elf_Ehdr *eh)
+{
+       /* nothing to do */
+       if (eh->e_ident[EI_DATA] == ELF_TARG_DATA)
+               return (0);
+
+       eh->e_type = swap16(eh->e_type);
+       eh->e_machine = swap16(eh->e_machine);
+       eh->e_version = swap32(eh->e_version);
+       eh->e_entry = swap_addr(eh->e_entry);
+       eh->e_phoff = swap_off(eh->e_phoff);
+       eh->e_shoff = swap_off(eh->e_shoff);
+       eh->e_flags = swap32(eh->e_flags);
+       eh->e_ehsize = swap16(eh->e_ehsize);
+       eh->e_phentsize = swap16(eh->e_phentsize);
+       eh->e_phnum = swap16(eh->e_phnum);
+       eh->e_shentsize = swap16(eh->e_shentsize);
+       eh->e_shnum = swap16(eh->e_shnum);
+       eh->e_shstrndx = swap16(eh->e_shstrndx);
+
+       return (1);
+}
+
+Elf_Shdr *
+elf_load_shdrs(const char *name, FILE *fp, off_t foff, Elf_Ehdr *head)
+{
+       Elf_Shdr *shdr = NULL;
+
+       elf_fix_header(head);
+
+       if ((shdr = calloc(head->e_shentsize, head->e_shnum)) == NULL) {
+               warn("%s: malloc shdr", name);
+               return (NULL);
+       }
+
+       if (fseeko(fp, foff + head->e_shoff, SEEK_SET)) {
+               warn("%s: fseeko", name);
+               free(shdr);
+               return (NULL);
+       }
+
+       if (fread(shdr, head->e_shentsize, head->e_shnum, fp) != head->e_shnum) {
+               warnx("%s: premature EOF", name);
+               free(shdr);
+               return (NULL);
+       }
+
+       elf_fix_shdrs(head, shdr);
+       return (shdr);
+}
+
+int
+elf_fix_shdrs(Elf_Ehdr *eh, Elf_Shdr *shdr)
+{
+       int i;
+
+       /* nothing to do */
+       if (eh->e_ident[EI_DATA] == ELF_TARG_DATA)
+               return (0);
+
+       for (i = eh->e_shnum; i--; shdr++) {
+               shdr->sh_name = swap32(shdr->sh_name);
+               shdr->sh_type = swap32(shdr->sh_type);
+               shdr->sh_flags = swap_xword(shdr->sh_flags);
+               shdr->sh_addr = swap_addr(shdr->sh_addr);
+               shdr->sh_offset = swap_off(shdr->sh_offset);
+               shdr->sh_size = swap_xword(shdr->sh_size);
+               shdr->sh_link = swap32(shdr->sh_link);
+               shdr->sh_info = swap32(shdr->sh_info);
+               shdr->sh_addralign = swap_xword(shdr->sh_addralign);
+               shdr->sh_entsize = swap_xword(shdr->sh_entsize);
+       }
+
+       return (1);
+}
Index: elfuncs.h
===================================================================
RCS file: elfuncs.h
diff -N elfuncs.h
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ elfuncs.h   5 Apr 2011 06:19:56 -0000
@@ -0,0 +1,35 @@
+/*     $OpenBSD: elfuncs.h,v 1.3 2006/09/30 14:34:13 kettenis Exp $    */
+
+/*
+ * Copyright (c) 2004 Michael Shalayeff
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+int    elf32_fix_header(Elf32_Ehdr *eh);
+Elf32_Shdr*elf32_load_shdrs(const char *, FILE *, off_t, Elf32_Ehdr *);
+int    elf32_fix_shdrs(Elf32_Ehdr *eh, Elf32_Shdr *shdr);
+
+int    elf64_fix_header(Elf64_Ehdr *eh);
+Elf64_Shdr*elf64_load_shdrs(const char *, FILE *, off_t, Elf64_Ehdr *);
+int    elf64_fix_shdrs(Elf64_Ehdr *eh, Elf64_Shdr *shdr);
Index: strings.c
===================================================================
RCS file: /cvs/src/usr.bin/strings/strings.c,v
retrieving revision 1.15
diff -u -p -r1.15 strings.c
--- strings.c   27 Oct 2009 23:59:43 -0000      1.15
+++ strings.c   5 Apr 2011 06:20:04 -0000
@@ -34,46 +34,53 @@
 
 #include <a.out.h>
 #include <ctype.h>
+#include <elf_abi.h>
+#include <err.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <locale.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <locale.h>
 #include <unistd.h>
-#include <err.h>
 
-#define FORMAT_DEC "%07ld "
-#define FORMAT_OCT "%07lo "
-#define FORMAT_HEX "%07lx "
+#include "elfuncs.h"
+
+#define FORMAT_DEC "%7lld "
+#define FORMAT_OCT "%7llo "
+#define FORMAT_HEX "%7llx "
 
 #define DEF_LEN                4               /* default minimum string length */
 #define ISSTR(ch)      (isascii(ch) && (isprint(ch) || ch == '\t'))
 
-typedef struct exec    EXEC;           /* struct exec cast */
-
-static long    foff;                   /* offset in the file */
-static int     hcnt,                   /* head count */
-               head_len,               /* length of header */
-               read_len;               /* length to read */
-static u_char  hbfr[sizeof(EXEC)];     /* buffer for struct exec */
-
-static void usage(void);
-int getch(void);
+union hdr {
+       struct exec     aout;
+       Elf32_Ehdr      elf32;
+       Elf64_Ehdr      elf64;
+};
+
+static int     hcnt,                           /* head count */
+               head_len;                       /* length of header */
+static u_char  hbuf[sizeof(union hdr)];        /* buffer for header */
+
+short          scan_entirely;
+short          print_name;
+char           *offset_format;
+int            minlen, maxlen, buflen;
+
+static void    usage(void);
+int            getch(void);
+int            process_file(const char *, u_char *);
+void           find_strings(const char *, u_char *, off_t, off_t);
 
 int
 main(int argc, char *argv[])
 {
-       extern char *optarg;
-       extern int optind;
-       int ch, cnt;
-       u_char *C;
-       EXEC *head;
-       int exitcode, minlen, maxlen, bfrlen;
-       short asdata, fflg;
-       u_char *bfr;
-       char *file, *p;
-       char *offset_format;
+       extern char     *optarg;
+       extern int       optind;
+       int              ch, rval = 0;
+       u_char          *buf;
+       char            *file, *p;
 
        setlocale(LC_ALL, "");
 
@@ -81,7 +88,7 @@ main(int argc, char *argv[])
         * for backward compatibility, allow '-' to specify 'a' flag; no
         * longer documented in the man page or usage string.
         */
-       asdata = exitcode = fflg = 0;
+       scan_entirely = print_name = 0;
        offset_format = NULL;
        minlen = -1;
        maxlen = -1;
@@ -103,10 +110,10 @@ main(int argc, char *argv[])
                        break;
                case '-':
                case 'a':
-                       asdata = 1;
+                       scan_entirely = 1;
                        break;
                case 'f':
-                       fflg = 1;
+                       print_name = 1;
                        break;
                case 'n':
                        minlen = atoi(optarg);
@@ -136,6 +143,7 @@ main(int argc, char *argv[])
                case '?':
                default:
                        usage();
+                       /* NOTREACHED */
                }
        argc -= optind;
        argv += optind;
@@ -146,85 +154,163 @@ main(int argc, char *argv[])
                errx(1, "length less than 1");
        if (maxlen != -1 && maxlen < minlen)
                errx(1, "max length less than min");
-       bfrlen = maxlen == -1 ? minlen : maxlen;
-       bfr = malloc(bfrlen + 1);
-       if (!bfr)
+
+       buflen = (maxlen == -1) ? minlen : maxlen;
+       if ((buf = malloc(buflen + 1)) == NULL)
                err(1, "malloc");
-       bfr[bfrlen] = '\0';
+       buf[buflen] = '\0';
+
        file = "stdin";
        do {
                if (*argv) {
                        file = *argv++;
                        if (!freopen(file, "r", stdin)) {
                                warn("%s", file);
-                               exitcode = 1;
-                               goto nextfile;
+                               rval = 1;
+                               continue;
                        }
                }
-               foff = 0;
-#define DO_EVERYTHING()                {read_len = -1; head_len = 0; goto start;}
-               read_len = -1;
-               if (asdata)
-                       DO_EVERYTHING()
-               else {
-                       head = (EXEC *)hbfr;
-                       if ((head_len =
-                           read(fileno(stdin), head, sizeof(EXEC))) == -1)
-                               DO_EVERYTHING()
-                       if (head_len == sizeof(EXEC) && !N_BADMAG(*head)) {
-                               foff = N_TXTOFF(*head);
-                               if (fseek(stdin, foff, SEEK_SET) == -1)
-                                       DO_EVERYTHING()
-                               read_len = head->a_text + head->a_data;
-                               head_len = 0;
+               rval |= process_file(file, buf);
+       } while (*argv);
+
+       exit(rval);
+}
+
+int
+process_file(const char *file, u_char *buf)
+{
+       union hdr       *head;
+       size_t           bytes;
+       off_t            foff = 0, offset = 0;
+       int              i, rval = 0;
+
+       head_len = 0;
+
+       if (scan_entirely) {
+               find_strings(file, buf, 0, 0);
+               return (0);
+       }
+
+       head = (union hdr*)hbuf;
+       bzero(head, sizeof(*head));
+
+       bytes = fread((char *)head, 1, sizeof(*head), stdin);
+       if (bytes == -1) {
+               find_strings(file, buf, 0, 0);
+               return (1);
+       }
+
+       if (bytes >= sizeof(head->aout) && !N_BADMAG(head->aout)) {
+               foff = N_TXTOFF(head->aout);
+               if (fseeko(stdin, foff, SEEK_SET)) {
+                       warn("%s: fseeko", file);
+                       rval = errno;
+                       find_strings(file, buf, 0, 0);
+                       return (rval);
+               }
+               offset = head->aout.a_text + head->aout.a_data;
+       } else if (IS_ELF(head->elf32) &&
+           head->elf32.e_ident[EI_CLASS] == ELFCLASS32 &&
+           head->elf32.e_ident[EI_VERSION] == ELF_TARG_VER) {
+               Elf32_Shdr      *shdrs;
+               Elf32_Ehdr      *h = &head->elf32;
+
+               if ((shdrs = elf32_load_shdrs(file, stdin, 0, h)) == NULL)
+                       return (1);
+
+               for (i = 1;  i < h->e_shnum;  i++) {
+                       if (shdrs[i].sh_type != SHT_NOBITS &&
+                           (shdrs[i].sh_flags & SHF_ALLOC)) {
+                               foff = shdrs[i].sh_offset;
+                               offset = foff + shdrs[i].sh_size;
+                               if (fseeko(stdin, foff, SEEK_SET)) {
+                                       warn("%s: fseeko", file);
+                                       rval = errno;
+                                       break;
+                               }
+                               find_strings(file, buf, foff, offset);
                        }
-                       else
-                               hcnt = 0;
                }
-start:
-               for (cnt = 0, C = bfr; (ch = getch()) != EOF;) {
-                       if (ISSTR(ch)) {
-                               *C++ = ch;
-                               if (++cnt < minlen)
-                                       continue;
-                               if (maxlen != -1) {
-                                       while ((ch = getch()) != EOF &&
-                                              ISSTR(ch) && cnt++ < maxlen)
-                                               *C++ = ch;
-                                       if (ch == EOF ||
-                                           (ch != 0 && ch != '\n')) {
-                                               /* get all of too big string */
-                                               while ((ch = getch()) != EOF &&
-                                                      ISSTR(ch))
-                                                       ;
-                                               ungetc(ch, stdin);
-                                               goto out;
-                                       }
-                                       *C = 0;
+
+               free(shdrs);
+               return (rval);
+       } else if (IS_ELF(head->elf64) &&
+           head->elf64.e_ident[EI_CLASS] == ELFCLASS64 &&
+           head->elf64.e_ident[EI_VERSION] == ELF_TARG_VER) {
+               Elf64_Shdr      *shdrs;
+               Elf64_Ehdr      *h = &head->elf64;
+
+               if ((shdrs = elf64_load_shdrs(file, stdin, 0, h)) == NULL)
+                       return (1);
+
+               for (i = 1;  i < h->e_shnum;  i++) {
+                       if (shdrs[i].sh_type != SHT_NOBITS &&
+                           (shdrs[i].sh_flags & SHF_ALLOC)) {
+                               foff = shdrs[i].sh_offset;
+                               offset = foff + shdrs[i].sh_size;
+                               if (fseeko(stdin, foff, SEEK_SET)) {
+                                       warn("%s: fseeko", file);
+                                       rval = errno;
+                                       break;
                                }
+                               find_strings(file, buf, foff, offset);
+                       }
+               }
+
+               free(shdrs);
+               return (rval);
+       } else {
+               head_len = bytes;
+               hcnt = 0;
+       }
 
-                               if (fflg)
-                                       printf("%s:", file);
+       find_strings(file, buf, foff, offset);
 
-                               if (offset_format) 
-                                       printf(offset_format, foff - minlen);
+       return (0);
+}
 
-                               printf("%s", bfr);
-                               
-                               if (maxlen == -1)
-                                       while ((ch = getch()) != EOF &&
-                                              ISSTR(ch))
-                                               putchar((char)ch);
-                               putchar('\n');
-                       out:
-                               ;
-                       }
-                       cnt = 0;
-                       C = bfr;
+void
+find_strings(const char *filename, u_char *buf, off_t foff, off_t offset)
+{
+       int     i, c = 0;
+
+       while (c != EOF && (foff < offset || !offset)) {
+               for (i = 0; i < buflen; i++) {
+                       c = getch();
+                       foff++;
+                       if (!ISSTR(c) || foff == offset)
+                               break;
+                       buf[i] = c;
+                       buf[i + 1] = '\0';
                }
-nextfile: ;
-       } while (*argv);
-       exit(exitcode);
+               
+               if (i >= minlen) {
+                       if (print_name)
+                               printf("%s: ", filename);
+
+                       if (offset_format) 
+                               printf(offset_format, (foff - i));
+
+                       printf("%s", buf);
+
+                       if (maxlen == -1)
+                               while (c != EOF && (foff < offset || !offset)) {
+                                       c = getch();
+                                       foff++;
+                                       if (!ISSTR(c) || foff == offset)
+                                               break;
+                                       putchar(c);
+                               }
+                       putchar('\n');
+
+                       /* Eat all the string if it's too big */
+                       if (i == maxlen && (c != '\n' || c != '\0'))
+                               while ((foff < offset || !offset) && ISSTR(c)) {
+                                       c = getch();
+                                       foff++;
+                               }
+               }
+       }
 }
 
 /*
@@ -234,21 +320,23 @@ nextfile: ;
 int
 getch(void)
 {
-       ++foff;
        if (head_len) {
                if (hcnt < head_len)
-                       return((int)hbfr[hcnt++]);
+                       return ((int)hbuf[hcnt++]);
                head_len = 0;
        }
-       if (read_len == -1 || read_len-- > 0)
-               return(getchar());
-       return(EOF);
+
+       return  (getchar());
 }
 
 static void
 usage(void)
 {
+       extern char *__progname;
+
        (void)fprintf(stderr,
-           "usage: strings [-afo] [-m number] [-n number] [-t radix] [file ...]\n");
+           "usage: %s [-afo] [-m number] [-n number] [-t radix] [file ...]\n",
+           __progname);
+
        exit(1);
 }

Reply via email to