Hi!

----

Attached (as "astksh20130926_sparsefile_cp003.diff.txt") is an updated
patch with |SEEK_HOLE|/|SEEK_DATA|-based sparse file support for
cp(1)/mv(1)/ln(1) and the "lssparsemap" (previously called "lsholes")
builtin.

* Notes:
- This is an unfinished work-in-progress snapshot... just dumping it
here since I've been asked to come up with an updated version
- Tests are still missing
- lssparsemap(1) still needs its --man output finished
- Note that the "builtksh93.sh" wrapper script to build ksh93 requires
manual entries for "lssparsemap"
- cp(1) still has no --sparse option

Comments/rants/feedback etc. welcome... :-)

----

Bye,
Roland

-- 
  __ .  . __
 (o.\ \/ /.o) [email protected]
  \__\/\/__/  MPEG specialist, C&&JAVA&&Sun&&Unix programmer
  /O /==\ O\  TEL +49 641 3992797
 (;O/ \/ \O;)
diff -r -u original/src/lib/libast/include/sfio.h build_cpsparse/src/lib/libast/include/sfio.h
--- src/lib/libast/include/sfio.h       2013-08-27 18:32:42.000000000 +0200
+++ src/lib/libast/include/sfio.h       2013-10-09 17:58:27.594193327 +0200
@@ -252,6 +252,7 @@
 extern ssize_t         sfread _ARG_((Sfio_t*, Void_t*, size_t));
 extern ssize_t         sfwrite _ARG_((Sfio_t*, const Void_t*, size_t));
 extern Sfoff_t         sfmove _ARG_((Sfio_t*, Sfio_t*, Sfoff_t, int));
+extern int             sfcopyfile _ARG_((Sfio_t* ip, Sfio_t* op, int flags));
 extern int             sfclose _ARG_((Sfio_t*));
 extern Sfoff_t         sftell _ARG_((Sfio_t*));
 extern Sfoff_t         sfseek _ARG_((Sfio_t*, Sfoff_t, int));
@@ -322,6 +323,33 @@
 extern ssize_t         sfslen _ARG_((void));
 extern ssize_t         sfmaxr _ARG_((ssize_t, int));
 
+#if defined(SEEK_HOLE) && defined(SEEK_DATA) && !defined(AST_SPARSEFILE_SUPPORT)
+#define AST_SPARSEFILE_SUPPORT 1
+#endif
+
+#if AST_SPARSEFILE_SUPPORT
+typedef struct _sparsefiledatarec      /* one contiguous data or hole region of a file */
+{
+       enum
+       {
+               SPFDREC_UNDEFINED       = 0,    /* never stored by sparsefile_enumerate_holes() */
+               SPFDREC_DATA            = 1,    /* region found via SEEK_DATA */
+               SPFDREC_HOLE            = 2     /* region found via SEEK_HOLE */
+       } type;
+       off_t begin;    /* file offset at which the region starts */
+       off_t end;      /* file offset at which the next region starts; size is end - begin */
+} sparsefiledatarec;
+
+/*
+ * these two should live outside sfio because they operate on fds
+ * and not sfio streams
+ */
+extern bool supports_seek_hole(int fd);
+extern sparsefiledatarec *sparsefile_enumerate_holes(int fd, ssize_t *res_numrec);
+
+#endif /* AST_SPARSEFILE_SUPPORT */
+
+
 #undef extern
 _END_EXTERNS_
 
diff -r -u original/src/lib/libast/sfio/sfmove.c build_cpsparse/src/lib/libast/sfio/sfmove.c
--- src/lib/libast/sfio/sfmove.c        2013-09-20 13:46:38.000000000 +0200
+++ src/lib/libast/sfio/sfmove.c        2013-10-09 17:58:21.239667312 +0200
@@ -244,3 +244,255 @@
        SFOPEN(fr,0);
        SFMTXRETURN(fr, n_move);
 }
+
+
+#if AST_SPARSEFILE_SUPPORT
+/*
+ * Probe whether |SEEK_HOLE|/|SEEK_DATA| can be used on |fd|.
+ *
+ * Returns |true| if both seek modes work for this descriptor and
+ * |false| otherwise. The file offset of |fd| is restored to the
+ * value it had on entry.
+ */
+bool supports_seek_hole(int fd)
+{
+       off_t   origpos;
+       bool    supported = true;
+
+/* Linux does not support |_PC_MIN_HOLE_SIZE| */
+#ifdef _PC_MIN_HOLE_SIZE
+       if (fpathconf(fd, _PC_MIN_HOLE_SIZE) < 0)
+               return (false);
+#endif
+
+       /* the probes below move the offset, remember where we started */
+       origpos = lseek(fd, 0LL, SEEK_CUR);
+       if (origpos < 0)
+               return (false);
+
+       /*
+        * A failing probe means "not supported", for example:
+        * 1. we have been compiled on an OS revision that
+        * supports |SEEK_HOLE| but run on an OS revision
+        * that does not support |SEEK_HOLE|, we get |EINVAL|.
+        * 2. the underlying filesystem does not support
+        * |SEEK_HOLE|, we get |ENOTSUP|.
+        * 3. the descriptor is not seekable at all (|ESPIPE|).
+        *
+        * The only failure that still counts as "supported" is
+        * |ENXIO|: the call itself is understood, there is just no
+        * hole/data at or after offset 0 (e.g. an empty file).
+        */
+       if ((lseek(fd, 0LL, SEEK_HOLE) < 0) && (errno != ENXIO))
+               supported = false;
+       /* Do the same for |SEEK_DATA| */
+       else if ((lseek(fd, 0LL, SEEK_DATA) < 0) && (errno != ENXIO))
+               supported = false;
+
+       if (lseek(fd, origpos, SEEK_SET) < 0)
+               supported = false;
+
+       return (supported);
+}
+
+#if 1  /* debug tracing compiled out: D(x) expands to nothing */
+#define D(x)
+#else  /* debug tracing enabled: D(x) expands to its argument */
+#define D(x) x
+#endif
+
+/*
+ * Build the list of data/hole regions of the file open on |fd|.
+ *
+ * fd          - file descriptor; its file offset is moved by this call
+ * res_numrec  - receives the number of records, or -1 on failure
+ *
+ * Returns a |malloc()|ed array which the caller must |free()|, or
+ * |NULL| on error. Regions are listed in ascending offset order
+ * starting at offset 0, each one as [begin, end). For a file of
+ * size zero a single placeholder record is returned and
+ * |*res_numrec| is set to 0.
+ */
+sparsefiledatarec *sparsefile_enumerate_holes(int fd, ssize_t *res_numrec)
+{
+       off_t                   data_pos,
+                               hole_pos,
+                               next_pos,
+                               pos;
+       struct stat             st;
+       sparsefiledatarec       *rec    = NULL;
+       sparsefiledatarec       *newrec;
+       size_t                  numrec  = 0UL;
+       bool                    is_data;
+       D(int                   saved_errno;)
+
+       *res_numrec = -1L;
+
+       if (fstat(fd, &st) < 0)
+               return (NULL);
+
+       /* special case for files with zero size */
+       if (st.st_size == 0)
+       {
+               rec = malloc(sizeof(sparsefiledatarec));
+               if (!rec)
+                       return (NULL);
+               rec->type       = SPFDREC_DATA;
+               rec->begin      = 0;
+               rec->end        = 0;
+               *res_numrec     = 0;
+               return (rec);
+       }
+
+       for (pos = 0LL ; pos < st.st_size ; pos = next_pos)
+       {
+               data_pos = lseek(fd, pos, SEEK_DATA);
+               D(saved_errno=errno;(void)printf("# data pos = %8lld\n", (long long)data_pos);errno=saved_errno);
+               if (data_pos < 0)
+               {
+                       if (errno != ENXIO)
+                       {
+                               free(rec);
+                               return (NULL);
+                       }
+                       /*
+                        * |ENXIO|: no more data after |pos|, i.e. the
+                        * file ends with a hole that runs up to EOF
+                        */
+                       data_pos = st.st_size;
+               }
+
+               hole_pos = lseek(fd, pos, SEEK_HOLE);
+               D(saved_errno=errno;(void)printf("# hole pos = %8lld\n", (long long)hole_pos);errno=saved_errno);
+               if (hole_pos < 0)
+               {
+                       if (errno != ENXIO)
+                       {
+                               free(rec);
+                               return (NULL);
+                       }
+                       /* |ENXIO|: treat EOF as the next hole */
+                       hole_pos = st.st_size;
+               }
+
+               /*
+                * Whichever of the two lookups did not move tells us
+                * what kind of region starts at |pos|; the other one
+                * marks where that region ends.
+                */
+               if (data_pos == pos)
+               {
+                       is_data  = true;
+                       next_pos = hole_pos;
+               }
+               else if (hole_pos == pos)
+               {
+                       is_data  = false;
+                       next_pos = data_pos;
+               }
+               else
+               {
+                       free(rec);
+                       return (NULL);
+               }
+
+               /* inconsistent answers must not make us loop forever */
+               if (next_pos <= pos)
+               {
+                       free(rec);
+                       return (NULL);
+               }
+
+               /* keep |rec| valid until |realloc()| succeeded so it cannot leak */
+               newrec = realloc(rec, sizeof(sparsefiledatarec)*(numrec+1));
+               if (!newrec)
+               {
+                       free(rec);
+                       return (NULL);
+               }
+               rec = newrec;
+               rec[numrec].type        = is_data ? SPFDREC_DATA : SPFDREC_HOLE;
+               rec[numrec].begin       = pos;
+               rec[numrec].end         = next_pos;
+               numrec++;
+       }
+
+       *res_numrec = (ssize_t)numrec;
+
+       return (rec);
+}
+#endif /* AST_SPARSEFILE_SUPPORT */
+
+
+/*
+ * This function is a layer above |sfmove()| to copy both data and
+ * holes in sparse files. Most consumers in sfio (AFAIK until now
+ * only cp(1)/mv(1)/ln(1) fall into this category) do not deal with
+ * preserving holes in sparse files at all so we moved this
+ * extra complexity into a separate function.
+ *
+ * ip    - input stream
+ * op    - output stream
+ * flags - currently unused
+ *
+ * Returns 0 on success or a negative value on failure. The absolute
+ * value is a bit mask: 1 = problem on the input side, 2 = problem on
+ * the output side (a failed |sfmove()| reports both, i.e. -3).
+ */
+int sfcopyfile(Sfio_t* ip, Sfio_t* op, int flags)
+{
+       int                     op_flags_saved;
+       int                     res = 0;
+#if AST_SPARSEFILE_SUPPORT
+       int                     rfd;
+       off_t                   origpos; /* original position */
+       sparsefiledatarec       *sprec = NULL;
+       ssize_t                 spnumrec = 0L;
+
+       rfd = sffileno(ip);
+
+       /*
+        * We enumerate the data/hole sections before copying the data
+        * for two reasons:
+        * 1. Early filesystem implementations of |SEEK_HOLE|/|SEEK_DATA|
+        *    had bugs when doing both data/hole enumeration and reading
+        *    data at the same time
+        * 2. Avoid extra seeking around which would otherwise be needed
+        *    to avoid clashes between copy buffer size vs. data/hole
+        *    boundaries. The boundaries *MUST* be preserved since
+        *    filesystems are allowed to turn two consecutive |lseek()|
+        *    calls into two holes instead of combining them into one.
+        *    The same can happen for data sections, i.e. two |write()|
+        *    calls may result in two independent |SEEK_DATA| sections.
+        *
+        * If the input is not seekable (pipe, tty, ...) there is no
+        * map to build and we use the plain |sfmove()| path below
+        * instead of failing the whole copy.
+        *
+        * NOTE(review): the map always starts at file offset 0 while
+        * the copy starts at the current stream position - confirm
+        * that all callers pass a stream positioned at offset 0.
+        */
+       origpos = lseek(rfd, 0LL, SEEK_CUR);
+       if (origpos >= 0)
+       {
+               sprec = sparsefile_enumerate_holes(rfd, &spnumrec);
+               if (lseek(rfd, origpos, SEEK_SET) < 0)
+               {
+                       free(sprec);
+                       return (-1);
+               }
+
+               /*
+                * An empty map is returned for everything that reports
+                * a size of zero. That includes objects which still
+                * deliver data when read, so let |sfmove()| find out.
+                */
+               if (sprec && (spnumrec <= 0))
+               {
+                       free(sprec);
+                       sprec = NULL;
+               }
+       }
+#endif /* AST_SPARSEFILE_SUPPORT */
+
+       /*
+        * (Temporarily) set |SF_WHOLE| to prevent |sfmove()| from turning
+        * sequences of zero bytes into (more) holes (this would lead to
+        * data corruption for applications (like Oracle DB) which actually
+        * expect the holes to be at the correct positions).
+        *
+        * The difference is in this case that sequences of zero bytes
+        * represent "valid data of zero bytes here" while the holes
+        * represent "no data here". Turning the zero bytes into holes would
+        * therefore destroy data.
+        */
+       op_flags_saved = op->flags & SF_WHOLE;
+       op->flags |= SF_WHOLE;
+
+#if AST_SPARSEFILE_SUPPORT
+       if (sprec)
+       {
+               ssize_t i;
+
+               for (i=0 ; (i < spnumrec) && (res == 0) ; i++)
+               {
+                       Sfoff_t movesize = sprec[i].end - sprec[i].begin;
+                       switch(sprec[i].type)
+                       {
+                               case SPFDREC_DATA:
+                                       if (sfmove(ip, op, movesize, -1) < 0)
+                                               res |= 3;
+                                       break;
+                               case SPFDREC_HOLE:
+                                       /*
+                                        * NOTE(review): presumably a hole at the
+                                        * very end needs the output to be extended
+                                        * explicitly - verify that seeking alone
+                                        * is enough.
+                                        */
+                                       if (sfseek(ip, movesize, SEEK_CUR) < 0)
+                                               res |= 1;
+                                       if (sfseek(op, movesize, SEEK_CUR) < 0)
+                                               res |= 2;
+                                       break;
+                               default:
+                                       break;
+                       }
+               }
+
+               /*
+                * Just seeking to a new position does not set
+                * the sfio-internal eof flag. If the file
+                * ends with a hole we explicitly have to read
+                * something to get the EOF (or not)
+                */
+               if ((res == 0) && (sfgetc(ip) != EOF))
+               {
+                       res |= 1;
+               }
+
+               free(sprec);
+       }
+       else
+#endif /* AST_SPARSEFILE_SUPPORT */
+       {
+               if (sfmove(ip, op, (Sfoff_t)SF_UNBOUND, -1) < 0)
+                       res |= 3;
+               if (!sfeof(ip))
+                       res |= 1;
+       }
+
+       /* put |SF_WHOLE| back the way the caller had it */
+       op->flags = (op->flags & ~SF_WHOLE) | op_flags_saved;
+       return (-res);
+}
diff -r -u original/src/lib/libcmd/cp.c build_cpsparse/src/lib/libcmd/cp.c
--- src/lib/libcmd/cp.c 2013-07-16 23:45:26.000000000 +0200
+++ src/lib/libcmd/cp.c 2013-10-09 18:06:05.034834989 +0200
@@ -620,7 +620,7 @@
                                        return 0;
                                }
                                n = 0;
-                               if (sfmove(ip, op, (Sfoff_t)SF_UNBOUND, -1) < 0)
+                               if (sfcopyfile(ip, op, 0) < 0)
                                        n |= 3;
                                if (!sfeof(ip))
                                        n |= 1;
@@ -1012,3 +1012,284 @@
        }
        return error_info.errors != 0;
 }
+
+
+#if AST_SPARSEFILE_SUPPORT
+/*
+ * Print the |numrec| records of |rec| in the plain text format,
+ * one "data:"/"hole:" line per region. Records of any other type
+ * are skipped.
+ */
+static
+void printrec(sparsefiledatarec *rec, ssize_t numrec)
+{
+       ssize_t         i;
+       const char      *label;
+
+       for (i=0 ; i < numrec ; i++)
+       {
+               switch(rec[i].type)
+               {
+                       case SPFDREC_DATA:
+                               label = "data";
+                               break;
+                       case SPFDREC_HOLE:
+                               label = "hole";
+                               break;
+                       default: /* SPFDREC_UNDEFINED */
+                               continue;
+               }
+
+               /* |off_t| may be wider than |long|, so print via |long long| */
+               (void)printf("%s: from\t%8lld to\t%8lld\t(size %8lld)\n",
+                       label,
+                       (long long)rec[i].begin,
+                       (long long)rec[i].end,
+                       (long long)(rec[i].end - rec[i].begin));
+       }
+}
+
+
+/*
+ * Print the |numrec| records of |rec| as the ksh compound variable
+ * array "sparselayout", one element with type/from/to/size per
+ * region. Records of any other type are skipped.
+ */
+static
+void printreccpv(sparsefiledatarec *rec, ssize_t numrec)
+{
+       ssize_t         i;
+       const char      *kind;
+
+       (void)printf("\ttypeset -C -a sparselayout=(\n");
+
+       for (i=0 ; i < numrec ; i++)
+       {
+               switch(rec[i].type)
+               {
+                       case SPFDREC_DATA:
+                               kind = "data";
+                               break;
+                       case SPFDREC_HOLE:
+                               kind = "hole";
+                               break;
+                       default: /* SPFDREC_UNDEFINED */
+                               continue;
+               }
+
+               /* |off_t| may be wider than |long|, so print via |long long| */
+               (void)printf("\t\t(\n\t\t\ttype='%s'\n\t\t\ttypeset -l -i from=%lld\n\t\t\ttypeset -l -i to=%lld\n\t\t\ttypeset -l -i size=%lld\n\t\t)\n",
+                       kind,
+                       (long long)rec[i].begin,
+                       (long long)rec[i].end,
+                       (long long)(rec[i].end - rec[i].begin));
+       }
+
+       (void)printf("\t)\n");
+}
+
+/*
+ * Tell whether at least one of the first |numrec| entries of |rec|
+ * describes a hole.
+ */
+static
+bool hasholerecord(sparsefiledatarec *rec, ssize_t numrec)
+{
+       ssize_t idx = 0;
+
+       while (idx < numrec)
+       {
+               if (rec[idx].type == SPFDREC_HOLE)
+                       return (true);
+               idx++;
+       }
+
+       return (false);
+}
+
+
+/*
+ * Print the data/hole layout of |filename|, either as plain text or,
+ * if |compoundfmt| is set, as a ksh compound variable.
+ *
+ * Returns |EXIT_SUCCESS| if the layout was printed and
+ * |EXIT_FAILURE| (after issuing a warning) if the file could not be
+ * opened or its layout could not be determined.
+ */
+static
+int do_listdataholeregions(const char *filename, bool compoundfmt)
+{
+       sparsefiledatarec       *map    = NULL;
+       ssize_t                 nmap    = 0;
+       int                     status  = EXIT_FAILURE;
+       int                     fd;
+
+       /* the header is written even if the file turns out to be unusable */
+       if (compoundfmt)
+               (void)printf("(\n\tfilename='%s'\n", filename);
+       else
+               (void)printf("# file: %s\n", filename);
+
+       fd = open(filename, O_RDONLY);
+       if (fd < 0)
+       {
+               error(ERROR_SYSTEM|ERROR_WARNING, "Cannot open %s", filename);
+       }
+       else
+       {
+               /* warnings are issued before |close()| so |errno| is still intact */
+               if (!supports_seek_hole(fd))
+               {
+                       error(ERROR_SYSTEM|ERROR_WARNING, "filesystem does not support holes for %s", filename);
+               }
+               else
+               {
+                       (void)lseek(fd, 0LL, SEEK_SET);
+                       map = sparsefile_enumerate_holes(fd, &nmap);
+                       if (!map)
+                               error(ERROR_SYSTEM|ERROR_WARNING, "cannot obtain list of sparse entries for %s", filename);
+               }
+               (void)close(fd);
+       }
+
+       if (map)
+       {
+               if (compoundfmt)
+                       printreccpv(map, nmap);
+               else
+                       printrec(map, nmap);
+
+               free(map);
+               status = EXIT_SUCCESS;
+       }
+
+       /* close the compound variable opened by the header */
+       if (compoundfmt)
+               (void)printf(")\n");
+
+       return (status);
+}
+
+
+/*
+ * Test whether |filename| contains at least one hole.
+ *
+ * Returns |EXIT_SUCCESS| if a hole was found and |EXIT_FAILURE| if
+ * there is none or if the file could not be examined (a warning is
+ * issued in the latter case).
+ */
+static
+int do_issparsefile(const char *filename)
+{
+       sparsefiledatarec       *map    = NULL;
+       ssize_t                 nmap    = 0;
+       int                     status;
+       int                     fd;
+
+       fd = open(filename, O_RDONLY);
+       if (fd < 0)
+       {
+               error(ERROR_SYSTEM|ERROR_WARNING, "Cannot open %s", filename);
+               return (EXIT_FAILURE);
+       }
+
+       /* warnings are issued before |close()| so |errno| is still intact */
+       if (supports_seek_hole(fd))
+       {
+               (void)lseek(fd, 0LL, SEEK_SET);
+               map = sparsefile_enumerate_holes(fd, &nmap);
+               if (!map)
+                       error(ERROR_SYSTEM|ERROR_WARNING, "cannot obtain list of sparse entries for %s", filename);
+       }
+       else
+       {
+               error(ERROR_SYSTEM|ERROR_WARNING, "filesystem does not support holes for %s", filename);
+       }
+       (void)close(fd);
+
+       if (!map)
+               return (EXIT_FAILURE);
+
+       status = hasholerecord(map, nmap) ? EXIT_SUCCESS : EXIT_FAILURE;
+       free(map);
+
+       return (status);
+}
+#endif /* AST_SPARSEFILE_SUPPORT */
+
+
+/*
+ * Option and usage description handed to optget() by b_lssparsemap().
+ * This is one single string literal; only the last piece may be
+ * followed by a semicolon.
+ */
+static const char optlssparsemap[] =
+"[-?\n@(#)$Id: lssparsemap (AT&T Research) 2013-10-04 $\n]"
+"[-author?Roland Mainz <[email protected]>]"
+"[-license?http://www.eclipse.org/org/documents/epl-v10.html]"
+"[+NAME?lssparsemap - list hole/data layout of sparse files]"
+"[+DESCRIPTION?\blssparsemap\b displays information about sparse files"
+       ".]"
+"[+?Write me.]"
+"[l:list?Print data/hole layout with size and offsets of each region.]"
+"[t:testsparse|issparse?Test whether a file has one or more holes.]"
+"[C:compoundfmt?Output data as sequence of compound variables, one per file.]"
+"\n"
+"\n filename\n"
+"filename ...\n"
+"\n"
+"[+EXIT STATUS?]"
+    "{"
+        "[+0?Successful Completion.]"
+        "[+1?One or more files are not sparse if option --issparse was given.]"
+        "[+>0?An error occurred.]"
+    "}"
+"[+NOTES?]{"
+       "[+?A \"hole\" in a file is defined as a contiguous range of "
+       "bytes in a file, all reading as value of zero, representing "
+       "'no data'. Not all zeros in a file are guranteed to represent "
+       "holes, in fact sequences of zeros can represent valid data with "
+       "the meaning of 'zeros here'.]"
+
+       "[+?For filesystems that do not supply information about holes, "
+       "the file will be represented as one entire data region.]"
+"}"
+
+"[+SEE ALSO?\bcp\b(1), \bmkfile\b(1), \blseek\b(3)]"
+;
+
+/*
+ * Entry point of the lssparsemap builtin.
+ *
+ * Exactly one of -l (list the data/hole layout) and -t (test for
+ * holes) must be given together with at least one file operand;
+ * -C selects compound variable output for -l.
+ *
+ * Returns 0 if every operand was processed successfully and 1 if
+ * any operand failed or sparse files are not supported by this
+ * build.
+ */
+int
+b_lssparsemap(int argc, register char** argv, Shbltin_t* context)
+{
+       int     res             = 0;
+       bool    do_list         = false;
+       bool    do_test         = false;
+       bool    compoundfmt     = false;
+
+       cmdinit(argc, argv, context, ERROR_CATALOG, 0);
+       for (;;)
+       {
+               switch (optget(argv, optlssparsemap))
+               {
+                       case 'l':
+                               do_list = true;
+                               continue;
+                       case 't':
+                               do_test = true;
+                               continue;
+                       case 'C':
+                               compoundfmt = true;
+                               continue;
+                       case ':':
+                               error(2, "%s", opt_info.arg);
+                               break;
+                       case '?':
+                               error(ERROR_usage(2), "%s", opt_info.arg);
+                               break;
+               }
+               break;
+       }
+       argv += opt_info.index;
+       argc -= opt_info.index;
+       if (error_info.errors ||
+               argc < 1 ||
+               (!do_list && !do_test) ||
+               (do_list && do_test))
+               error(ERROR_usage(2), "%s", optusage(NiL));
+
+#if AST_SPARSEFILE_SUPPORT
+       if (do_list || do_test)
+       {
+               const char *name;
+
+               /* keep going after a failure so every operand gets reported */
+               while ((name = *argv++) != NULL)
+               {
+                       int rc;
+
+                       if (do_list)
+                               rc = do_listdataholeregions(name, compoundfmt);
+                       else
+                               rc = do_issparsefile(name);
+
+                       if (rc != 0)
+                               res = 1;
+               }
+       }
+#else /* AST_SPARSEFILE_SUPPORT */
+       error(ERROR_ERROR, "No support for sparse files on this platform");
+       /* nothing was examined, so do not report success */
+       res = 1;
+#endif /* AST_SPARSEFILE_SUPPORT */
+
+       return (res);
+}
_______________________________________________
ast-developers mailing list
[email protected]
http://lists.research.att.com/mailman/listinfo/ast-developers

Reply via email to