Author: stsp
Date: Tue Jun 14 13:47:02 2011
New Revision: 1135575
URL: http://svn.apache.org/viewvc?rev=1135575&view=rev
Log:
Extract the heuristic that detects binary data from svn_io_detect_mimetype2()
into a separate utility, svn_io_is_binary_data().
The greater plan is to detect whether data in svn: properties is binary.
We need this information when creating reject files for properties,
and also to offer more options during interactive conflict resolution of
property conflicts (currently only mine-full and theirs-full are supported,
which is suboptimal for multi-line properties like svn:ignore or svn:externals).
* subversion/include/svn_io.h
(svn_io_is_binary_data): Declare.
* subversion/libsvn_subr/io.c
(svn_io_detect_mimetype2): Call svn_io_is_binary_data().
(svn_io_is_binary_data): New, extracted from svn_io_detect_mimetype2().
Modified:
subversion/trunk/subversion/include/svn_io.h
subversion/trunk/subversion/libsvn_subr/io.c
Modified: subversion/trunk/subversion/include/svn_io.h
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/include/svn_io.h?rev=1135575&r1=1135574&r2=1135575&view=diff
==============================================================================
--- subversion/trunk/subversion/include/svn_io.h (original)
+++ subversion/trunk/subversion/include/svn_io.h Tue Jun 14 13:47:02 2011
@@ -1786,6 +1786,16 @@ svn_io_detect_mimetype(const char **mime
apr_pool_t *pool);
+/** Examine up to @a len bytes of data in @a buf to determine if the
+ * data can be considered binary data, in which case return TRUE.
+ * If the data can be considered plain-text data, return FALSE.
+ *
+ * @since New in 1.7.
+ */
+svn_boolean_t
+svn_io_is_binary_data(const unsigned char *buf, apr_size_t len);
+
+
/** Wrapper for apr_file_open(). @a fname is utf8-encoded. */
svn_error_t *
svn_io_file_open(apr_file_t **new_file,
Modified: subversion/trunk/subversion/libsvn_subr/io.c
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/io.c?rev=1135575&r1=1135574&r2=1135575&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/io.c (original)
+++ subversion/trunk/subversion/libsvn_subr/io.c Tue Jun 14 13:47:02 2011
@@ -2944,9 +2944,18 @@ svn_io_detect_mimetype2(const char **mim
/* Now close the file. No use keeping it open any more. */
SVN_ERR(svn_io_file_close(fh, pool));
+ if (svn_io_is_binary_data(block, amt_read))
+ *mimetype = generic_binary;
+ return SVN_NO_ERROR;
+}
+
+
+svn_boolean_t
+svn_io_is_binary_data(const unsigned char *buf, apr_size_t len)
+{
/* Right now, this function is going to be really stupid. It's
- going to examine the first block of data, and make sure that 15%
+ going to examine the block of data, and make sure that 15%
of the bytes are such that their value is in the ranges 0x07-0x0D
or 0x20-0x7F, and that none of those bytes is 0x00. If those
criteria are not met, we're calling it binary.
@@ -2955,7 +2964,7 @@ svn_io_detect_mimetype2(const char **mim
the specified ranges, but I flubbed the condition. At any rate,
folks aren't complaining, so I'm not sure that it's worth
adjusting this retroactively now. --cmpilato */
- if (amt_read > 0)
+ if (len > 0)
{
apr_size_t i;
apr_size_t binary_count = 0;
@@ -2963,29 +2972,25 @@ svn_io_detect_mimetype2(const char **mim
/* Run through the data we've read, counting the 'binary-ish'
bytes. HINT: If we see a 0x00 byte, we'll set our count to its
max and stop reading the file. */
- for (i = 0; i < amt_read; i++)
+ for (i = 0; i < len; i++)
{
- if (block[i] == 0)
+ if (buf[i] == 0)
{
- binary_count = amt_read;
+ binary_count = len;
break;
}
- if ((block[i] < 0x07)
- || ((block[i] > 0x0D) && (block[i] < 0x20))
- || (block[i] > 0x7F))
+ if ((buf[i] < 0x07)
+ || ((buf[i] > 0x0D) && (buf[i] < 0x20))
+ || (buf[i] > 0x7F))
{
binary_count++;
}
}
- if (((binary_count * 1000) / amt_read) > 850)
- {
- *mimetype = generic_binary;
- return SVN_NO_ERROR;
- }
+ return (((binary_count * 1000) / len) > 850);
}
- return SVN_NO_ERROR;
+ return FALSE;
}