pollita         Tue May  2 18:23:33 2006 UTC

  Modified files:              
    /php-src/ext/standard       file.c 
  Log:
  Unicode Updates:
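  file() is now Unicode-ready (Unicode filenames are accepted, and lines are returned as Unicode when the stream is read in Unicode mode)
  get_meta_tags() now encodes Unicode filenames using the filesystem encoding, but the parsed content is still non-Unicode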
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/file.c?r1=1.439&r2=1.440&diff_format=u
Index: php-src/ext/standard/file.c
diff -u php-src/ext/standard/file.c:1.439 php-src/ext/standard/file.c:1.440
--- php-src/ext/standard/file.c:1.439   Fri Apr 28 19:03:57 2006
+++ php-src/ext/standard/file.c Tue May  2 18:23:32 2006
@@ -21,7 +21,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: file.c,v 1.439 2006/04/28 19:03:57 fmk Exp $ */
+/* $Id: file.c,v 1.440 2006/05/02 18:23:32 pollita Exp $ */
 
 /* Synced with php 3.0 revision 1.218 1999-06-16 [ssb] */
 
@@ -360,6 +360,7 @@
 {
        char *filename;
        int filename_len;
+       zend_uchar filename_type;
        zend_bool use_include_path = 0;
        int in_tag = 0, done = 0;
        int looking_for_val = 0, have_name = 0, have_content = 0;
@@ -372,15 +373,22 @@
        memset(&md, 0, sizeof(md));
 
        /* Parse arguments */
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|b",
-                                                         &filename, 
&filename_len, &use_include_path) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t|b",
+                                                         &filename, 
&filename_len, &filename_type, &use_include_path) == FAILURE) {
                return;
        }
 
+       if (filename_type == IS_UNICODE) {
+               if (php_stream_path_encode(NULL, &filename, &filename_len, 
(UChar*)filename, filename_len, REPORT_ERRORS, FG(default_context)) == FAILURE) 
{
+                       RETURN_FALSE;
+               }
+       }
        md.stream = php_stream_open_wrapper(filename, "rb",
                        (use_include_path ? USE_PATH : 0) | REPORT_ERRORS,
                        NULL);
-
+       if (filename_type == IS_UNICODE) {
+               efree(filename);
+       }
        if (!md.stream) {
                RETURN_FALSE;
        }
@@ -506,7 +514,6 @@
        zend_uchar filename_type;
        void *contents;
        long flags = 0;
-       zend_bool use_include_path = 0;
        php_stream *stream;
        int len;
        long offset = -1;
@@ -732,7 +739,7 @@
 }
 /* }}} */
 
-/* {{{ proto array file(string filename [, int flags[, resource context]])
+/* {{{ proto array file(string filename [, int flags[, resource context]]) U
    Read entire file into an array */
 
 #define PHP_FILE_BUF_SIZE      80
@@ -742,23 +749,24 @@
 {
        char *filename;
        int filename_len;
-       char *target_buf=NULL, *p, *s, *e;
+       zend_uchar filename_type;
+       char *target_buf=NULL;
        register int i = 0;
        int target_len;
-       char eol_marker = '\n';
        long flags = 0;
        zend_bool use_include_path;
        zend_bool include_new_line;
        zend_bool skip_blank_lines;
+       zend_bool text_mode;
        php_stream *stream;
        zval *zcontext = NULL;
        php_stream_context *context = NULL;
 
        /* Parse arguments */
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|lr!", 
&filename, &filename_len, &flags, &zcontext) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t|lr!", 
&filename, &filename_len, &filename_type, &flags, &zcontext) == FAILURE) {
                return;
        }
-       if (flags < 0 || flags > (PHP_FILE_USE_INCLUDE_PATH | 
PHP_FILE_IGNORE_NEW_LINES | PHP_FILE_SKIP_EMPTY_LINES | 
PHP_FILE_NO_DEFAULT_CONTEXT)) {
+       if (flags < 0 || flags > (PHP_FILE_USE_INCLUDE_PATH | 
PHP_FILE_IGNORE_NEW_LINES | PHP_FILE_SKIP_EMPTY_LINES | 
PHP_FILE_NO_DEFAULT_CONTEXT | PHP_FILE_TEXT)) {
                php_error_docref(NULL TSRMLS_CC, E_WARNING, "'%ld' flag is not 
supported", flags);
                RETURN_FALSE;
        }
@@ -766,10 +774,19 @@
        use_include_path = flags & PHP_FILE_USE_INCLUDE_PATH;
        include_new_line = !(flags & PHP_FILE_IGNORE_NEW_LINES);
        skip_blank_lines = flags & PHP_FILE_SKIP_EMPTY_LINES;
+       text_mode = flags & PHP_FILE_TEXT;
 
        context = php_stream_context_from_zval(zcontext, flags & 
PHP_FILE_NO_DEFAULT_CONTEXT);
 
-       stream = php_stream_open_wrapper_ex(filename, "rb", (use_include_path ? 
USE_PATH : 0) | REPORT_ERRORS, NULL, context);
+       if (filename_type == IS_UNICODE) {
+               if (php_stream_path_encode(NULL, &filename, &filename_len, 
(UChar*)filename, filename_len, REPORT_ERRORS, context) == FAILURE) {
+                       RETURN_FALSE;
+               }
+       }
+       stream = php_stream_open_wrapper_ex(filename, text_mode ? "rt" : "rb", 
(use_include_path ? USE_PATH : 0) | REPORT_ERRORS, NULL, context);
+       if (filename_type == IS_UNICODE) {
+               efree(filename);
+       }
        if (!stream) {
                RETURN_FALSE;
        }
@@ -777,9 +794,53 @@
        /* Initialize return array */
        array_init(return_value);
 
-       if ((target_len = php_stream_copy_to_mem(stream, &target_buf, 
PHP_STREAM_COPY_ALL, 0))) {
-               s = target_buf;
-               e = target_buf + target_len;
+       target_len = php_stream_copy_to_mem_ex(stream, stream->readbuf_type, 
(void**)&target_buf, PHP_STREAM_COPY_ALL, -1, 0);
+
+       if (!target_len) {
+               /* Empty file, do nothing and return an empty array */
+       } else if (stream->readbuf_type == IS_UNICODE) {
+               UChar *s = (UChar*)target_buf, *p;
+               UChar *e = s + target_len, eol_marker = '\n';
+
+               if (!(p = php_stream_locate_eol(stream, ZSTR(target_buf), 
target_len TSRMLS_CC))) {
+                       p = e;
+                       goto uparse_eol;
+               }
+
+               if (stream->flags & PHP_STREAM_FLAG_EOL_MAC) {
+                       eol_marker = '\r';
+               }
+
+               /* for performance reasons the code is quadruplicated, so that 
the if (include_new_line/unicode 
+                * will not need to be done for every single line in the file.
+                */
+               if (include_new_line) { 
+                       do {
+                               p++;
+uparse_eol:
+                               add_index_unicodel(return_value, i++, 
eustrndup(s, p-s), p-s, 0);
+                               s = p;
+                       } while ((p = u_memchr(p, eol_marker, (e-p))));
+               } else {
+                       do {
+                               if (skip_blank_lines && !(p-s)) {
+                                       s = ++p;
+                                       continue;
+                               }
+                               add_index_unicodel(return_value, i++, 
eustrndup(s, p-s), p-s, 0);
+                               s = ++p;
+                       } while ((p = u_memchr(p, eol_marker, (e-p))));
+               }
+               
+               /* handle any left overs of files without new lines */
+               if (s != e) {
+                       p = e;
+                       goto uparse_eol;
+               }
+
+       } else { /* !IS_UNICODE */
+               char *s = target_buf, *p;
+               char *e = target_buf + target_len, eol_marker = '\n';
        
                if (!(p = php_stream_locate_eol(stream, ZSTR(target_buf), 
target_len TSRMLS_CC))) {
                        p = e;
@@ -790,9 +851,6 @@
                        eol_marker = '\r';
                }       
 
-               /* for performance reasons the code is duplicated, so that the 
if (include_new_line) 
-                * will not need to be done for every single line in the file.
-                */
                if (include_new_line) { 
                        do {
                                p++;

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to