El lun, 28-11-2005 a las 16:01 -0500, Benj. Mako Hill escribió:
> <quote who="Javier Kohen" date="Mon, Nov 28, 2005 at 10:52:33AM -0300">
> > I wrote a patch that fixes the UTF-8 output issue. Seems to work
> > fine in non-wrap and wrap modes, with unformatted and formatted text
> > (e.g. bold text from man's output).
> >
> > Give it a try and let me know if you find any bugs.
> 
> If this works, I owe you several drinks. Hopefully, I'll be able to
> try it out tonight. I'll do an upload immediately if it works.
> 
> > I'll try to fix the UTF-8 input issues next.

Here we have...

UTF-8-enabled Most, with working UTF-8-compliant RegExp searches (enabled
by default at build time).

Check it out while it's hot. I've tested all I could, including forward
and backward searches. Note that non-RegExp searches should not be more
broken than before, as I tried to avoid disturbing them. However, they
didn't work with UTF-8 or formatted text. I don't think this is a
problem, as the user can't switch RegExp searches off at run time.

Most could use some improvement in general, for instance, to allow
backward searches to happen from the end of the buffer, and not just
from the beginning of the screen at the end of the buffer. I suggest we
get this tested first, and then move on to addressing the remaining
issues.

By the way, you might want to merge this bug with bug #341187.

Greetings,
-- 
Javier Kohen <[EMAIL PROTECTED]>
ICQ: blashyrkh #2361802
Jabber: [EMAIL PROTECTED]
Sólo en most-4.10.2: build-stamp
Sólo en most-4.10.2: config.log
Sólo en most-4.10.2: config.status
diff -ur most-4.10.2-1.debian-orig/debian/changelog most-4.10.2/debian/changelog
--- most-4.10.2-1.debian-orig/debian/changelog	2005-12-04 04:43:47.000000000 -0300
+++ most-4.10.2/debian/changelog	2005-12-04 04:06:21.000000000 -0300
@@ -1,3 +1,10 @@
+most (4.10.2-1.0.1) unstable; urgency=low
+
+  * Reworked RegExp searches, so they now work.
+  * Properly handle UTF-8 data.
+
+ -- Javier Kohen <[EMAIL PROTECTED]>  Mon, 28 Nov 2005 04:07:10 -0300
+
 most (4.10.2-1) unstable; urgency=low
 
   * New upstream release.
Sólo en most-4.10.2/debian: files
Sólo en most-4.10.2/debian: most
Sólo en most-4.10.2/debian: most.substvars
Sólo en most-4.10.2: Makefile
diff -ur most-4.10.2-1.debian-orig/src/buffer.c most-4.10.2/src/buffer.c
--- most-4.10.2-1.debian-orig/src/buffer.c	2005-12-04 04:43:47.000000000 -0300
+++ most-4.10.2/src/buffer.c	2005-11-29 03:42:09.000000000 -0300
@@ -61,24 +61,27 @@
      {
 	if (*pos == '\n')
 	  {
-	     pos--;
+	     pos--; /* Skip back the new-line. */
 	     while ((pos > Most_Beg)
 		    && (*pos != '\n'))
-	       pos--;
+	       pos = SLutf8_bskip_char(Most_Beg, pos);
 
 	     if (*pos != '\n') return pos;
+	     /* from here on *pos == '\n' */
 	     if (pos + 1 != cpos)
 	       return pos + 1;
 	  }
      }
-   else pos--;
+   else
+     pos = SLutf8_bskip_char(Most_Beg, pos);
 
    if (*pos != '\n')
      {
 	while ((pos > Most_Beg)
 	       && (*pos != '\n'))
-	  pos--;
+	  pos = SLutf8_bskip_char(Most_Beg, pos);
 	if (*pos != '\n') return Most_Beg;
+	/* from here on *pos == '\n' */
 	return pos + 1;
      }
 
@@ -98,12 +101,15 @@
 {
    unsigned int col = 0;
 
+   if (Most_UTF8_Mode)
+     return SLutf8_skip_chars(b, e, num_cols, &col, 0);
+
    while ((b < e)
 	  && (col < num_cols))
      {
 	unsigned char ch = *b++;
-	if (((ch >= ' ') && (ch < 0x7F))
-	    || (ch >= SLsmg_Display_Eight_Bit))
+
+	if (most_isprint(ch))
 	  {
 	     col++;
 	     continue;
@@ -545,7 +551,10 @@
     /* Now we have found the line it is on so.... */
    beg = most_beg_of_line();
    *c = 1;
-   while (beg++ < pos) *c = *c + 1;
+   if (Most_UTF8_Mode)
+     while ((beg = SLutf8_skip_char(beg, pos)) < pos) *c = *c + 1;
+   else
+     while (beg++ < pos) *c = *c + 1;
    Most_C_Line = save_line;
    Most_C_Offset = save_offset;
 }
Sólo en most-4.10.2/src: config.h
diff -ur most-4.10.2-1.debian-orig/src/keym.c most-4.10.2/src/keym.c
--- most-4.10.2-1.debian-orig/src/keym.c	2005-12-04 04:43:47.000000000 -0300
+++ most-4.10.2/src/keym.c	2005-12-04 04:41:13.000000000 -0300
@@ -309,7 +309,7 @@
 #else
 				 "Search: ",
 #endif
-				 Most_Search_Str,
+				 (char *) Most_Search_Str,
 				 MOST_SEARCH_BUF_LEN
 				 ) == -1) return;
    Most_Curs_Offset = Most_C_Offset;
@@ -325,7 +325,7 @@
 #else
 				 "Search Backwards: ",
 #endif
-				 Most_Search_Str,
+				 (char *) Most_Search_Str,
 				 MOST_SEARCH_BUF_LEN) == -1) return;
    find_next_cmd();
 }
diff -ur most-4.10.2-1.debian-orig/src/line.c most-4.10.2/src/line.c
--- most-4.10.2-1.debian-orig/src/line.c	2005-12-04 04:43:47.000000000 -0300
+++ most-4.10.2/src/line.c	2005-11-29 03:51:31.000000000 -0300
@@ -87,8 +87,7 @@
    while (b < end)
      {
         ch = *b++;
-	if (((ch >= ' ') && (ch < 0x7F))
-	    || (ch >= SLsmg_Display_Eight_Bit))
+	if (most_isprint(ch))
 	  {
 	     *s++ = ch;
 	     continue;
@@ -114,15 +113,22 @@
    SLsmg_erase_eol ();
 }
 
+int most_isprint(unsigned char ch)
+{
+   /* Can this be directly replaced with isprint? */
+   return (ch >= ' ' && ch < 0x7F) || ch >= SLsmg_Display_Eight_Bit;
+}
+
 static int most_analyse_line(unsigned char *begg, unsigned char *endd, 
-			     char *out, char *attributes)
+			     unsigned char *out, char *attributes)
 {
-   unsigned char *beg, *end;
+   unsigned char *beg, *end, *pout;
    unsigned int min_col, max_col;
    unsigned int i, i_max;
 
    beg = begg;
    end = endd;
+   pout = out;
    i = i_max = 0;
    min_col = Most_Column - 1;
    max_col = min_col + SLtt_Screen_Cols;
@@ -130,9 +136,9 @@
    while (beg < end)
      {
 	char attr = ' ';
-	unsigned char ch;
+	unsigned char ch = *beg++;
 
-	if ('\n' == (ch = *beg++))
+	if ('\n' == ch)
 	  break;
 	
 	if ((ch == '\r') && (Most_V_Opt == 0))
@@ -146,7 +152,13 @@
 	  {
 	     if (i > i_max) i_max = i;
 	     if (i > 0)
-	       i--;
+	       {
+		  if (Most_UTF8_Mode)
+		    pout = SLutf8_bskip_char(out, pout);
+		  else
+		    pout--;
+		  i--;
+	       }
 	     continue;
 	  }
 	
@@ -155,12 +167,12 @@
 	     attr = 'b';
 	     if ((i >= min_col) && (i < max_col))
 	       {
-		  if (out[i-min_col] == '_')
+		  if (*pout == '_')
 		    attr = 'u';
 		  else if (ch == '_')
 		    {
 		       attr = 'u';
-		       ch = out[i - min_col];
+		       ch = *pout;
 		    }
 	       }
 	     if (ch == ' ')
@@ -170,23 +182,30 @@
 	       }
 	     /* drop */
 	  }
-	
-	if ((ch >= ' ') && (ch < 0x7F))
-	  {
+
+	if (Most_UTF8_Mode) {
+	   unsigned char *prev = --beg;
+	   int len;
+	   beg = SLutf8_skip_char(beg, end);
+	   len = beg - prev;
+	   if (len > 1) {
+	     /* Non-ASCII char, display it. */
 	     if ((i >= min_col) && (i < max_col))
 	       {
-		  out[i-min_col] = ch;
+		  memcpy(pout, prev, len);
+		  pout += len;
 		  attributes[i-min_col] = attr;
 	       }
 	     i++;
 	     continue;
-	  }
-	
-	if (ch >= SLsmg_Display_Eight_Bit)
+	   }
+	}
+
+	if (most_isprint(ch))
 	  {
 	     if ((i >= min_col) && (i < max_col))
 	       {
-		  out[i-min_col] = ch;
+		  *pout++ = ch;
 		  attributes[i-min_col] = attr;
 	       }
 	     i++;
@@ -201,7 +220,7 @@
 	       {
 		  if ((i >= min_col) && (i < max_col))
 		    {
-		       out[i-min_col] = ' ';
+		       *pout++ = ' ';
 		       attributes[i-min_col] = attr;
 		    }
 		  i++;
@@ -214,7 +233,7 @@
 	  {
 	     if ((i >= min_col) && (i < max_col))
 	       {
-		  out[i-min_col] = '~';
+		  *pout++ = '~';
 		  attributes[i-min_col] = attr;
 	       }
 	     i++;
@@ -224,7 +243,7 @@
 	
 	if ((i >= min_col) && (i < max_col))
 	  {
-	     out[i-min_col] = '^';
+	     *pout++ = '^';
 	     attributes[i-min_col] = attr;
 	  }
 	i++;
@@ -234,7 +253,7 @@
 	
 	if ((i >= min_col) && (i < max_col))
 	  {
-	     out[i-min_col] = ch;
+	     *pout++ = ch;
 	     attributes[i-min_col] = attr;
 	  }
 	i++;
@@ -265,7 +284,7 @@
 	       {
 		  if (i < max_col)
 		    {
-		       out[i] = '.';
+		       *pout++ = '.';
 		       attributes[i] = ' ';
 		    }
 		  i++;
@@ -282,15 +301,16 @@
 
    i -= min_col;
 
-   out[i] = 0;
+   *pout = 0;
    attributes[i] = 0;
    return i_max;
 }
 
 static void output_with_attr (unsigned char *out, unsigned char *attr)
 {
-   unsigned char at, ch, lat;
+   unsigned char at, lat;
    unsigned char *p = out;
+   unsigned char *pmax = p + strlen((char *) p);
 
    if (Most_V_Opt) 
      {
@@ -299,7 +319,7 @@
      }
 
    lat = ' ';
-   while ((ch = *p) != 0)
+   while (p < pmax)
      {
 	if (lat != *attr)
 	  {
@@ -321,7 +341,7 @@
 	     else most_tt_normal_video ();
 	     lat = at;
 	  }
-	p++;
+	p = SLutf8_skip_char(p, pmax);
 	attr++;
      }
 
@@ -341,7 +361,6 @@
 {
    unsigned char *beg, *end;
    unsigned int len;
-   unsigned char dollar;
    static unsigned char *line;
    static unsigned char *attr;
    static unsigned int line_len;
@@ -368,26 +387,8 @@
 
    (void) most_extract_line (&beg, &end);
 
-   len = most_analyse_line(beg, end, (char *) line, (char *) attr);
+   len = most_analyse_line(beg, end, line, (char *) attr);
 
-   dollar = 0;
-   if (Most_W_Opt)
-     {
-	if ((end < Most_Eob)
-	    && (*end != '\n'))
-	  dollar = '\\';
-     }
-   else if (len > (unsigned int) SLtt_Screen_Cols + (Most_Column - 1))
-     dollar = '$';
-   
-   if (dollar)
-     {
-	line[SLtt_Screen_Cols-1] = dollar;
-	attr[SLtt_Screen_Cols-1] = ' ';
-	line[SLtt_Screen_Cols] = 0;
-	attr[SLtt_Screen_Cols] = 0;
-     }
-   
    output_with_attr (line, attr);
    SLsmg_erase_eol ();
 }
@@ -398,21 +399,34 @@
 int most_apparant_distance (unsigned char *pos)
 {
    int i;
-   unsigned char *save_pos, ch;
+   unsigned char *save_pos, *beg, ch;
    unsigned int save_offset;
 
    save_offset = Most_C_Offset;
    save_pos = pos;
    Most_C_Offset = (unsigned int) (pos - Most_Beg);
-   pos = most_beg_of_line();
+   beg = pos = most_beg_of_line();
    Most_C_Offset = save_offset;
 
    i = 0;
    while (pos < save_pos)
      {
-	ch = *pos++;
-	if (((ch >= ' ') && (ch < 0x7F))
-	    || (ch >= SLsmg_Display_Eight_Bit))
+	ch = *pos;
+
+	if (Most_UTF8_Mode) {
+	   unsigned char *prev = pos;
+	   int len;
+	   pos = SLutf8_skip_char(pos, save_pos);
+	   len = pos - prev;
+	   if (len > 1) {
+	     i++;
+	     continue;
+	   }
+	} else {
+	   pos++;
+	}
+
+	if (most_isprint(ch))
 	  {
 	     i++;
 	     continue;
@@ -420,7 +434,13 @@
 
 	if (!Most_V_Opt && (ch == '\b'))
 	  {
-	     if (i > 0) i--;
+	     if (i > 0)
+	       {
+		  if (Most_UTF8_Mode)
+		    i -= pos - SLutf8_bskip_char(beg, pos);
+		  else
+		    i--;
+	       }
 	  }
 	else if (!Most_V_Opt && (ch == '\015')) /* ^M */
 	  {
diff -ur most-4.10.2-1.debian-orig/src/line.h most-4.10.2/src/line.h
--- most-4.10.2-1.debian-orig/src/line.h	2005-07-02 00:04:58.000000000 -0300
+++ most-4.10.2/src/line.h	2005-11-28 02:20:37.000000000 -0300
@@ -7,5 +7,6 @@
 
 extern void most_display_line(void);
 extern int most_apparant_distance(unsigned char *);
+extern int most_isprint(unsigned char ch);
 #endif
 
Sólo en most-4.10.2/src: Makefile
diff -ur most-4.10.2-1.debian-orig/src/most.c most-4.10.2/src/most.c
--- most-4.10.2-1.debian-orig/src/most.c	2005-12-04 04:43:47.000000000 -0300
+++ most-4.10.2/src/most.c	2005-11-29 03:31:01.000000000 -0300
@@ -125,7 +125,7 @@
    ch = *(++str);
    if ( ch == '/')
      {
-	strcpy (Most_Search_Str,++str);
+	strcpy ((char *) Most_Search_Str,++str);
 	return;
      }
 
@@ -456,13 +456,7 @@
 
    SLtt_get_terminfo();
 #if SLANG_VERSION >= 20000
-#if 0
-   Most_UTF8_Mode = SLutf8_enable (1);
-   if (Most_UTF8_Mode)
-     {
-	fprintf (stderr, "UTF-8 Mode is in effect\n");
-     }
-#endif
+   Most_UTF8_Mode = SLutf8_enable (-1);
 #endif
    SLtt_Ignore_Beep = 1;
    if (No_Colors) 
diff -ur most-4.10.2-1.debian-orig/src/most.h most-4.10.2/src/most.h
--- most-4.10.2-1.debian-orig/src/most.h	2005-07-02 00:04:58.000000000 -0300
+++ most-4.10.2/src/most.h	2005-11-29 00:54:08.000000000 -0300
@@ -1,4 +1,5 @@
 #include "config.h"
+#define SLANG_REGEXP
 extern int Most_S_Opt;
 extern int Most_A_Opt;             /* automatically choose -b if necessary */
 extern int Most_V_Opt;             /* display control chars */
Sólo en most-4.10.2/src: objs
diff -ur most-4.10.2-1.debian-orig/src/search.c most-4.10.2/src/search.c
--- most-4.10.2-1.debian-orig/src/search.c	2005-12-04 04:43:47.000000000 -0300
+++ most-4.10.2/src/search.c	2005-12-04 04:35:11.000000000 -0300
@@ -20,6 +20,7 @@
 */
 #include "config.h"
 
+#include <ctype.h>
 #include <stdio.h>
 #include <string.h>
 #include <slang.h>
@@ -31,18 +32,18 @@
 #include "display.h"
 #include "search.h"
 
-/* Note!!!  The regular expression searches may not work.  I have not
- * tested them.
- * FIXME!!!
- */
-
 int Most_Case_Sensitive = 0;
-char Most_Search_Str[256];
+unsigned char Most_Search_Str[256];
 int Most_Search_Dir = 1;
 
 #include "jdmacros.h"
 
-#define UPCASE(ch) ((!Most_Case_Sensitive && (ch <= 'z') && (ch >= 'a')) ? (ch - 32) : ch)
+#if SLANG_VERSION < 20000
+# define NORM_CHAR(ch) ((!Most_Case_Sensitive) ? toupper(ch) : ch)
+# define UPCASE(ch) NORM_CHAR(ch)
+#else
+# define NORM_CHAR(ch) (ch)
+#endif
 
 #if	defined(HAVE_V8_REGCOMP) || defined(SLANG_REGEXP)
 
@@ -78,7 +79,7 @@
  * This function is called by the V8 regcomp to report
  * errors in regular expressions.
  */
-static void regerror(char *s)
+static void regerror(const char *s)
 {
    char	string[256];
 
@@ -95,13 +96,17 @@
  *	      0	 error
  *
  */
-static int do_regcomp(unsigned char *key)
+static int do_regcomp(const unsigned char *key)
 {
    static int old_Most_Case_Sensitive;
-   unsigned char UpCaseKey[sizeof(savepattern)];
 # ifndef HAVE_V8_REGCOMP
    int	posn;			/* reg exp error at this offset */
 # endif
+# if SLANG_VERSION < 20000
+   unsigned char UpCaseKey[sizeof(savepattern)];
+# else
+   int re_flags = 0;
+# endif
 
    /*
     *	Only recompile search string if it has changed
@@ -124,6 +129,7 @@
 
    old_Most_Case_Sensitive = Most_Case_Sensitive;
 
+# if SLANG_VERSION < 20000
    if ( Most_Case_Sensitive == 0 )
      {
 	register unsigned char	*p;		/* ptr to UpCaseKey */
@@ -141,6 +147,7 @@
 
 	*p = '\0';
      }
+# endif
 
    strcpy((char *)savepattern, (char *)key);
 
@@ -162,7 +169,12 @@
 #  else
    if (Regexp != NULL)
      SLregexp_free (Regexp);
-   if (NULL == (Regexp = SLregexp_compile ((char *)key, Most_Case_Sensitive ? 0 : SLREGEXP_CASELESS)))
+
+   if (!Most_Case_Sensitive)
+     re_flags |= SLREGEXP_CASELESS;
+   if (Most_UTF8_Mode)
+     re_flags |= SLREGEXP_UTF8;
+   if (NULL == (Regexp = SLregexp_compile ((char *)key, re_flags)))
      posn = -1;
    else
      posn = 0;
@@ -187,7 +199,7 @@
  * Call the appropriate regular expression execute function
  */
 
-static unsigned char *do_regexec(unsigned char *string)
+static unsigned char *do_regexec(const unsigned char *string, size_t length)
 {
 # ifdef	HAVE_V8_REGCOMP
    if ( regexec(regpattern, (char *)string) )
@@ -196,40 +208,46 @@
      return( NULL );
 # else
 #  if SLANG_VERSION < 20000
-   return ( SLang_regexp_match(string, strlen((char *)string), &regdata) );
+   return ( SLang_regexp_match(string, length, &regdata) );
 #  else
-   return (unsigned char *)SLregexp_match (Regexp, (char *)string, strlen ((char *)string));
+   return (unsigned char *)SLregexp_match (Regexp, (char *)string, length);
 #  endif
 # endif	/* HAVE_V8_REGCOMP */
 }
 
 /*
- *  Make a upper case copy of a string.	 Also changes any "c\b" character
- *  strings into just "" so that highlighted and underlined characters
- *  can be searched.
+ *  Changes any "c\b" character strings into just "" so that
+ *  highlighted and underlined characters can be searched.  Stores in
+ *  length the new size of the string, after the aforementioned
+ *  changes.
+ *
+ *  If using a version of S-Lang that does not support case
+ *  insensitive regular expressions, this function upper cases the
+ *  input string, as well.
  *
  *  Reuses malloced memory, so a copy cannot be retained between calls.
  */
 
-static unsigned char *StrUpCaseCopy(unsigned char *input)
+static const unsigned char *StrNormCopy(const unsigned char *input,
+					size_t *length)
 {
    static unsigned char *uppercase;	/* ptr to malloced area */
    static size_t	  bufsize;	/* size of malloced area */
-   unsigned char	 *src;		/* ptr to source */
+   const unsigned char	 *src;		/* ptr to source */
+   const unsigned char	 *end;		/* ptr to end of source */
    register unsigned char *dest;	/* ptr to destination */
    register int	  idx;	/* index into uppercase[] */
-   register unsigned char c;		/* source character */
-   size_t		  length;	/* size of string to copy */
 
    src = input;
-   length = strlen((char *)src) + 1;	/* len of line plus terminator */
+   end = input + *length;
 
-   if ( length > bufsize )
+   if ( *length >= bufsize )
      {
 	if ( uppercase != (unsigned char *)NULL )
 	  free(uppercase);
 
-	bufsize = (length > 256 ) ? length : 256;	/* 256 byte default */
+	/* len of line plus terminator */
+	bufsize = (*length >= 256 ) ? *length + 1 : 256; /* 256 byte default */
 
 	uppercase = (unsigned char *)malloc(bufsize);
 	if ( uppercase == (unsigned char *)NULL )
@@ -242,8 +260,9 @@
 
    dest = uppercase;
 
-   for ( idx = 0 ; (c = *src) != '\0' ; src++ )
+   for ( idx = 0 ; src < end ; src++ )
      {
+	unsigned char c = *src;
 	if ( c == '\b' )		/* backspace */
 	  {
 	     if ( idx-- > 0 )
@@ -252,54 +271,79 @@
 	else
 	  {
 	     if ( idx++ >= 0 )
-	       *dest++ = UPCASE(c);
+	       *dest++ = NORM_CHAR(c);
 	  }
      }
 
    *dest = '\0';		/* add termination */
 
+   *length = dest - uppercase;
    return(uppercase);
 }
 
 /*
- *  Given an offset into a copy made by StrUpCaseCopy() and a pointer to the
+ *  Given an offset into a copy made by StrNormCopy() and a pointer to the
  *  original string, returns a pointer into the original string corresponding
  *  to this offset.
  */
 
-static unsigned char *GetOrigPtr(unsigned char *original, int offset)
+static const unsigned char *
+GetOrigPtr(const unsigned char *original, int offset,
+	   const unsigned char *end)
 {
-   register unsigned char *p = original;
+   const unsigned char *p = original;
    register int	    j = offset;
 
     /*
      *	Step through, adjusting offset according to backspaces found
      */
-   while ( *p != '\0' )
+   while ( p < end )
      {
+	const unsigned char *next;
+	if (Most_UTF8_Mode)
+	  next = SLutf8_skip_char((unsigned char*) p, (unsigned char*) end);
+	else
+	  next = p + 1;
+	size_t length_last = next - p;
+
 	if ( *p == '\b' )
-	  j++;
+	  j += length_last;
 	else
-	  j--;
+	  j -= length_last;
 
 	if ( j < 0 )
 	  break;
 	else
-	  p++;
+ 	  p = next;
      }
 
    return(p);
 }
 #endif	/* HAVE_V8_REGCOMP || SLANG_REGEXP */
 
+/* Returns a pointer to the first occurrence of '\n' in string beg, or
+ * end if no '\n' can be found between inclusive beg and exclusive
+ * end.
+ */
+static const unsigned char *
+find_eol(const unsigned char *beg, const unsigned char *end)
+{
+   const unsigned char *p;
+   if ( (p = memchr(beg, '\n', end - beg)) != NULL)
+     return p;
+   else
+     return end;
+}
+
 /* This routine returns the 1 + position of first match of key in str.
    key is modified to match the case of str. */
 /* We should try to optimize this routine */
 /* searches from beg up to but not including end */
 
-static unsigned char *forw_search_region(unsigned char *beg,
-					 unsigned char *end,
-					 unsigned char *key)
+static const unsigned char *
+forw_search_region(const unsigned char *beg,
+		   const unsigned char *end,
+		   const unsigned char *key)
 {
 #if	defined(HAVE_V8_REGCOMP) || defined(SLANG_REGEXP)
     /*
@@ -307,10 +351,11 @@
      *	to be broken into lines.
      *
      */
-   unsigned char	*p;		/* temp pointer */
-   unsigned char	*linebeg;	/* beginning of working line */
-   unsigned char	*copy;		/* ptr to upper case copy */
+   const unsigned char	*linebeg;	/* beginning of working line */
+   const unsigned char	*lineend;	/* end of working line */
+   const unsigned char	*norm_line;	/* ptr to normalized line */
    unsigned char	*match;		/* ptr to matching string */
+   int			anchored_re;
 
     /*
      *	Compile "key" into an executable regular expression
@@ -318,58 +363,35 @@
    if ( do_regcomp(key) == 0 )
      return(Most_Eob);
 
-    /*
-     *	For regular expression searches we need to do a line by line
-     *	search, so it is necessary to temporarily replace '\n' with '\0'
-     *	characters.
-     */
-   p = beg;
-   linebeg = beg;
+   anchored_re = key[0] == '^';
 
-   while (linebeg < end)
+   for ( linebeg = beg ; linebeg < end ; linebeg = lineend + 1 )
      {
-	while ((p < end) && (*p != '\n')) p++;
-	if (p == end) break;
-	*p = 0;
+	size_t length;
 
-	if ( Most_Case_Sensitive == 0 )	/* i.e. case insensitive */
-	  {
-	     copy = StrUpCaseCopy(linebeg);
-	     if ( copy == (unsigned char *)NULL )
-	       return(Most_Eob);
-	  }
+ 	lineend = find_eol(linebeg, end);
+
+	length = lineend - linebeg;
+	if (0 == length) continue; /* Skip empty lines. */
+
+	norm_line = StrNormCopy(linebeg, &length);
+	if ( norm_line == NULL )
+	  return(Most_Eob);
 
 	/*
 	 * Quick sanity check for beginning of line archored tests.
-	 * If 1st char of key is "^", then the character before linebeg (which
-	 * must be beyond the start of the window), must be a "\n",
-	 * otherwise do_regexec() isn't called.
+	 * If 1st char of key is "^", then the character before
+	 * linebeg (which must be within the buffer), must be a "\n".
 	 */
-	if ( 
-# if 0
-	     ((*key != '^') 
-	      || (linebeg > Most_Win->beg_pos && linebeg[-1] == '\n'))
-	     &&
-#endif
-	     (match = do_regexec(Most_Case_Sensitive ? linebeg : copy)))
+	if ( !(anchored_re && (linebeg <= Most_Beg || linebeg[-1] != '\n'))
+	     && (match = do_regexec(norm_line, length)) )
 	  {
-	     *p = '\n';
-	     if ( Most_Case_Sensitive == 0 )
-	       {
-		/*
-		 *  Use offset into "copy" as idx to find point in
-		 *  real line.
-		 */
-		  return( GetOrigPtr(linebeg, match - copy) );
-	       }
-	     else
-	       {
-		  return( match );
-	       }
+	     /*
+	      *  Use offset into "norm_line" as idx to find point in
+	      *  real line.
+	      */
+	     return( GetOrigPtr(linebeg, match - norm_line, lineend) );
 	  }
-
-	*p++ = '\n';
-	linebeg = p;
      }
 
    return(Most_Eob);
@@ -475,23 +497,18 @@
  *  pattern "key".
  */
 
-static unsigned char *back_search_region(unsigned char *beg,
-					 unsigned char *end,
-					 unsigned char *key)
+static const unsigned char *
+back_search_region(const unsigned char *beg,
+		   const unsigned char *end,
+		   const unsigned char *key)
 {
 #if	defined(HAVE_V8_REGCOMP) || defined(SLANG_REGEXP)
-   register unsigned char	*p;
-   unsigned char		*endp,		/* end of line */
-   *lastmatch,	/* last match in line */
+   const unsigned char	*p;
+   const unsigned char	*endp,		/* end of line */
+   *lastmatch,		/* last match in line */
    *endprevline,	/* end of line before this one */
    *match;		/* ptr to matching string */
-   unsigned char		savec;		/* last char on line */
-
-    /*
-     *	Compile "key" into an executable regular expression
-     */
-   if ( do_regcomp(key) == 0 )
-     return(Most_Eob);
+   int			anchored_re;
 
     /*
      *	Starting from the end of the buffer, break the buffer into lines
@@ -501,84 +518,60 @@
      *	and isn't that what we want to do in a reverse search.
      */
    endp = end;
-   lastmatch = Most_Eob;
-   while ( 1 )			/* forever loop */
-     {
-	if ( (endp < beg) )
-	  return(Most_Eob);		/* Reach start of buffer, no match */
+   endprevline = end;
+   match = Most_Eob;
 
-	/* Find the real end of current line */
-	if ( (p = (unsigned char *)strchr((char *)endp, '\n')) != NULL )
-	  endp = p;
-
-	savec = *endp;
-	*endp = '\0';			/* terminate line with NULL */
+   /* Find out whether the regexp attempts to match a line boundary.
+    * In this case, only a match on the full line should be attempted.
+    */
+   anchored_re = key[strlen((const char *) key)-1] == '$';
 
+   while ( endp > beg )
+     {
 	/* Find the beginning of line */
 	for ( p = endp - 1 ; (p >= beg) && (*p != '\n') ; p-- )
 	  {
 	  }
 
+	if ( p < beg )
+	  break;
+
 	endprevline = p;
 
-	p++;			/* point to 1st char after newline */
+	/*
+	 * Quick sanity check for end of line archored tests.  If last
+	 * char of key is "$", then the character after endp (which
+	 * must be within the buffer), must be a "\n".
+	 */
+	if ( anchored_re && endp < Most_Eob && endp[0] != '\n' )
+	  {
+	     endp = p;
+	     continue;
+	  }
 
 	/*
 	 *  Keep searching forward in this line till no more matches
 	 */
-	if ( Most_Case_Sensitive == 0 )		/* i.e. case insensitive */
+	do
 	  {
-	     unsigned char	*copy;		/* ptr to upper case copy */
-	     unsigned char	*savecopy;	/* copy of "copy" */
-
-	     copy = StrUpCaseCopy(p);
-	     if ( copy == (unsigned char *)NULL )
-	       return(Most_Eob);
+	     lastmatch = match;
 
-	     savecopy = copy;
-
-	    /*
-	     * Quick sanity check for beginning of line archored tests.
-	     * Must be at start of line.
-	     */
-	     while ( ((*key != '^') || (copy == savecopy))
-		    && (match = do_regexec(copy)) )
-	       {
-		  if ( GetOrigPtr(p, match - savecopy) > end )
-		    break;
-		  lastmatch = match;
-		  if ( *lastmatch == '\0' )	/* key must be "$" or "^" */
-		    break;
-		  copy = lastmatch + 1;		/* character after match */
-	       }
+	     if (Most_UTF8_Mode)
+	       p = SLutf8_skip_char((unsigned char*) p, (unsigned char*) endp);
+	     else
+	       p++;
 
-	     if ( lastmatch != Most_Eob )	/* found a match */
-	       lastmatch = GetOrigPtr(p, lastmatch - savecopy);
-	  }
-	else
-	  {
-	    /*
-	     * Quick sanity check for beginning of line archored tests.
-	     * Must be at start of buffer or start of line
-	     */
-	     while ( ( (*key != '^') || (p == endprevline + 1) )
-		    && (match = do_regexec(p)) )
-	       {
-		  if ( match > end )
-		    break;
-		  lastmatch = match;
-		  if ( *lastmatch == '\0' )	/* key must be "$" or "^" */
-		    break;
-		  p = lastmatch + 1;		/* character after match */
-	       }
+	     match = forw_search_region(p, endp, key);
 	  }
+	while ( match <= endp );
 
-	*endp = savec;
 	if ( lastmatch != Most_Eob )	/* found a match */
 	  return(lastmatch);
 
 	endp = endprevline;
      }
+
+   return(Most_Eob);		/* Reached start of buffer, no match */
 #else
    char ch, char1, work[256];
    unsigned char *pos;
@@ -670,7 +663,7 @@
 #endif	/* HAVE_V8_REGCOMP || SLANG_REGEXP */
 }
 
-long long most_search(unsigned char *from, int repeat, long long *col)
+long long most_search(const unsigned char *from, int repeat, long long *col)
 {
     /* return the line match was found as well as line number,
      * search from i on; assume that line_array match the i so we need
@@ -678,7 +671,7 @@
 
    long long test, save_line, the_col, row, s_len;
    char string[300];
-   unsigned char *pos;
+   const unsigned char *pos;
    unsigned int save_ofs;
    unsigned int found_ofs;
 
@@ -687,7 +680,10 @@
    save_line = Most_C_Line;
    found_ofs = Most_Eob - Most_Beg;
    *col = 0;
-   s_len = strlen (Most_Search_Str);
+   if (Most_UTF8_Mode)
+     s_len = SLutf8_strlen (Most_Search_Str, 0);
+   else
+     s_len = strlen ((char *) Most_Search_Str);
    pos = from;
 
    if (*Most_Search_Str)
diff -ur most-4.10.2-1.debian-orig/src/search.h most-4.10.2/src/search.h
--- most-4.10.2-1.debian-orig/src/search.h	2005-12-04 04:43:47.000000000 -0300
+++ most-4.10.2/src/search.h	2005-12-03 23:36:45.000000000 -0300
@@ -6,7 +6,7 @@
 extern int Most_Case_Sensitive;
 extern int Most_Search_Dir;
 #define MOST_SEARCH_BUF_LEN	256
-extern char Most_Search_Str[MOST_SEARCH_BUF_LEN];
-extern long long most_search(unsigned char *, int, long long *);
+extern unsigned char Most_Search_Str[MOST_SEARCH_BUF_LEN];
+extern long long most_search(const unsigned char *, int, long long *);
 #endif
 
diff -ur most-4.10.2-1.debian-orig/src/window.c most-4.10.2/src/window.c
--- most-4.10.2-1.debian-orig/src/window.c	2005-12-04 04:43:47.000000000 -0300
+++ most-4.10.2/src/window.c	2005-11-29 03:57:56.000000000 -0300
@@ -156,7 +156,7 @@
 	SLsmg_write_nchars (buf + point, len - point);
 	if (col < SLtt_Screen_Cols)
 	  break;
-	buf++; point--; len--;      /* FIXME for UTF-8 */
+	buf++; point--; len--;
      }
    SLsmg_erase_eol ();
    SLsmg_gotorc (SLtt_Screen_Rows - 1, col);
@@ -227,6 +227,8 @@
 {
    SLang_RLine_Info_Type *rli;
    unsigned int flags = SL_RLINE_BLINK_MATCH;
+   if (Most_UTF8_Mode)
+     flags |= SL_RLINE_UTF8_MODE;
    
    if (NULL == (rli = SLrline_open (SLtt_Screen_Cols, flags)))
      return NULL;
@@ -258,7 +260,7 @@
 
    /* do not use default.  The up arrow can always get it back. */
    if ((what != NULL) 
-       && (*what) && (what != Most_Search_Str))
+       && (*what) && (what != (char *) Most_Search_Str))
      {
 	if (-1 == SLrline_set_line (Most_RLI, what))
 	  return -1;

Attachment: signature.asc
Description: This is a digitally signed message part

Reply via email to