----- Forwarded message from Jim Meyering <[EMAIL PROTECTED]> -----

Date: Fri, 19 Oct 2007 23:50:58 +0200
From: Jim Meyering <[EMAIL PROTECTED]>
To: Steven Schubiger <[EMAIL PROTECTED]>
Subject: Re: linecut (warnocked?)

Steven Schubiger <[EMAIL PROTECTED]> wrote:

> Jim Meyering <[EMAIL PROTECTED]> wrote:
>
>> If you want to get something "out there" soon,
>> doing it as a separate tool initially would be
>> a good idea.  That wouldn't keep you from proposing
>> it again, later.
>
> I'm inclined to think we should do it right before I release again.
>
>> As spec'd, it looks like a very challenging project.
>> Are you close to completing it?
>
> Yes, pretty close (if you subtract documentation et al).
>
> elide_lines_seekable() has been written from "scratch" and works
> the same for input from a pipe or file stream (no *alloc() overhead).

Good!
So it copies all input to a temporary file
when it's from a pipe?

> The parsing of the current parameters is fully functional, which
> results in --range supporting N, -N & +N combinations.
>
> Line numbering is similar to cat, i.e. `linecut --range 1,-1 --number
> <file>` is exactly the same as `cat -n <file>` (proved via `diff -u`).
>
> So, I'm looking for a bit of advice concerning data types, eventual
> efficiency hacks and overall implementation improvement.

IMHO, you'll get good feedback (more volume and quicker)
if you post to the list and ask.

----- End forwarded message -----
/* linecut -- output slices of lines from file(s)
   Copyright (C) 2007 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include <config.h>

#include <errno.h>
#include <getopt.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "system.h"

#include "error.h"
#include "quote.h"
#include "safe-read.h"

/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "linecut"

/* Author string handed to the --version handler in main.  */
#define AUTHORS "Steven Schubiger"

/* Capacity of the `ranges' and `offsets' arrays, counted in individual
   range values: 4 sets of 2 values (start and end) each.  */
#define MAX_RANGE_SETS (4 * 2)

/* True if the character RANGE may be part of a range component: a
   decimal digit or a `-'/`+' prefix.
   NOTE(review): the argument is not parenthesized in the expansion, so
   only pass simple expressions.  */
#define VALID_RANGE(range) (ISDIGIT (range) || range == '-' || range == '+')

/* If true, a --range argument was given on the command line.  */
static bool range_mode;

/* If true, prefix every output line with its number (--number).  */
static bool number_mode;

/* If true, print a `==> NAME <==' banner before each file's output.
   Derived in main from the header mode and the number of operands.  */
static bool print_headers;

/* When to print the filename banners: only when several files were
   named (the default), always (--verbose) or never (--quiet).  */
enum header_mode
{
  multiple_files, always, never
};

/* Is the file currently being processed standard input?
   Set by range_file for each operand.  */
static bool is_stdin;

/* Have we ever read standard input?  If so, main closes it on exit.  */
static bool have_read_stdin;

/* The name this program was run with. */
static char *program_name;

/* Number of values currently stored in `ranges'.  Values are stored
   in pairs: even indices hold starting lines, odd indices ending
   lines.  */
static int range_max;

/* The parsed range values.  Positive values are absolute line numbers,
   negative values count back from the end of the file and are resolved
   by relative_range_to_absolute.  */
static long ranges[MAX_RANGE_SETS];

/* File offsets recorded by determine_seek_offsets, indexed like
   `ranges'.  */
static off_t offsets[MAX_RANGE_SETS];

/* Number of newline characters in the current input, as counted by
   count_lines_in_stream.  */
static int lines;

/* The current input: FD is what all reading goes through; FH is only
   set (by buffer_stdin_to_tmpfile) when the input is a temporary file
   holding a copy of standard input.  */
static struct 
{
  int fd;
  FILE *fh;
} input;

/* Buffer for line numbers.
   An 11 digit counter may overflow within an hour on a P2/466,
   an 18 digit counter needs about 1000y.
   Layout: 17 blanks, the digit `0', a tab and the terminating NUL.  */
#define LINE_COUNTER_BUF_LEN 20
static char line_buf[LINE_COUNTER_BUF_LEN] =
  {
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '0',
    '\t', '\0'
  };

/* Position in `line_buf' where printing starts.  This will not change
   unless the number of lines is larger than 999999.  */
static char *line_num_print = line_buf + LINE_COUNTER_BUF_LEN - 8;

/* Position of the first (most significant) digit in `line_buf'.
   next_line_num moves it left as the counter grows.  */
static char *line_num_start = line_buf + LINE_COUNTER_BUF_LEN - 3;

/* Position of the last (least significant) digit in `line_buf'.  */
static char *line_num_end = line_buf + LINE_COUNTER_BUF_LEN - 3;

/* Preserves the `cat' function's local `newlines' between invocations.
   NOTE(review): nothing in the visible source reads or writes this
   variable; it looks like a leftover -- confirm before removing.  */
static int newlines2 = 0;

/* Long options accepted by getopt_long in main.  Each entry maps to the
   short option character handled in main's switch; --quiet and --silent
   are synonyms.  The help and version entries come from macros defined
   outside this file.  */
static struct option const long_options[] =
{
  {"number", no_argument, NULL, 'n'},
  {"range", required_argument, NULL, 'r'},
  {"quiet", no_argument, NULL, 'q'},
  {"silent", no_argument, NULL, 'q'},
  {"verbose", no_argument, NULL, 'v'},
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {NULL, 0, NULL, 0}    /* end-of-table sentinel */
};

/* Print usage information and exit with STATUS.
   A non-successful STATUS only prints a short hint on stderr; otherwise
   the complete help text goes to stdout.  Never returns.  */
void
usage (int status)
{
  if (status != EXIT_SUCCESS)
    {
      /* Failure: point the user at --help and leave.  */
      fprintf (stderr, _("Try `%s --help' for more information.\n"),
               program_name);
      exit (status);
    }

  /* Success: emit the full help text, piece by piece.  */
  printf (_("\
Usage: %s [OPTION]... [FILE]...\n\
"),
          program_name);
  fputs (_("\
Print slices of lines of each FILE to standard output.\n\
With more than one FILE, precede each with a header giving the file name.\n\
With no FILE, or when FILE is -, read standard input.\n\
\n\
"), stdout);
  fputs (_("\
Mandatory arguments to long options are mandatory for short options too.\n\
"), stdout);
  fputs (_("\
  -n, --number             number all output lines\n\
  -r, --range=N            slices of lines to be output\n\
"), stdout);
  fputs (_("\
  -q, --quiet, --silent    never print headers giving file names\n\
  -v, --verbose            always print headers giving file names\n\
"), stdout);
  fputs (HELP_OPTION_DESCRIPTION, stdout);
  fputs (VERSION_OPTION_DESCRIPTION, stdout);
  fputs (_("\
\n\
N must conform to following format:\n\
N_START:N_END, ...\n\
N_START or N_END may be an absolute line position or relative-to-EOF one.\n\
"), stdout);
  emit_bug_reporting_address ();

  exit (status);
}

/* Print the `==> FILENAME <==' banner on stdout.  Every banner except
   the very first one is preceded by an empty line.  */
static void
write_header (const char *filename)
{
  /* Becomes true once any banner has been printed.  */
  static bool banner_printed = false;
  const char *separator = "";

  if (banner_printed)
    separator = "\n";
  banner_printed = true;

  printf ("%s==> %s <==\n", separator, filename);
}

/* Advance the decimal counter kept as text in `line_buf' by one.
   Digits are bumped from the least significant position leftwards,
   carrying while a digit wraps past '9'.  If the carry runs off the most
   significant digit the number grows by one digit, or, when the buffer
   is already full, its first character becomes '>'.  */
static void
next_line_num (void)
{
  char *digit = line_num_end;

  for (;;)
    {
      char before = *digit;

      *digit = before + 1;
      if (before < '9')
        return;                 /* no carry needed: done */

      /* This position wrapped; reset it and carry to the left.  */
      *digit = '0';
      digit--;
      if (digit < line_num_start)
        break;
    }

  /* The carry left the current digits: widen the number if possible.  */
  if (line_num_start > line_buf)
    {
      line_num_start--;
      *line_num_start = '1';
    }
  else
    *line_buf = '>';

  /* Keep the print position at or before the first digit.  */
  if (line_num_start < line_num_print)
    line_num_print--;
}

/* Copy lines LINE_FROM through LINE_TO (inclusive) of FD to stdout.

   FILENAME is only used in diagnostics.  OFFSET_FROM is the byte offset
   of the first byte of line LINE_FROM; reading starts there.  OFFSET_TO
   is accepted for interface compatibility but not needed, because the
   copy stops by counting newlines.

   When `number_mode' is set, each output line is prefixed with the
   current line counter, which is then advanced.  A line is numbered
   exactly once, even when it spans several read buffers.  A final line
   that lacks a trailing newline is copied as is.

   Return true on success, false (after issuing a diagnostic) on a seek,
   read or write error.  */
static bool
elide_lines_seekable (const char *filename, int fd,
                      uintmax_t line_from, uintmax_t line_to,
                      off_t offset_from, off_t offset_to)
{
  char buffer[BUFSIZ];
  /* True while the next byte to be written begins a new line.  */
  bool at_line_start = true;
  uintmax_t line_current = line_from;

  (void) offset_to;

  if (lseek (fd, offset_from, SEEK_SET) < 0)
    {
      error (0, errno, _("cannot lseek %s"), quote (filename));
      return false;
    }

  /* Stop reading as soon as the last requested line is complete.  */
  while (line_current <= line_to)
    {
      size_t bytes_read = safe_read (fd, buffer, sizeof buffer);
      char const *pos = buffer;
      char const *lim;

      if (bytes_read == SAFE_READ_ERROR)
        {
          error (0, errno, _("error reading %s"), quote (filename));
          return false;
        }
      if (bytes_read == 0)
        break;

      /* Only the first BYTES_READ bytes are valid; the buffer is not
         NUL-terminated, so string functions must not be used on it.  */
      lim = buffer + bytes_read;

      while (pos < lim && line_current <= line_to)
        {
          char const *nl = memchr (pos, '\n', lim - pos);
          /* Write through the newline, or to the end of valid data when
             the line continues in the next buffer.  */
          char const *chunk_end = nl ? nl + 1 : lim;
          size_t chunk_len = chunk_end - pos;

          if (number_mode && at_line_start)
            {
              size_t num_len = strlen (line_num_print);

              if (fwrite (line_num_print, 1, num_len, stdout) != num_len)
                {
                  error (0, errno, _("write error"));
                  return false;
                }
              next_line_num ();
            }

          if (fwrite (pos, 1, chunk_len, stdout) != chunk_len)
            {
              error (0, errno, _("write error"));
              return false;
            }

          at_line_start = (nl != NULL);
          if (nl != NULL)
            line_current++;
          pos = chunk_end;
        }
    }
  return true;
}

/* Turn a relative-to-EOF position into an absolute line number.
   A negative *RANGE_POS counts back from the end of the input (-1
   becomes `lines'); zero and positive values are left untouched.  */
static void
relative_range_to_absolute (long *range_pos)
{
  long pos = *range_pos;

  if (pos >= 0)
    return;

  *range_pos = pos + (lines + 1);
}

static bool
range_lines (const char *filename, int fd)
{
  bool ok = true;
  int i;
  long line_from, line_to;
  off_t offset_from, offset_to;

  if (print_headers)
    write_header (filename);
  
  for (i = 0; i < range_max; i += 2)
    {
      line_from = ranges[i];
      line_to = ranges[i+1];

      relative_range_to_absolute (&line_from);
      relative_range_to_absolute (&line_to);
  
      offset_from = offsets[i];
      offset_to = offsets[i+1];

      ok &= elide_lines_seekable (filename, fd, line_from, line_to, offset_from, offset_to);
    }
  return ok;
}

static bool
determine_seek_offsets (const char *filename, int fd)
{
  char buffer[BUFSIZ];
  int i;
  long line_iter, range_abs;
  size_t bytes_offset;
  off_t seek_offset;

  if ((lseek (fd, 0, SEEK_SET)) < 0)
    {
      error (0, errno, _("cannot lseek %s"), quote (filename));
      return false;
    }
  for (i = 0, line_iter = 0, bytes_offset = 0, seek_offset = 0; 1;)
    {
      size_t bytes_read = safe_read (fd, buffer, BUFSIZ);
      size_t bytes_to_iter = 0;

      if (bytes_read == SAFE_READ_ERROR)
	{
          error (0, errno, _("error reading %s"), quote (filename));
          return false;
        }
      if (bytes_read == 0)
        break;
      while (bytes_to_iter < bytes_read)
        {
          if (buffer[bytes_to_iter] == '\n') 
            {
              line_iter++;
              break;
            }
          bytes_to_iter++;
        }
      bytes_offset += bytes_to_iter + 1;

      range_abs = ranges[i];
      relative_range_to_absolute (&range_abs);
      if (line_iter == range_abs)
        offsets[i++] = seek_offset;

      if ((seek_offset = lseek (fd, bytes_offset, SEEK_SET)) < 0)
        { 
          error (0, errno, _("cannot lseek %s"), quote (filename));
          return false;
        }
    }
  return true;
}

/* Count the newline characters readable from FD, starting at its
   current position, and store the total in `lines'.
   FILENAME is only used in diagnostics.  Return true on success; on a
   read error issue a diagnostic, leave `lines' untouched and return
   false.  */
static bool
count_lines_in_stream (const char *filename, int fd)
{
  char buffer[BUFSIZ];
  uintmax_t newline_count = 0;

  for (;;)
    {
      size_t n_read = safe_read (fd, buffer, BUFSIZ);
      const char *p;
      const char *limit;

      if (n_read == SAFE_READ_ERROR)
        {
          error (0, errno, _("error reading %s"), quote (filename));
          return false;
        }

      if (n_read == 0)
        {
          /* End of input: publish the total.  */
          lines = newline_count;
          return true;
        }

      limit = buffer + n_read;
      for (p = buffer; p < limit; p++)
        if (*p == '\n')
          newline_count++;
    }
}

static bool
validate_ranges (const char *filename, int fd)
{
  bool ok;
  int i;
  long line_start, line_end;
  long seen_start, seen_end;

  ok = count_lines_in_stream (filename, fd);

  for (i = 0, seen_start = 0, seen_end = 0; i < range_max; i += 2) 
    {
      line_start = ranges[i];
      line_end = ranges[i+1];

      if (line_start == 0)
        error (EXIT_FAILURE, 0, _("starting line must not be 0"));
      if (line_end == 0)
        error (EXIT_FAILURE, 0, _("ending line must not be 0"));
      
      relative_range_to_absolute (&line_start);
      relative_range_to_absolute (&line_end);

      if (line_start <= seen_start)
        error (EXIT_FAILURE, 0, _("starting line overlaps with previous ending one"));
      if (line_start > line_end)
        error (EXIT_FAILURE, 0, _("starting line must preceed the ending line"));

      if (line_start > lines)
        error (EXIT_FAILURE, 0, _("starting line exceeds total lines of %s"), quote (filename));
      if (line_end > lines)
        error (EXIT_FAILURE, 0, _("ending line exceeds total lines of %s"), quote (filename));

      seen_start = line_start;
      seen_start = line_end;
    }
  return ok;
}

/* Copy everything readable from FD into a fresh temporary file so that
   non-seekable input (a pipe, a terminal) can be processed like a
   regular file.

   On success the temporary file is recorded in `input' (both the stream
   and its descriptor), the descriptor is positioned at the start of the
   data, and true is returned; the caller closes `input.fh' when done.
   On failure a diagnostic is issued, the temporary file is closed,
   `input' is left untouched and false is returned.
   FILENAME is only used in diagnostics.  */
static bool
buffer_stdin_to_tmpfile (const char *filename, int fd)
{
  char buffer[BUFSIZ];
  FILE *tempfile = tmpfile ();

  if (tempfile == NULL)
    {
      error (0, errno, _("cannot create temporary file"));
      return false;
    }

  for (;;)
    {
      size_t bytes_read = safe_read (fd, buffer, sizeof buffer);

      if (bytes_read == SAFE_READ_ERROR)
        {
          error (0, errno, _("error reading %s"), quote (filename));
          goto fail;
        }
      if (bytes_read == 0)
        break;

      /* Write exactly what was read: the buffer is raw data, not a
         NUL-terminated string.  */
      if (fwrite (buffer, 1, bytes_read, tempfile) != bytes_read)
        {
          error (0, errno, _("write error"));
          goto fail;
        }
    }

  /* All further access goes through the descriptor, so push the data
     out of the stdio buffer and rewind the descriptor itself.  */
  if (fflush (tempfile) != 0
      || lseek (fileno (tempfile), 0, SEEK_SET) < 0)
    {
      error (0, errno, _("write error"));
      goto fail;
    }

  input.fd = fileno (tempfile);
  input.fh = tempfile;
  return true;

 fail:
  fclose (tempfile);
  return false;
}

/* Process one operand: output the configured ranges of FILENAME, where
   "-" means standard input.

   Standard input is first copied to a temporary file so it can be
   scanned several times; a named file is opened read-only.  The ranges
   are then validated against the input, the offsets of the requested
   lines are located, and the lines are written.  Each stage only runs
   if the previous one succeeded.  The input is closed in all cases once
   it has been opened.

   Return true if everything succeeded, false (after a diagnostic has
   been issued) otherwise.  */
static bool
range_file (const char *filename)
{
  int fd;
  bool ok;

  is_stdin = STREQ (filename, "-");

  if (is_stdin)
    {
      have_read_stdin = true;
      fd = STDIN_FILENO;
      filename = _("standard input");
      if (O_BINARY && ! isatty (STDIN_FILENO))
        freopen (NULL, "rb", stdin);

      /* Without the temporary copy there is nothing to work on, and
         `input' does not describe an open file.  */
      if (!buffer_stdin_to_tmpfile (filename, fd))
        return false;
    }
  else
    {
      fd = open (filename, O_RDONLY | O_BINARY);
      if (fd < 0)
        {
          error (0, errno, _("cannot open %s for reading"), quote (filename));
          return false;
        }
      input.fd = fd;
      input.fh = NULL;
    }

  ok = (validate_ranges (filename, input.fd)
        && determine_seek_offsets (filename, input.fd)
        && range_lines (filename, input.fd));

  if (is_stdin)
    {
      /* Closing the stream also disposes of the temporary file.  */
      if (fclose (input.fh) != 0)
        {
          error (0, errno, _("cannot close temporary file"));
          ok = false;
        }
    }
  else if (close (fd) != 0)
    {
      error (0, errno, _("closing %s"), quote (filename));
      ok = false;
    }

  return ok;
}

int 
main(int argc, char **argv) 
{
  char *buf, *buf_orig, *ranges_arg = NULL, *range_digits;
  enum header_mode header_mode = multiple_files;
  bool ok = true, seen_range_sep = false;
  int c, i, digits_count, digit_increment, ranges_seen;
  
  /* Initializer for file_list if no file-arguments
     were specified on the command line.  */
  static char const *const default_file_list[] = {"-", NULL};
  char const *const *file_list;
   
  initialize_main (&argc, &argv);
  program_name = argv[0];
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);

  atexit (close_stdout);

  have_read_stdin = false;

  while ((c = getopt_long (argc, argv, "nqr:v", long_options, NULL))
          != -1) 
    {
      switch(c) 
        {
        case 'n':
	  number_mode = true;
          next_line_num();
          break;

	case 'q':
	  header_mode = never;
	  break;

        case 'r':
	  range_mode = true;
	  ranges_arg = optarg;
          break;

	case 'v':
	  header_mode = always;
	  break;

	case_GETOPT_HELP_CHAR;

        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);

        default:	  
          usage (EXIT_FAILURE);
        }
    }

  if (!range_mode)
    usage (EXIT_FAILURE);
  else
    {
      ranges_seen = 0;
      while (*ranges_arg)
        {
	  if (VALID_RANGE (*ranges_arg))
	    {
	      digits_count = 0;
	      range_digits = ranges_arg;
              while (VALID_RANGE (*range_digits))
	        {
	          digits_count++;
	          range_digits++;
		}
	      digits_count++;
	      buf = xcalloc (digits_count, sizeof (char));
	      buf_orig = buf;
              while (VALID_RANGE (*ranges_arg))
	        {
		  *buf = *ranges_arg++;
		  buf++;
		}
	      *buf = '\0';
	      buf = buf_orig;

              if (*buf == '+')
		{
                  if ((range_max + 1) % 2 == 0)
                    {
                      digit_increment = atoi (buf);
                      ranges[range_max] = ranges[range_max - 1] + digit_increment;
                    }
                  else
		    error (EXIT_FAILURE, 0, _("+ prefix can not be applied to starting lines"));
                }
	      else
	        ranges[range_max] = atol (buf);
              range_max++;
	      free (buf);
	      
	      if (range_max > MAX_RANGE_SETS)
	        error (EXIT_FAILURE, 0, _("only %d sets permitted"), MAX_RANGE_SETS / 2);
		
	      ranges_seen++;
	    }
	  else
	    {
	      if (ranges_seen == 2)
	        {
	          if (*ranges_arg != ',')
	            error (EXIT_FAILURE, 0, _("comma as set separator required"));
		  ranges_seen = 0;
		  seen_range_sep = false;
		}
	      else if (ranges_seen == 1 && !seen_range_sep)
	        {
		  if (*ranges_arg != ':')
	            error (EXIT_FAILURE, 0, _("colon as range separator required"));
		  else
		    seen_range_sep = true;
		}
	      else 
	        {
		  if (!VALID_RANGE (*ranges_arg))
		    error (EXIT_FAILURE, 0, _("range must be number"));
		}
	      ++ranges_arg;  
	    }  
	 }
      }
    
  file_list = (optind < argc
	       ? (char const *const *) &argv[optind]
	       : default_file_list);

  if (header_mode == always
      || (header_mode == multiple_files && optind < argc - 1))
    print_headers = true;

  if (O_BINARY && ! isatty (STDOUT_FILENO))
    freopen (NULL, "wb", stdout);

  for (i = 0; file_list[i]; i++)
    ok &= range_file (file_list[i]);

  if (have_read_stdin && close (STDIN_FILENO) < 0)
    error (EXIT_FAILURE, errno, "-");
      
  exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
}
_______________________________________________
Bug-coreutils mailing list
Bug-coreutils@gnu.org
http://lists.gnu.org/mailman/listinfo/bug-coreutils

Reply via email to