Re: [hackers] [sbase][PATCH] Add implementation of tac(1)

2024-03-07 Thread Roberto E. Vargas Caballero
Hi,

As this is a topic more about sbase/ubase organization more
than about patches I am going to move the discussion to the
dev mailing list. Please, answer there instead  of here  in 
hackers.


Regards,



Re: [hackers] [sbase][PATCH] Add implementation of tac(1)

2024-03-07 Thread Roberto E. Vargas Caballero
Hi,

I was thinking about what to do with these patches adding new
commands. They raised a concern about what should be the scope of
sbase. The idea of sbase was to provide a minimal portable POSIX
base, while having ubase for the POSIX commands that cannot be
implemented in a portable way.  Saying that, it is obvious that
there are programs in sbase that are not part of POSIX:

- md5sum
- sha256sum
- sha384sum
- sha512sum
- sponge
- sync
- tar
- install

and maybe some others. At this point is not clear to me what to do
with tools like tac or shuf. There was a small discussion about
this topic in the irc channel, and it was proposed to add a 3rd
repository to contain all these tools that are not part of POSIX
(a bit like the moretools package).  From my point of view, the
main drawback of it is that it requires a 3rd -box program (currently
we have sbase-box and ubase-box). The current situation it not good,
because the two -box are not sharing the library, and the disk space
is duplicated (main reason of -box is to minimize disk space for
restricted environments), and a 3rd -box would make the situation
even worse. But in the other hand, I don't want to add more non
POSIX tools in sbase (in fact, I personally would like to remove
the current non POSIX tools).

I would like to move the discussion here and see what alternatives
we have and how to proceed in this case.

Regards,




Re: [hackers] [sbase][PATCH] Add implementation of tac(1)

2024-03-05 Thread Eric Pruitt
On Sun, Mar 03, 2024 at 12:34:16AM +0100, Elie Le Vaillant wrote:
> +static void
> +tac(FILE *fp)
> +{
> + struct linebuf buf = EMPTY_LINEBUF;
> + struct line line;
> + getlines(fp, &buf);
> +
> + if (buf.nolf) {
> + /* If the last line is not LF-terminated, the
> +  * first output line should not be either */
> +  buf.nolf--;
> +  buf.lines[buf.nlines - 1].len--;
> + }
> +
> + while (buf.nlines--) {
> + line = buf.lines[buf.nlines];
> + fwrite(line.data, 1, line.len, stdout);
> + free(line.data);
> + }
> + free(buf.lines);
> +}

I'm not sure what level of optimization the maintainers expect, but
personally, I think there should be separate implementations for
seekable vs non-seekable files to avoid buffering the entire contents of
the file in memory unnecessarily.

Eric



Re: [hackers] [sbase][PATCH] Add implementation of tac(1)

2024-03-04 Thread Eolien55
Eric Pruitt  wrote:
> I think there should be separate implementations for seekable vs
> non-seekable files to avoid buffering the entire contents of
> the file in memory unnecessarily.

In fact, performance could be also improved for non-seekable files
by forcing a seekable context, ie. use a temporary file (that is
seekable), and then perform the function for seekable files on it.
coreutils' tac does this.

But I'm not sure the performance gain is worth the complexity.
On my computer, for a 115M file generated as follows
yes Hello World | head -n1000
here is the time for this tac implementation, obtained by
`tac 

[hackers] [sbase][PATCH] Add implementation of tac(1)

2024-03-02 Thread Elie Le Vaillant
---
 .gitignore |  1 +
 Makefile   |  1 +
 README |  1 +
 libutil/getlines.c |  3 +-
 tac.1  | 22 +++
 tac.c  | 68 ++
 text.h |  3 +-
 7 files changed, 97 insertions(+), 2 deletions(-)
 create mode 100644 tac.1
 create mode 100644 tac.c

diff --git a/.gitignore b/.gitignore
index c2504af..daa7189 100644
--- a/.gitignore
+++ b/.gitignore
@@ -77,6 +77,7 @@
 /sponge
 /strings
 /sync
+/tac
 /tail
 /tar
 /tee
diff --git a/Makefile b/Makefile
index 8cfd981..8a4c1ef 100644
--- a/Makefile
+++ b/Makefile
@@ -166,6 +166,7 @@ BIN =\
sponge\
strings\
sync\
+   tac\
tail\
tar\
tee\
diff --git a/README b/README
index cadc89e..f16eab8 100644
--- a/README
+++ b/README
@@ -118,6 +118,7 @@ The following tools are implemented:
 0=*|x sponge  .
 0#*|o strings .
 0=*|x sync.
+0=* x tac .
 0=*|o tail.
 0=*|x tar .
 0=*|o tee .
diff --git a/libutil/getlines.c b/libutil/getlines.c
index cef9a61..34392f7 100644
--- a/libutil/getlines.c
+++ b/libutil/getlines.c
@@ -23,7 +23,8 @@ getlines(FILE *fp, struct linebuf *b)
b->lines[b->nlines - 1].len = linelen;
}
free(line);
-   if (b->lines && b->nlines && linelen && b->lines[b->nlines - 
1].data[linelen - 1] != '\n') {
+   b->nolf = b->lines && b->nlines && linelen && b->lines[b->nlines - 
1].data[linelen - 1] != '\n';
+   if (b->nolf) {
b->lines[b->nlines - 1].data = erealloc(b->lines[b->nlines - 
1].data, linelen + 2);
b->lines[b->nlines - 1].data[linelen] = '\n';
b->lines[b->nlines - 1].data[linelen + 1] = '\0';
diff --git a/tac.1 b/tac.1
new file mode 100644
index 000..0274fc6
--- /dev/null
+++ b/tac.1
@@ -0,0 +1,22 @@
+.Dd 2024-02-25
+.Dt TAC 1
+.Os sbase
+.Sh NAME
+.Nm tac
+.Nd concatenate files and print them in reverse
+.Sh SYNOPSIS
+.Nm
+.Op Ar file ...
+.Sh DESCRIPTION
+.Nm
+reads each
+.Ar file
+in sequence and prints their lines in reverse order.
+The files are still printed in order.
+If no
+.Ar file
+is given,
+.Nm
+reads from stdin.
+.Sh SEE ALSO
+.Xr cat 1
diff --git a/tac.c b/tac.c
new file mode 100644
index 000..5dca474
--- /dev/null
+++ b/tac.c
@@ -0,0 +1,68 @@
+#include 
+#include 
+#include 
+
+#include "text.h"
+#include "util.h"
+
+static void
+usage(void)
+{
+   eprintf("usage: %s [file ...]\n", argv0);
+}
+
+static void
+tac(FILE *fp)
+{
+   struct linebuf buf = EMPTY_LINEBUF;
+   struct line line;
+   getlines(fp, &buf);
+
+   if (buf.nolf) {
+   /* If the last line is not LF-terminated, the
+* first output line should not be either */
+buf.nolf--;
+buf.lines[buf.nlines - 1].len--;
+   }
+
+   while (buf.nlines--) {
+   line = buf.lines[buf.nlines];
+   fwrite(line.data, 1, line.len, stdout);
+   free(line.data);
+   }
+   free(buf.lines);
+}
+
+int
+main(int argc, char *argv[])
+{
+   FILE *fp;
+   int ret = 0;
+
+   ARGBEGIN {
+   default:
+   usage();
+   } ARGEND
+
+   if (!argc) {
+   tac(stdin);
+   } else {
+   for (; *argv; argc--, argv++) {
+   if (!strcmp(*argv, "-")) {
+   *argv = "";
+   fp = stdin;
+   } else if (!(fp = fopen(*argv, "r"))) {
+   weprintf("fopen %s:", *argv);
+   ret = 1;
+   continue;
+   }
+   tac(fp);
+   if (fp != stdin && fshut(fp, *argv))
+   ret = 1;
+   }
+   }
+
+   ret |= fshut(stdin, "") | fshut(stdout, "");
+
+   return ret;
+}
diff --git a/text.h b/text.h
index 9858592..dcebbdc 100644
--- a/text.h
+++ b/text.h
@@ -9,8 +9,9 @@ struct linebuf {
struct line *lines;
size_t nlines;
size_t capacity;
+   int nolf;
 };
-#define EMPTY_LINEBUF {NULL, 0, 0,}
+#define EMPTY_LINEBUF {NULL, 0, 0, 0,}
 void getlines(FILE *, struct linebuf *);
 
 int linecmp(struct line *, struct line *);
-- 
2.44.0




[hackers] [sbase][PATCH] Add implementation of tac(1)

2024-02-26 Thread Elie Le Vaillant
---
 .gitignore |  1 +
 Makefile   |  1 +
 README |  1 +
 libutil/getlines.c |  3 +-
 tac.1  | 22 +++
 tac.c  | 68 ++
 text.h |  3 +-
 7 files changed, 97 insertions(+), 2 deletions(-)
 create mode 100644 tac.1
 create mode 100644 tac.c

diff --git a/.gitignore b/.gitignore
index c2504af..daa7189 100644
--- a/.gitignore
+++ b/.gitignore
@@ -77,6 +77,7 @@
 /sponge
 /strings
 /sync
+/tac
 /tail
 /tar
 /tee
diff --git a/Makefile b/Makefile
index 8cfd981..8a4c1ef 100644
--- a/Makefile
+++ b/Makefile
@@ -166,6 +166,7 @@ BIN =\
sponge\
strings\
sync\
+   tac\
tail\
tar\
tee\
diff --git a/README b/README
index cadc89e..f16eab8 100644
--- a/README
+++ b/README
@@ -118,6 +118,7 @@ The following tools are implemented:
 0=*|x sponge  .
 0#*|o strings .
 0=*|x sync.
+0=* x tac .
 0=*|o tail.
 0=*|x tar .
 0=*|o tee .
diff --git a/libutil/getlines.c b/libutil/getlines.c
index cef9a61..34392f7 100644
--- a/libutil/getlines.c
+++ b/libutil/getlines.c
@@ -23,7 +23,8 @@ getlines(FILE *fp, struct linebuf *b)
b->lines[b->nlines - 1].len = linelen;
}
free(line);
-   if (b->lines && b->nlines && linelen && b->lines[b->nlines - 
1].data[linelen - 1] != '\n') {
+   b->nolf = b->lines && b->nlines && linelen && b->lines[b->nlines - 
1].data[linelen - 1] != '\n';
+   if (b->nolf) {
b->lines[b->nlines - 1].data = erealloc(b->lines[b->nlines - 
1].data, linelen + 2);
b->lines[b->nlines - 1].data[linelen] = '\n';
b->lines[b->nlines - 1].data[linelen + 1] = '\0';
diff --git a/tac.1 b/tac.1
new file mode 100644
index 000..0274fc6
--- /dev/null
+++ b/tac.1
@@ -0,0 +1,22 @@
+.Dd 2024-02-25
+.Dt TAC 1
+.Os sbase
+.Sh NAME
+.Nm tac
+.Nd concatenate files and print them in reverse
+.Sh SYNOPSIS
+.Nm
+.Op Ar file ...
+.Sh DESCRIPTION
+.Nm
+reads each
+.Ar file
+in sequence and prints their lines in reverse order.
+The files are still printed in order.
+If no
+.Ar file
+is given,
+.Nm
+reads from stdin.
+.Sh SEE ALSO
+.Xr cat 1
diff --git a/tac.c b/tac.c
new file mode 100644
index 000..5dca474
--- /dev/null
+++ b/tac.c
@@ -0,0 +1,68 @@
+#include 
+#include 
+#include 
+
+#include "text.h"
+#include "util.h"
+
+static void
+usage(void)
+{
+   eprintf("usage: %s [file ...]\n", argv0);
+}
+
+static void
+tac(FILE *fp)
+{
+   struct linebuf buf = EMPTY_LINEBUF;
+   struct line line;
+   getlines(fp, &buf);
+
+   if (buf.nolf) {
+   /* If the last line is not LF-terminated, the
+* first output line should not be either */
+buf.nolf--;
+buf.lines[buf.nlines - 1].len--;
+   }
+
+   while (buf.nlines--) {
+   line = buf.lines[buf.nlines];
+   fwrite(line.data, 1, line.len, stdout);
+   free(line.data);
+   }
+   free(buf.lines);
+}
+
+int
+main(int argc, char *argv[])
+{
+   FILE *fp;
+   int ret = 0;
+
+   ARGBEGIN {
+   default:
+   usage();
+   } ARGEND
+
+   if (!argc) {
+   tac(stdin);
+   } else {
+   for (; *argv; argc--, argv++) {
+   if (!strcmp(*argv, "-")) {
+   *argv = "";
+   fp = stdin;
+   } else if (!(fp = fopen(*argv, "r"))) {
+   weprintf("fopen %s:", *argv);
+   ret = 1;
+   continue;
+   }
+   tac(fp);
+   if (fp != stdin && fshut(fp, *argv))
+   ret = 1;
+   }
+   }
+
+   ret |= fshut(stdin, "") | fshut(stdout, "");
+
+   return ret;
+}
diff --git a/text.h b/text.h
index 9858592..dcebbdc 100644
--- a/text.h
+++ b/text.h
@@ -9,8 +9,9 @@ struct linebuf {
struct line *lines;
size_t nlines;
size_t capacity;
+   int nolf;
 };
-#define EMPTY_LINEBUF {NULL, 0, 0,}
+#define EMPTY_LINEBUF {NULL, 0, 0, 0,}
 void getlines(FILE *, struct linebuf *);
 
 int linecmp(struct line *, struct line *);
-- 
2.43.0