------- Original Message -------
On Tuesday, September 12th, 2023 at 2:36 PM, Rob Landley <r...@landley.net> 
wrote:


> On 9/11/23 23:56, Oliver Webb via Toybox wrote:
> 
> > I have made a implementation of the 'csplit' command in about 160 lines of 
> > code.
> 
> 
> You have TOYFLAG_MAYFORK on this command. Sigh, explaining the lib/toyflags.h
> values is one of the tutorial videos I need to make.

You do cover the behavior of the NOFORK and MAYFORK flags
in your video on the true/false commands.

> I dunno why csplit would want MAYFORK here. A normal command can just xexit()
> and let the kernel close filehandles and free memory when the process exits. I
> note that 95% of the overhead of fork/exec is the exec part, not the fork 
> part,
> so "fork and call toy_find("blah")->toy_main()" is still pretty cheap. (On

I have removed the MAYFORK flag in the implementation.

> > The other main one is the fact it doesn't do "[LINE] {[NUMBER]}" cleanly 
> > yet.
> 
> 
> I applied what you sent verbatim and haven't started cleaning anything up yet,
> if you have more work to do I'm not actually familiar with csplit. (Never used
> it, still need to come up to speed...)

I do have another patch to submit. It fixes the "LINE {NUMBER}" case, but it
introduces a new problem: it doesn't handle "[RULE] {NUMBER} LINE" correctly,
because it resets the line number every time it encounters a "{}" rule.

After a more careful reading of the POSIX standard, I realized that the
"%regexp%" rules don't function like "/regexp/1"; instead, they _exclude_ the
lines up to the pattern. After some fiddling with the code that switches files
and fprintf()s lines to them, I got the "%regexp%" rules to work like they
should:

  $ seq 10 | ./csplit - 2 %4% 7 -s
  $ cat xx0*
  1
  4
  5
  6
  7
  8
  9
  10
 
  $ seq 10 | csplit - 2 %4% 7 -s
  $ cat xx0*
  1
  4
  5
  6
  7
  8
  9
  10


-s is passed to both commands because I have not gotten the file size output
to work consistently yet. The implementation stat()s the files it has written,
and the byte sizes it gets back differ from those printed by the coreutils
csplit I am testing against.
From 9e2b87dc242951e17a478609cf4fd90ca8a21aff Mon Sep 17 00:00:00 2001
From: Oliver Webb <aquahobby...@proton.me>
Date: Tue, 12 Sep 2023 22:04:56 -0500
Subject: [PATCH] Minor fixes in csplit, got %regex% to exclude stuff

---
 toys/pending/csplit.c | 41 +++++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/toys/pending/csplit.c b/toys/pending/csplit.c
index 53fe2f9d..08eef76d 100644
--- a/toys/pending/csplit.c
+++ b/toys/pending/csplit.c
@@ -9,7 +9,7 @@
  *	GNU Extension: "{*}"
  *
 
-USE_CSPLIT(NEWTOY(csplit, "<2skf:n#", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_MAYFORK))
+USE_CSPLIT(NEWTOY(csplit, "<2skf:n#", TOYFLAG_USR|TOYFLAG_BIN))
 
 config CSPLIT
   bool "csplit"
@@ -26,7 +26,7 @@ config CSPLIT
 
 	Valid Rules:
 	/regexp/[INTEGER] Break file before line that regexp matches,
-	%regexp%[INTEGER] Break file after line that regexp matches,
+	%regexp%[INTEGER] 
 	If a offset is specified for these rules, the break will happen [INTEGER]
 	lines after the regexp match
 	if a offset is specified, it will break at [INTEGER] lines after the offset
@@ -45,8 +45,8 @@ GLOBALS(
 )
 
 size_t indx = 1, findx = 0, lineno = 1, btc = 0;
+int eg = 0, offset = -1, withld = 0;
 char *filefmt, *flname, *prefix;
-int eg = 0, offset = -1;
 
 // This is only int so we can exit cleanly in ternary operators
 int abort_csplit(char *err) {
@@ -85,7 +85,7 @@ int cntxt(char *line, char *rule) {
 	  return rgmatch(rule, line, "/%[^/%]/%d");
 	  break;
 	case '%':
-	  offset++;
+	  withld = 1;
 	  return rgmatch(rule, line, "%%%[^/%]%%%d");
 	  break;
 
@@ -96,6 +96,9 @@ int cntxt(char *line, char *rule) {
 	  else if (!sscanf(rule,"{%lu}",&btc))
 		abort_csplit("bad rule");
 
+	  // Reset the lineno so we can do things like "10 {*}"
+	  lineno = 1;
+
 	  if (cntxt(line, toys.optargs[indx-1])) {
 		// Manipulate the rule then return to it later so we create a
 		// new file but are still on the same rule. This is the only
@@ -110,9 +113,13 @@ int cntxt(char *line, char *rule) {
 	  break;
 
 	default:
-	 offset = ((size_t)atoll(rule)) ? (atoll(rule)) : abort_csplit("bad rule");
-	 return (lineno > offset) ? abort_csplit("bad rule order") :
-	   (lineno == offset);
+	 if (lineno > ((size_t)atoll(rule))) { 
+	   abort_csplit("bad rule order");
+	 } else if (!(atoll(rule))) {
+	   abort_csplit("bad rule");
+	 } else {
+	   return (lineno == (size_t)atoll(rule)); 
+	 }
 	 break;
   }
 
@@ -137,20 +144,22 @@ void csplit_main(void)
 	lineno++;
 	if (cntxt(line, toys.optargs[indx])) {
 
-	  fclose(actvfile);
-	  if (!FLAG(s)) {
-		stat(flname, &st);
-		printf("%ld\n", st.st_size);
+	  if (!withld) {
+		fclose(actvfile);
+		if (!FLAG(s)) {
+		  stat(flname, &st);
+		  printf("%ld\n", st.st_size);
+		}
+		findx++;
+		flname = xmprintf(filefmt, prefix, findx);
+		actvfile = xfopen(flname, "w+");
 	  }
 
 	  indx++;
-	  findx++;
-	  flname = xmprintf(filefmt, prefix, findx);
-	  actvfile = xfopen(flname, "w+");
-
+	  withld = 0;
 	  if (indx == toys.optc) eg = 1;
 	}
-	fprintf(actvfile, "%s\n", line);
+	if (!withld) fprintf(actvfile, "%s\n", line);
   }
 
   fclose(actvfile);
-- 
2.34.1

_______________________________________________
Toybox mailing list
Toybox@lists.landley.net
http://lists.landley.net/listinfo.cgi/toybox-landley.net

Reply via email to