------- Original Message -------
On Tuesday, September 12th, 2023 at 2:36 PM, Rob Landley <r...@landley.net>
wrote:
> On 9/11/23 23:56, Oliver Webb via Toybox wrote:
>
> > I have made an implementation of the 'csplit' command in about 160 lines of
> > code.
>
>
> You have TOYFLAG_MAYFORK on this command. Sigh, explaining the lib/toyflags.h
> values is one of the tutorial videos I need to make.
You do cover the behavior of the NOFORK and MAYFORK flags
in your video on the true/false commands.
> I dunno why csplit would want MAYFORK here. A normal command can just xexit()
> and let the kernel close filehandles and free memory when the process exits. I
> note that 95% of the overhead of fork/exec is the exec part, not the fork
> part,
> so "fork and call toy_find("blah")->toy_main()" is still pretty cheap. (On
I have removed the MAYFORK flag in the implementation.
> > The other main one is the fact it doesn't do "[LINE] {[NUMBER]}" cleanly
> > yet.
>
>
> I applied what you sent verbatim and haven't started cleaning anything up yet,
> if you have more work to do I'm not actually familiar with csplit. (Never used
> it, still need to come up to speed...)
I do have another patch to submit. It fixes the "LINE {NUMBER}" case, but
introduces a new problem: "[RULE] {NUMBER} LINE" is no longer handled correctly,
because the line number is reset every time a "{}" rule is encountered.
After a more careful reading of the POSIX standard, I realized that the
"%regexp%" rules
don't function like "/regexp/1": instead, they _exclude_ the lines up to the
pattern (no output file is created for that section). After
some fiddling with the code
that switches files and fprintf()s lines to them, I got the "%regexp%" rules to
work like they should:
$ seq 10 | ./csplit - 2 %4% 7 -s
$ cat xx0*
1
4
5
6
7
8
9
10
$ seq 10 | csplit - 2 %4% 7 -s
$ cat xx0*
1
4
5
6
7
8
9
10
-s is passed to both because I have not gotten file size
output
to work consistently yet. The implementation stat()s the written files, which
returns byte sizes
that differ from those printed by the coreutils csplit I am testing against.
From 9e2b87dc242951e17a478609cf4fd90ca8a21aff Mon Sep 17 00:00:00 2001
From: Oliver Webb <aquahobby...@proton.me>
Date: Tue, 12 Sep 2023 22:04:56 -0500
Subject: [PATCH] Minor fixes in csplit, got %regex% to exclude stuff
---
toys/pending/csplit.c | 41 +++++++++++++++++++++++++----------------
1 file changed, 25 insertions(+), 16 deletions(-)
diff --git a/toys/pending/csplit.c b/toys/pending/csplit.c
index 53fe2f9d..08eef76d 100644
--- a/toys/pending/csplit.c
+++ b/toys/pending/csplit.c
@@ -9,7 +9,7 @@
* GNU Extension: "{*}"
*
-USE_CSPLIT(NEWTOY(csplit, "<2skf:n#", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_MAYFORK))
+USE_CSPLIT(NEWTOY(csplit, "<2skf:n#", TOYFLAG_USR|TOYFLAG_BIN))
config CSPLIT
bool "csplit"
@@ -26,7 +26,7 @@ config CSPLIT
Valid Rules:
/regexp/[INTEGER] Break file before line that regexp matches,
- %regexp%[INTEGER] Break file after line that regexp matches,
+ %regexp%[INTEGER]
If a offset is specified for these rules, the break will happen [INTEGER]
lines after the regexp match
if a offset is specified, it will break at [INTEGER] lines after the offset
@@ -45,8 +45,8 @@ GLOBALS(
)
size_t indx = 1, findx = 0, lineno = 1, btc = 0;
+int eg = 0, offset = -1, withld = 0;
char *filefmt, *flname, *prefix;
-int eg = 0, offset = -1;
// This is only int so we can exit cleanly in ternary operators
int abort_csplit(char *err) {
@@ -85,7 +85,7 @@ int cntxt(char *line, char *rule) {
return rgmatch(rule, line, "/%[^/%]/%d");
break;
case '%':
- offset++;
+ withld = 1;
return rgmatch(rule, line, "%%%[^/%]%%%d");
break;
@@ -96,6 +96,9 @@ int cntxt(char *line, char *rule) {
else if (!sscanf(rule,"{%lu}",&btc))
abort_csplit("bad rule");
+ // Reset the lineno so we can do things like "10 {*}"
+ lineno = 1;
+
if (cntxt(line, toys.optargs[indx-1])) {
// Manipulate the rule then return to it later so we create a
// new file but are still on the same rule. This is the only
@@ -110,9 +113,13 @@ int cntxt(char *line, char *rule) {
break;
default:
- offset = ((size_t)atoll(rule)) ? (atoll(rule)) : abort_csplit("bad rule");
- return (lineno > offset) ? abort_csplit("bad rule order") :
- (lineno == offset);
+ if (lineno > ((size_t)atoll(rule))) {
+ abort_csplit("bad rule order");
+ } else if (!(atoll(rule))) {
+ abort_csplit("bad rule");
+ } else {
+ return (lineno == (size_t)atoll(rule));
+ }
break;
}
@@ -137,20 +144,22 @@ void csplit_main(void)
lineno++;
if (cntxt(line, toys.optargs[indx])) {
- fclose(actvfile);
- if (!FLAG(s)) {
- stat(flname, &st);
- printf("%ld\n", st.st_size);
+ if (!withld) {
+ fclose(actvfile);
+ if (!FLAG(s)) {
+ stat(flname, &st);
+ printf("%ld\n", st.st_size);
+ }
+ findx++;
+ flname = xmprintf(filefmt, prefix, findx);
+ actvfile = xfopen(flname, "w+");
}
indx++;
- findx++;
- flname = xmprintf(filefmt, prefix, findx);
- actvfile = xfopen(flname, "w+");
-
+ withld = 0;
if (indx == toys.optc) eg = 1;
}
- fprintf(actvfile, "%s\n", line);
+ if (!withld) fprintf(actvfile, "%s\n", line);
}
fclose(actvfile);
--
2.34.1
_______________________________________________
Toybox mailing list
Toybox@lists.landley.net
http://lists.landley.net/listinfo.cgi/toybox-landley.net