According to Geoff Hutchison:
> On Mon, 11 Dec 2000, Gilles Detillieux wrote:
> > stdin and stderr at all??? If you close stdin, you should probably
> > reopen is with /dev/null or something inoccuous like that, or perhaps
> > better yet the input file itself. However, a properly written external
> > parser or converter should simply leave stdin alone.
>
> Yes, my thoughts exactly. But I also didn't like the caveat of
> "properly-written." Setting stdin to /dev/null or the contents of the
> input file itself seem like decent alternatives.
>
> > In any case, you most definitely DO NOT want to close stderr, or you
> > lose any error output from the external parser or converter, which
> > would be disatrous in terms of trying to debug its operation. The
> > popen didn't do this, so why are we doing it now?
>
> Good point.
>
> > By the way, the customary practise when fork() fails is to sleep a few
> > seconds and try again, up to a certain maximum (2-4 times), or just give
>
> Look, I've written about zero code using fork, popen, dup, and company. So
> I don't know the "customs" about using them. Yes, I can read the man
> pages, but that's not the same as experience.
>
> I'd be quite happy to concentrate on the stuff I *do* have experience
> doing (i.e. htsearch work, system architecture and planning) But it also
> seems like there are a lot of important lower-level things that no one is
> stepping forward to do.
OK, I've made some pretty hefty changes to ExternalParser.cc to handle
all of these problems, the zombie process problem reported this morning,
and the old, known bug about binary output from an external converter
getting mangled. Please, folks, give this new code (committed to CVS
already, but the patch to the 121000 snapshot is below) a good workout
to make sure it's solid. I'm afraid I haven't the time to do so myself,
as I have a couple deadlines breathing down my neck. It does compile
cleanly now on my system.
I'm still very concerned about the portability of this new code to
Cygwin, so if someone can test that it'd be much appreciated. I'm also
somewhat concerned about the portability of the <wait.h> include, which
I understand is a SYSV-ism, and may not port to BSD. I haven't made
the corresponding changes to ExternalTransport.cc, but I may wait for
feedback, or for someone else to do that bit. Please, hammer away...
Index: htdig/ExternalParser.cc
===================================================================
RCS file: /opt/htdig/cvs/htdig3/htdig/ExternalParser.cc,v
retrieving revision 1.19.2.12
retrieving revision 1.19.2.20
diff -c -3 -p -r1.19.2.12 -r1.19.2.20
*** htdig/ExternalParser.cc 2000/12/10 21:44:08 1.19.2.12
--- htdig/ExternalParser.cc 2000/12/12 19:43:23 1.19.2.20
***************
*** 13,19 ****
// or the GNU Public License version 2 or later
// <http://www.gnu.org/copyleft/gpl.html>
//
! // $Id: ExternalParser.cc,v 1.19.2.12 2000/12/10 21:44:08 ghutchis Exp $
//
#ifdef HAVE_CONFIG_H
--- 13,19 ----
// or the GNU Public License version 2 or later
// <http://www.gnu.org/copyleft/gpl.html>
//
! // $Id: ExternalParser.cc,v 1.19.2.20 2000/12/12 19:43:23 grdetil Exp $
//
#ifdef HAVE_CONFIG_H
***************
*** 34,39 ****
--- 34,40 ----
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
+ #include <wait.h>
static Dictionary *parsers = 0;
static Dictionary *toTypes = 0;
*************** ExternalParser::parse(Retriever &retriev
*** 171,185 ****
#ifndef HAVE_MKSTEMP
path << "/htdext." << getpid(); // This is unfortunately predictable
fd = open((char*)path, O_WRONLY|O_CREAT|O_EXCL);
! if (!fd)
! return;
#else
path << "/htdex.XXXXXX";
fd = mkstemp((char*)path);
! if (!fd)
! return;
#endif
write(fd, contents->get(), contents->length());
close(fd);
--- 172,194 ----
#ifndef HAVE_MKSTEMP
path << "/htdext." << getpid(); // This is unfortunately predictable
+ #ifdef O_BINARY
+ fd = open((char*)path, O_WRONLY|O_CREAT|O_EXCL|O_BINARY);
+ #else
fd = open((char*)path, O_WRONLY|O_CREAT|O_EXCL);
! #endif
#else
path << "/htdex.XXXXXX";
fd = mkstemp((char*)path);
! // can we force binary mode somehow under Cygwin, if it has mkstemp?
#endif
+ if (fd < 0)
+ {
+ if (debug)
+ cout << "External parser error: Can't create temp file "
+ << (char *)path << endl;
+ return;
+ }
write(fd, contents->get(), contents->length());
close(fd);
*************** ExternalParser::parse(Retriever &retriev
*** 189,233 ****
char *token1, *token2, *token3;
int loc = 0, hd = 0;
URL url;
! String convertToType = ((String *)toTypes->Find(contentType))->get();
int get_hdr = (convertToType.nocase_compare("user-defined") == 0);
int get_file = (convertToType.length() != 0);
String newcontent;
int stdout_pipe[2];
! int fork_result;
! if( (pipe(stdout_pipe) == 0) )
! {
! fork_result = fork(); // Fork so we can execute in the child process
! if(fork_result == -1)
! {
! cout << "Fork Failure in ExternalParser" << endl;
! exit(EXIT_FAILURE); // Do we really want to exit?
! }
! else if(fork_result == 0) // Child process
! {
! close(STDIN_FILENO); // Close STDIN
! close(STDERR_FILENO); // Close STDERR
! close(STDOUT_FILENO); // Close then handle STDOUT
! dup(stdout_pipe[1]);
! close(stdout_pipe[0]);
! close(stdout_pipe[1]);
!
! // Call External Parser
! execl(currentParser.get(), currentParser.get(), path.get(),
! contentType.get(), base.get().get(), configFile.get(), 0);
!
! exit(EXIT_FAILURE);
! }
!
! else // Parent Process
! {
! close(stdout_pipe[1]); // Close STDOUT for writing
! FILE *input = fdopen(stdout_pipe[0], "r");
! while (readLine(input, line))
{
if (get_hdr)
{
line.chop('\r');
--- 198,285 ----
char *token1, *token2, *token3;
int loc = 0, hd = 0;
URL url;
! String mime = contentType;
! mime.lowercase();
! int sep = mime.indexOf(';');
! if (sep != -1)
! mime = mime.sub(0, sep).get();
! String convertToType = ((String *)toTypes->Find(mime))->get();
int get_hdr = (convertToType.nocase_compare("user-defined") == 0);
int get_file = (convertToType.length() != 0);
String newcontent;
+ StringList cpargs(currentParser);
+ char **parsargs = new char * [cpargs.Count() + 5];
+ int argi;
+ for (argi = 0; argi < cpargs.Count(); argi++)
+ parsargs[argi] = (char *)cpargs[argi];
+ parsargs[argi++] = path.get();
+ parsargs[argi++] = contentType.get();
+ parsargs[argi++] = (char *)base.get().get();
+ parsargs[argi++] = configFile.get();
+ parsargs[argi++] = 0;
+
int stdout_pipe[2];
! int fork_result = -1;
! int fork_try;
! if (pipe(stdout_pipe) == -1)
! {
! if (debug)
! cout << "External parser error: Can't create pipe!" << endl;
! unlink((char*)path);
! return;
! }
!
! for (fork_try = 4; --fork_try >= 0;)
! {
! fork_result = fork(); // Fork so we can execute in the child process
! if (fork_result != -1)
! break;
! if (fork_try)
! sleep(3);
! }
! if (fork_result == -1)
! {
! if (debug)
! cout << "Fork Failure in ExternalParser" << endl;
! unlink((char*)path);
! return;
! }
! if (fork_result == 0) // Child process
{
+ close(STDOUT_FILENO); // Close handle STDOUT to replace with pipe
+ dup(stdout_pipe[1]);
+ close(stdout_pipe[0]);
+ close(stdout_pipe[1]);
+ close(STDIN_FILENO); // Close STDIN to replace with file
+ open((char*)path, O_RDONLY);
+
+ // Call External Parser
+ execv(currentParser.get(), parsargs);
+
+ exit(EXIT_FAILURE);
+ }
+
+ // Parent Process
+ delete [] parsargs;
+ close(stdout_pipe[1]); // Close STDOUT for writing
+ #ifdef O_BINARY
+ FILE *input = fdopen(stdout_pipe[0], "rb");
+ #else
+ FILE *input = fdopen(stdout_pipe[0], "r");
+ #endif
+ if (input == NULL)
+ {
+ if (debug)
+ cout << "Fdopen Failure in ExternalParser" << endl;
+ unlink((char*)path);
+ return;
+ }
+
+ while ((!get_file || get_hdr) && readLine(input, line))
+ {
if (get_hdr)
{
line.chop('\r');
*************** ExternalParser::parse(Retriever &retriev
*** 243,265 ****
}
continue;
}
! if (get_file)
! {
! if (newcontent.length() == 0 &&
! !canParse(convertToType) &&
! mystrncasecmp((char*)convertToType, "text/", 5) != 0)
! {
! if (mystrcasecmp((char*)convertToType, "user-defined") == 0)
! cerr << "External parser error: no Content-Type given\n";
! else
! cerr << "External parser error: can't parse Content-Type \""
! << convertToType << "\"\n";
! cerr << " URL: " << base.get() << "\n";
! break;
! }
! newcontent << line << '\n';
! continue;
! }
token1 = strtok(line, "\t");
if (token1 == NULL)
token1 = "";
--- 295,303 ----
}
continue;
}
! #ifdef O_BINARY
! line.chop('\r');
! #endif
token1 = strtok(line, "\t");
if (token1 == NULL)
token1 = "";
*************** ExternalParser::parse(Retriever &retriev
*** 446,455 ****
break;
}
} // while(readLine)
fclose(input);
// close(stdout_pipe[0]); // This is closed for us by the fclose()
! } // fork_result
! } // create pipes
unlink((char*)path);
if (newcontent.length() > 0)
--- 484,514 ----
break;
}
} // while(readLine)
+ if (get_file)
+ {
+ if (!canParse(convertToType) &&
+ mystrncasecmp((char*)convertToType, "text/", 5) != 0)
+ {
+ if (mystrcasecmp((char*)convertToType, "user-defined") == 0)
+ cerr << "External parser error: no Content-Type given\n";
+ else
+ cerr << "External parser error: can't parse Content-Type \""
+ << convertToType << "\"\n";
+ cerr << " URL: " << base.get() << "\n";
+ }
+ else
+ {
+ char buffer[2048];
+ int length;
+ while ((length = fread(buffer, 1, sizeof(buffer), input)) > 0)
+ newcontent.append(buffer, length);
+ }
+ }
fclose(input);
// close(stdout_pipe[0]); // This is closed for us by the fclose()
! int rpid, status;
! while ((rpid = wait(&status)) != fork_result && rpid != -1)
! ;
unlink((char*)path);
if (newcontent.length() > 0)
--
Gilles R. Detillieux E-mail: <[EMAIL PROTECTED]>
Spinal Cord Research Centre WWW: http://www.scrc.umanitoba.ca/~grdetil
Dept. Physiology, U. of Manitoba Phone: (204)789-3766
Winnipeg, MB R3E 3J7 (Canada) Fax: (204)789-3930
------------------------------------
To unsubscribe from the htdig3-dev mailing list, send a message to
[EMAIL PROTECTED]
You will receive a message to confirm this.