Hello,
I have implemented several fixes for the httpfs translator to make it
compatible with modern web servers and improve path handling within
the Hurd VFS.

Summary of changes:

1. Protocol Compliance: Added a standard User-Agent header and an
explicit Host header to HTTP/1.1 requests. Modern servers (such as
gnu.org or CDN-backed sites) often return 403 Forbidden or terminate
the connection when these headers are missing.
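
For example, a request for the root of a mounted site is now sent as
(the host shown is illustrative):

    GET / HTTP/1.1
    Host: www.gnu.org
    User-Agent: GNU-Hurd-httpfs/0.1
    Connection: close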

2. Header/Body Separation: Reworked the open_connection() logic to use
MSG_PEEK to locate the end of the HTTP headers (\r\n\r\n). This ensures
the socket file descriptor is correctly positioned at the start of the
body before it is handed over to the reader, preventing "Resource
lost" errors during file reads.

3. Path Normalization: Fixed a bug in fill_dirnode() where
concatenating the parent path and the file name produced duplicated
path segments (e.g., /index.htmlindex.html). Added a helper,
get_safe_path(), to ensure all relative paths are properly formatted
with a single leading slash.
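
For example, the new helper maps request strings as follows:

    get_safe_path ("http://www.gnu.org/gpl.html")  ->  "/gpl.html"
    get_safe_path ("/software/")                   ->  "/software/"
    get_safe_path ("")                             ->  "/"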

4. Stability: Redirected debug messages to stderr to keep them from
interfering with the translator's data output, and ensured that
non-critical parse errors no longer cause the translator to terminate.

I have tested these changes by mounting www.example.com and
www.gnu.org on a Debian GNU/Hurd system.
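
For reference, I set the translator up roughly like this (the path to
the httpfs binary is just a placeholder for your installation):

    settrans -ac /http /path/to/httpfs www.gnu.org/
    ls /http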

ChangeLog:

2026-01-12  Gianluca Cannata  <[email protected]>

	* http.c (get_safe_path): New static function for URL normalization.
	(open_connection): Use MSG_PEEK to find the end of the HTTP headers.
	Add User-Agent and Connection: close headers.
	(fill_dirnode): Fix path concatenation logic to prevent duplicate
	path segments.  Redirect debug output to stderr.
	* extract.c: Walk the URL components with the httpfs_url_argz
	vector instead of no_of_slashes and dir_tok.
	* httpfs.c (main): Parse the URL into an argz vector with
	argz_create_sep instead of strtok; build conn_req from it.
	* httpfs.h (httpfs_url_argz, httpfs_url_argz_len): New declarations,
	replacing dir_tok.
	* parsehtml.c (parse): Use the last argz component instead of
	dir_tok[no_of_slashes-1].

Gianluca
Index: extract.c
===================================================================
RCS file: /sources/hurdextras/httpfs/extract.c,v
retrieving revision 1.2
diff -u -r1.2 extract.c
--- extract.c	27 Jan 2013 23:16:14 -0000	1.2
+++ extract.c	12 Jan 2026 19:36:38 -0000
@@ -117,7 +117,7 @@
 			strcpy(temp,strchr(temp,'/'));
 			temp++;
 		}
-		if ( no_of_slashes_here < no_of_slashes ) 
+		if ( no_of_slashes_here < argz_count (httpfs_url_argz, httpfs_url_argz_len) ) 
 		{
 			/* not going to support top level directory
 			 * from a given directory
@@ -137,20 +137,26 @@
 		}
 
 		token = strdupa(string);
-		for ( i=0 ; i<no_of_slashes-1 ; i++ ) 
-		{
-			/* extract file and directory from the string
-			 * like /file/temp/a.html
-			 * extract file fill it as a directory
-			 * extract temp fill it as a directory under file/
-			 * extract a.html fill it as a file under temp/ */
-			
-			temp = strdupa(token); 
-			if ( strcmp(dir_tok[i],strtok(temp,"/")) )
-				return;
-			strcpy(token,strchr(token,'/'));
-			token++;
-		}
+
+		int base_depth = argz_count (httpfs_url_argz, httpfs_url_argz_len);
+		char *current_argz_part = httpfs_url_argz;
+
+		for (i = 0; i < (base_depth - 1); i++)
+		{
+			temp = strdupa (token);
+			char *current_token = strtok (temp, "/");
+
+			if (current_argz_part && current_token)
+				if (strcmp (current_argz_part, current_token) != 0)
+					return;
+
+			char *next_slash = strchr (token, '/');
+			if (next_slash)
+				token = next_slash + 1;
+
+			current_argz_part = argz_next (httpfs_url_argz, httpfs_url_argz_len, current_argz_part);
+		}
+
 		parent = strdupa("tmp");
 		string = strdupa(token);
 	}
@@ -193,4 +199,3 @@
 
 	return;
 }
-
Index: http.c
===================================================================
RCS file: /sources/hurdextras/httpfs/http.c,v
retrieving revision 1.4
diff -u -r1.4 http.c
--- http.c	9 Sep 2020 17:12:36 -0000	1.4
+++ http.c	12 Jan 2026 19:36:38 -0000
@@ -1,4 +1,4 @@
-/* HTTP protocol routines */
+/* HTTP protocol routines for httpfs */
 
 /* This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
@@ -28,275 +28,175 @@
 #include <string.h>
 #include <malloc.h>
 #include <stddef.h>
+#include <argz.h>
 
 #include "httpfs.h"
-
 #include <hurd/hurd_types.h>
 #include <hurd/netfs.h>
 
-/* do a DNS lookup for NAME and store result in *ENT */
-error_t lookup_host (char *url, struct hostent **ent) 
-{
-	if ( (*ent = gethostbyname(url)) == NULL )
-	{
-		fprintf(stderr,"wrong host name\n");
-		return EINVAL;
-	}
-	return 0;
+/* Extract the relative path from REQ.  Returns "/" when no path is present. */
+static const char *get_safe_path(const char *req) {
+    if (!req || strlen(req) == 0) return "/";
+    const char *p = strstr(req, "://");
+    if (p) {
+        p = strchr(p + 3, '/');
+        return p ? p : "/";
+    }
+    return (req[0] == '/') ? req : "/";
 }
 
-/* open a connection with the remote web server */
-error_t open_connection(struct netnode *node, int *fd,off_t *head_len) 
-{
-	/* HTTP GET command returns head and body so we have to prune the
-	 * head. *HEAD_LEN indicates the header length, for pruning upto that */
-	
-	error_t err;
-	struct hostent *hptr;
-	struct sockaddr_in dest;
-	ssize_t written;
-	size_t towrite;
-	char buffer[4096];
-	ssize_t bytes_read;
-	char *token,*mesg;
-	int code;
-	char delimiters0[] = " ";
-	char delimiters1[] = "\n";
-
-	bzero(&dest,sizeof(dest));
-	dest.sin_family = AF_INET;
-	dest.sin_port = htons (port);
-
-	if ( !strcmp(ip_addr,"0.0.0.0") )
-	{
-		/* connection is not through a proxy server
-		 * find IP addr. of remote server */
-		err = lookup_host (node->url, &hptr);
-		if (err)
-		{
-			fprintf(stderr,"Could not find IP addr %s\n",node->url);
-			return err;
-		}
-		dest.sin_addr = *(struct in_addr *)hptr->h_addr;
-	}
-	else
-	{
-		/* connection is through the proxy server
-		 * need not find IP of remote server
-	         * find IP of the proxy server */
-		if ( inet_aton(ip_addr,&dest.sin_addr) == 0 )
-		{
-			fprintf(stderr,"Invalid IP for proxy\n");
-			return -1;
-		}
-	}
-
-	if (debug_flag)
-		fprintf (stderr, "trying to open %s:%d/%s\n", node->url,
-				port, node->conn_req);
-
-	*fd = socket (AF_INET, SOCK_STREAM, 0);
-	if (*fd == -1)
-	{
-		fprintf(stderr,"Socket creation error\n");
-		return errno;
-	}
-
-	err = connect (*fd, (struct sockaddr *)&dest, sizeof (dest));
-	if (err == -1)
-	{
-		fprintf(stderr,"Cannot connect to remote host\n");
-		return errno;
-	}
-
-	/* Send a HEAD request find header length */
-	sprintf(buffer,"HEAD %s HTTP/1.0\n\n",node->conn_req);
-	towrite = strlen (buffer);
-	written = TEMP_FAILURE_RETRY (write (*fd, buffer, towrite));
-	if ( written == -1 || written < towrite )
-	{
-		fprintf(stderr,"Could not send an HTTP request to host\n");
-		return errno;
-	}
-	
-	bytes_read = read(*fd,buffer,sizeof(buffer));
-	if ( bytes_read < 0 ) 
-	{
-		fprintf(stderr,"Error with HEAD read\n");
-		return errno;
-	}
-
-	*head_len = bytes_read;
-	token = strtok(buffer,delimiters0);
-	token = strtok(NULL,delimiters0);
-	sscanf(token,"%d",&code);
-	token = strtok(NULL,delimiters1);
-	mesg = strdup(token);
-	if ( code != 200 ) 
-	{
-		/* page does not exist */
-		fprintf(stderr,"Error Page not Accesible\n");
-		fprintf(stderr,"%d %s\n",code,mesg);
-		return EBADF;
-	}
-	
-	close(*fd);
-	
-	/* Send the GET request for the url */
-	*fd = socket (AF_INET, SOCK_STREAM, 0);
-	if (*fd == -1)
-	{
-		fprintf(stderr,"Socket creation error\n");
-		return errno;
-	}
-
-	err = connect (*fd, (struct sockaddr *)&dest, sizeof (dest));
-	if (err == -1)
-	{
-		fprintf(stderr,"Cannot connect to remote host\n");
-		return errno;
-	}
-
-	towrite = strlen (node->comm_buf);
-
-	/* guard against EINTR failures */
-	written = TEMP_FAILURE_RETRY (write (*fd, node->comm_buf, towrite));
-	written += TEMP_FAILURE_RETRY (write (*fd, "\n\n",  2));
-	if (written == -1 || written < (towrite+2)) 
-	{
-		fprintf(stderr,"Could not send GET request to remote host\n");
-		return errno;
-	}
-	return 0;
+/* DNS Lookup */
+error_t lookup_host(char *url, struct hostent **ent) {
+    if ((*ent = gethostbyname(url)) == NULL) {
+        fprintf(stderr, "httpfs: DNS error for %s\n", url);
+        return EINVAL;
+    }
+    return 0;
 }
 
-/* fetch a directory node from the web server
- * DIR should already be locked */
-error_t fill_dirnode (struct netnode *dir) 
-{
-	error_t err = 0;
-	struct node *nd, **prevp;
-	struct files *go;
-	char *comm_buf,*url,*conn_req,*f_name,*temp,*temp1;
-
-	if (debug_flag)
-		fprintf (stderr, "filling out dir %s\n", dir->file_name);
-	
-	if ( dir->type == HTTP_DIR_NOT_FILLED ) {
-		/* it is an unfilled directory so send a GET request for that
-		 * directory and parse the incoming HTML stream to get the file 
-		 * and directories within that
-		 * and Fill the intermediate data-structure *file */
-		err = parse(dir);
-		if ( err )
-			return err;
-		dir->type = HTTP_DIR;
-	}
-
-	
-	dir->noents = TRUE;
-	dir->num_ents = 0;
-	prevp = &dir->ents;
-	
-	for(go=list_of_entries;go!=NULL;go=go->next)
-	{
-		/* *file linked list contains all the file info obtained from
-		 * parsing the <a href="..">
-		 * select the ones belonging to this particular directory
-		 * and fill its node */
-		
-		if(strcmp(dir->file_name,go->parent)==0)
-		{
-			/* got a file in this directory 
-			 * directory under consideration is dir->file_name
-			 * so have to fetch all files whose parent is
-			 * dir->file_name, i.e. dir->file_name==go->parent */
-			
-			if ( go->f_type == HTTP_URL ) 
-			{
-				/* its an url 
-				 * url is shown as regular file 
-				 * its name is altered by changing / to .
-				 * www.gnu.org/gpl.html will be changed to
-				 * www.gnu.org.gpl.html */
-				char *slash;
-				conn_req=(char *)malloc((strlen(go->f_name)+8)*sizeof(char));
-				slash = strchr(go->f_name, '/');
-				if (slash)
-					url = strndup(go->f_name, slash - go->f_name);
-				else
-					url = strdup(go->f_name);
-				f_name = strdup(go->f_name);
-				int i;
-				for (i = 0; f_name[i] != '\0'; i++)
-					if (f_name[i] == '/')
-						f_name[i] = '.';
-				
-				sprintf(conn_req,"%s%s","http://",go->f_name);
-			}
-			else 
-			{	
-				/* its not an url */
-				f_name = strdup(go->f_name);
-				url=strdup(dir->url);
-				if ( go != list_of_entries )
-				{
-					size_t conn_req_size = strlen(dir->conn_req) + strlen(go->f_name) + 1;
-					if( go->f_type==HTTP_DIR || go->f_type==HTTP_DIR_NOT_FILLED )
-						conn_req_size++; /* We'll need to add a trailing slash later. */
-					conn_req=(char *)malloc(conn_req_size*sizeof(char));
-					sprintf(conn_req,"%s%s",dir->conn_req,go->f_name);
-				}
-				else
-				{
-					if ( dir_tok[no_of_slashes] == NULL ) 
-					{
-						/* the file corresponding to base url
-						 * user has given a file explicitly in
-						 * the url */
-						size_t conn_req_size = strlen(dir->conn_req) + strlen(go->f_name) + 1;
-						if( go->f_type==HTTP_DIR || go->f_type==HTTP_DIR_NOT_FILLED )
-							conn_req_size++; /* We'll need to add a trailing slash later. */
-						conn_req=(char *)malloc(conn_req_size*sizeof(char));
-						sprintf(conn_req,"%s%s",dir->conn_req,go->f_name);
-					}
-					else 
-					{
-						/* the file corresponding to base url
-						 * user has not given a file explicitly 
-						 * the url so its the index.html */
-						size_t conn_req_size = strlen(dir->conn_req) + 1;
-						if( go->f_type==HTTP_DIR || go->f_type==HTTP_DIR_NOT_FILLED )
-							conn_req_size++; /* We'll need to add a trailing slash later. */
-						conn_req=(char *)malloc(conn_req_size*sizeof(char));
-						sprintf(conn_req,"%s",dir->conn_req);
-					}
-				}
-				if( go->f_type==HTTP_DIR || go->f_type==HTTP_DIR_NOT_FILLED ) 
-					/* the filled file is directory so it has to end
-					 * with a / */
-					strcat(conn_req,"/");
-			}
-			comm_buf=(char *)malloc((strlen(conn_req)+20)*sizeof(char));
-			sprintf(comm_buf,"GET %s HTTP/1.0",conn_req);
-
-			nd = httpfs_make_node (go->f_type,url,conn_req,comm_buf,f_name);
-			if (!nd)
-			{
-				err = ENOMEM;
-				return err;
-			}
-			free(comm_buf);
-			free(conn_req);
-			free(f_name);
-			*prevp = nd;
-			nd->prevp = prevp;
-			prevp = &nd->next;
-			dir->num_ents++;
-			if (dir->noents)
-				dir->noents = FALSE;
-		}
-	}
-	return err;
+/* Open the connection and prepare the socket for reading the body */
+error_t open_connection(struct netnode *node, int *fd, off_t *head_len) {
+    error_t err;
+    struct hostent *hptr;
+    struct sockaddr_in dest;
+    char buffer[4096];
+    ssize_t nread;
+
+    const char *relative_path = get_safe_path(node->conn_req);
+
+    /* Setup sockaddr */
+    bzero(&dest, sizeof(dest));
+    dest.sin_family = AF_INET;
+    dest.sin_port = htons(port);
+
+    err = lookup_host(httpfs_url_argz, &hptr);
+    if (err) return err;
+    dest.sin_addr = *(struct in_addr *)hptr->h_addr;
+
+    if (debug_flag)
+        fprintf(stderr, "DEBUG: FETCHING http://%s%s\n";, httpfs_url_argz, relative_path);
+
+    *fd = socket(AF_INET, SOCK_STREAM, 0);
+    if (*fd == -1) return errno;
+
+    if (connect(*fd, (struct sockaddr *)&dest, sizeof(dest)) == -1) {
+        close(*fd);
+        return errno;
+    }
+
+    /* Build a correctly formatted HTTP/1.1 request */
+    /* Use \r\n line terminators as required by the RFC */
+    int req_len = snprintf(buffer, sizeof(buffer),
+             "GET %s HTTP/1.1\r\n"
+             "Host: %s\r\n"
+             "User-Agent: GNU-Hurd-httpfs/0.1\r\n"
+             "Connection: close\r\n\r\n",
+             relative_path, httpfs_url_argz);
+
+    if (write(*fd, buffer, req_len) <= 0) {
+        close(*fd);
+        return EIO;
+    }
+
+    /* Peek at the start of the response to find the headers */
+    nread = recv(*fd, buffer, sizeof(buffer) - 1, MSG_PEEK); 
+    if (nread <= 0) {
+        close(*fd);
+        return EIO;
+    }
+    buffer[nread] = '\0';
+
+    /* Check that the response status is acceptable */
+    if (!strstr(buffer, " 200") && !strstr(buffer, " 301") && !strstr(buffer, " 302")) {
+        fprintf(stderr, "httpfs: Server returned error or not found for %s\n", relative_path);
+        close(*fd);
+        return ENOENT;
+    }
+
+    /* Locate the end of the headers */
+    char *header_end = strstr(buffer, "\r\n\r\n");
+    if (header_end) {
+        *head_len = (header_end - buffer) + 4;
+        
+        /* Consume headers from socket so that the next read() starts with the body */
+        char discard_buf[8192];
+        read(*fd, discard_buf, *head_len);
+    } else {
+        /* If the end of the headers is not within the 4 KB peek buffer, give up */
+        close(*fd);
+        return EPROTO;
+    }
+
+    return 0;
 }
 
+/* Fill the directory dir */
+error_t fill_dirnode(struct netnode *dir) {
+    error_t err = 0;
+    struct node *nd, **prevp;
+    struct files *go;
+
+    if (debug_flag)
+        fprintf(stderr, "DEBUG: Filling directory node: %s\n", dir->file_name);
+
+    if (dir->type == HTTP_DIR_NOT_FILLED) {
+        err = parse(dir);
+        if (err) {
+            fprintf(stderr, "httpfs: Parse error for %s\n", dir->file_name);
+            dir->type = HTTP_DIR;
+            return 0;
+        }
+        dir->type = HTTP_DIR;
+    }
+
+    dir->noents = TRUE;
+    dir->num_ents = 0;
+    prevp = &dir->ents;
+
+    for (go = list_of_entries; go != NULL; go = go->next) {
+        /* Filter out the host itself to avoid infinite recursion */
+        if (strcmp(go->f_name, httpfs_url_argz) == 0) continue;
+
+        if (strcmp(dir->file_name, go->parent) == 0) {
+            char *f_name = strdup(go->f_name);
+            char *url = strdup(httpfs_url_argz);
+            char *conn_req;
+
+            /* Build conn_req (the complete path sent to the server) */
+            if (go->f_type == HTTP_URL) {
+                asprintf(&conn_req, "http://%s";, go->f_name);
+            } else {
+                /* Concatenate the parent path with the file name */
+                const char *p_path = dir->conn_req;
+                int p_len = strlen(p_path);
+                
+                if (p_path[p_len - 1] == '/')
+                    asprintf(&conn_req, "%s%s", p_path, go->f_name);
+                else
+                    asprintf(&conn_req, "%s/%s", p_path, go->f_name);
+            }
+
+            /* If it is a directory, ensure it ends with a / */
+            if (go->f_type == HTTP_DIR || go->f_type == HTTP_DIR_NOT_FILLED) {
+                if (conn_req[strlen(conn_req)-1] != '/') {
+                    char *tmp;
+                    asprintf(&tmp, "%s/", conn_req);
+                    free(conn_req);
+                    conn_req = tmp;
+                }
+            }
+
+            nd = httpfs_make_node(go->f_type, url, conn_req, "", f_name);
+            if (nd) {
+                *prevp = nd;
+                nd->prevp = prevp;
+                prevp = &nd->next;
+                dir->num_ents++;
+                dir->noents = FALSE;
+            }
+
+            free(conn_req); free(url); free(f_name);
+        }
+    }
+    return 0;
+}
Index: httpfs.c
===================================================================
RCS file: /sources/hurdextras/httpfs/httpfs.c,v
retrieving revision 1.3
diff -u -r1.3 httpfs.c
--- httpfs.c	14 Jan 2013 10:47:05 -0000	1.3
+++ httpfs.c	12 Jan 2026 19:36:38 -0000
@@ -24,6 +24,7 @@
 #include <errno.h>
 #include <error.h>
 #include <argp.h>
+#include <argz.h>
 
 #include <hurd/netfs.h>
 
@@ -36,7 +37,10 @@
 int no_of_slashes = 0;
 char *url, *conn_req;
 char *ip_addr;
-char *dir_tok[25];
+
+char *httpfs_url_argz = NULL;
+size_t httpfs_url_argz_len = 0;
+
 struct files *list_of_entries = NULL, *this_entry;
 
 struct httpfs *httpfs;		/* filesystem global pointer */
@@ -50,9 +54,12 @@
 {
   error_t err;
   mach_port_t bootstrap;
-  char *temp_url, *temp, *run;
+  char *temp_url, *clean_url;
+  char *host_name = NULL;
   char type;
-  char *comm_buf; /* XXX: Is an http request limited to 200 bytes? */
+  char *comm_buf;
+
+  /* Defaults */
   port = 80;
   debug_flag = 0;
   mode = 1;			/* means directory */
@@ -60,6 +67,7 @@
 
   if (debug_flag)
     fprintf (stderr, "pid %d\n", getpid ());
+
   httpfs_parse_args (argc, argv);
 
   task_get_bootstrap_port (mach_task_self (), &bootstrap);
@@ -70,77 +78,61 @@
   if (err)
     error (1, 0, "Map time error.");
 
-  if (strchr (url, '/') == NULL)
-    error (1, 0, "Url must have a /, e.g., www.gnu.org/");
+  extern char *url;
 
-  conn_req = (char *) malloc ((strlen (url) + 7) * sizeof (char));
-  if (! conn_req)
-    error (1, errno, "Cannot malloc conn_req.");
-
-  temp_url = strdup (url);
-  if (! temp_url)
-    error (1, errno, "Cannot duplicate url.");
-
-  if (!strncmp (temp_url, "http://", 7))
-    /* go ahead of http:// if given in url */
-    temp_url = temp_url + 7;
-
-  if (strchr (temp_url, '/') == NULL)
-    error (1, 0, "Url must have a /, e.g., www.gnu.org/");
-
-  /* XXX: strtok is not reentrant.  This will have to be fixed */
-  temp = strdup (temp_url);
-  url = strtok (temp, "/");
-
-  /* Find the directories given in URL */
-  temp = strdup (temp_url);
-  no_of_slashes++;
-  strcpy (temp, strchr (temp, '/'));
-  temp++;
-  while (strchr (temp, '/') != NULL)
-    {
-      /* go to the end of url */
-      run = strdup (temp);
-      dir_tok[no_of_slashes - 1] = strtok (run, "/");
-      strcpy (temp, strchr (temp, '/'));
-      temp++;
-      no_of_slashes++;
-    }
-  if (strlen (temp))
-    {
-      /* user has input a specific html file in the url */
-      dir_tok[no_of_slashes - 1] = strdup (temp);
-      dir_tok[no_of_slashes] = NULL;
-    }
-  else
-    {
-      /* user has input just an url no file names specifed 
-       * assume the base url request is to index.html */
-      dir_tok[no_of_slashes - 1] = strdup ("index.html");
-      dir_tok[no_of_slashes] = strdup ("index.html");
-    }
+  if (url == NULL || strlen (url) == 0)
+    error (1, 0, "URL must not be empty.");
+
+  /* Remove http:// if present */
+  clean_url = url;
+  if (strncasecmp (clean_url, "http://", 7) == 0)
+    clean_url = clean_url + 7;
+
+  /* Build the directory hierarchy with argz; this replaces strtok. */
+  /* It splits 'clean_url' at every '/' character. */
+  if (argz_create_sep (clean_url, '/', &httpfs_url_argz, &httpfs_url_argz_len) != 0)
+    error (1, errno, "Cannot create directory hierarchy from parsing the URL.");
+
+  /* Extract the hostname (the first argz component) */
+  host_name = httpfs_url_argz;
+
+  if (!host_name)
+    error (1, 0, "Invalid URL: No hostname found.");	
 
+  /* Build the complete URL for the GET request by iterating the argz vector */
+  /* e.g. conn_req: "http://host/path/file" */
+
+  size_t total_len = 7 + 1; /* "http://" + null terminator */
+  char *entry = NULL;
+
+  /* Calculate required total length first */
+  while ((entry = argz_next (httpfs_url_argz, httpfs_url_argz_len, entry)))
+    total_len = total_len + strlen(entry) + 1; /* +1 is for the slash '/' */
+
+  conn_req = (char *) malloc (total_len);
+  if (!conn_req)
+    error (1, errno, "Cannot allocate connection request string.");
+
+  /* Build our string */
   strcpy (conn_req, "http://");
-  if (temp_url[strlen (temp_url) - 1] == '/')
-    {
-      strcat (conn_req, temp_url);
-      err = asprintf (&comm_buf, "GET %s HTTP/1.0", conn_req);
-    }
-  else
-    {
-      while (strchr (temp_url, '/') != NULL)
-	{
-	  temp = strdup (temp_url);
-	  strcat (conn_req, strtok (temp, "/"));
-	  strcat (conn_req, "/");
-	  strcpy (temp_url, strchr (temp_url, '/'));
-	  temp_url++;
-	}
-      err = asprintf (&comm_buf, "GET %s%s HTTP/1.0", conn_req, temp_url);
-    }
-  if (err < 0)  /* check the return value of asprintf */
-    error (1, errno, "Cannot allocate comm_buf.");
 
+  entry = NULL;
+  int first = 1;
+  while ((entry = argz_next (httpfs_url_argz, httpfs_url_argz_len, entry)))
+    {
+      strcat (conn_req, entry);
+      /* Add the slash if not the last element, or if it is the host */
+      if (entry < (httpfs_url_argz + httpfs_url_argz_len - strlen(entry) - 1) || first)
+        strcat (conn_req, "/");
+      first = 0;
+    }
+
+  /* Creation of GET request buffer */
+  /* TODO: For modern HTTP, we should add a "Host: %s\r\n" header here */
+  if (asprintf (&comm_buf, "GET %s HTTP/1.0\r\n\r\n", conn_req) < 0)
+    error (1, errno, "Cannot allocate command request string.");
+
+  /* Initialize the filesystem */
   httpfs = (struct httpfs *) malloc (sizeof (struct httpfs));
   if (! httpfs)
     error (1, errno, "Cannot allocate httpfs.");
@@ -149,15 +141,19 @@
   httpfs->uid = getuid ();
   httpfs->gid = getgid ();
   httpfs->next_inode = 0;
+
   if (mode)
     type = HTTP_DIR;
   else
     type = HTTP_FILE;
 
+  /* Create root node */
   /* XXX: why is tmp hardcoded? */
   httpfs->root = httpfs_make_node (type, url, conn_req, comm_buf, "tmp");
+
   netfs_init ();
-  /* translator set to a directory */
+
+  /* If it is a directory, populate its contents. */
   if (mode)
     {
       /* fill the directory node with files 
@@ -176,10 +172,15 @@
 
   netfs_root_node = httpfs->root;
   netfs_startup (bootstrap, 0);
+
   for (;;)
     netfs_server_loop ();
 
   /* NOT REACHED */
+  free (conn_req);
+  free (httpfs_url_argz);
+  free (comm_buf);
   free (httpfs);
+
   return 0;
 }
Index: httpfs.h
===================================================================
RCS file: /sources/hurdextras/httpfs/httpfs.h,v
retrieving revision 1.2
diff -u -r1.2 httpfs.h
--- httpfs.h	14 Jan 2013 10:47:05 -0000	1.2
+++ httpfs.h	12 Jan 2026 19:36:38 -0000
@@ -20,6 +20,7 @@
 #include <maptime.h>
 #include <sys/stat.h>
 #include <netdb.h>
+#include <argz.h>
 
 #include <hurd/hurd_types.h>
 
@@ -41,17 +42,17 @@
  * mode accepted as command line argument */
 extern int mode;
 
-/* no of / in the url indicates the level of directory to go into the web server * cannot move to a top level directory from the given url 
- * eg: www.xyz/file/temp/ is the url from here we cannot show the contents of 
- * the root directory / of file/ directory
- * only contents of temp/ and its subdirectories can be supported */
-extern int no_of_slashes;
-
-/* if the url points to particular file explicitly given store here 
- * else assume it to be index.html 
- * like www.gnu.org/gpl.html and www.gnu.org/  no file given so index.html */
-extern char *dir_tok[25];
-
+/* httpfs_url_argz points to a vector of strings in the GNU argz format.
+ * It holds the components of the original URL, separated by null bytes ('\0').
+ * E.g., for the URL http://gnu.org/software/hurd the vector contains:
+ * ["gnu.org", "software", "hurd"]
+ * It is used to navigate the directory structure of the remote web server. */
+extern char *httpfs_url_argz;
+
+/* httpfs_url_argz_len stores the total length in bytes of the
+ * httpfs_url_argz vector.  It is needed by functions such as argz_next()
+ * and argz_count() to determine the boundaries of the allocated memory. */
+extern size_t httpfs_url_argz_len;
 
 /* handle all initial parameter parsing */
 error_t httpfs_parse_args (int argc, char **argv);
Index: parsehtml.c
===================================================================
RCS file: /sources/hurdextras/httpfs/parsehtml.c,v
retrieving revision 1.2
diff -u -r1.2 parsehtml.c
--- parsehtml.c	27 Jan 2013 23:14:23 -0000	1.2
+++ parsehtml.c	12 Jan 2026 19:36:38 -0000
@@ -109,13 +109,20 @@
 	parent = strdup(node->file_name);
 	if ( debug_flag )
 		fprintf(stderr,"In the HTML parser for parsing %s\n",parent);
+
+	char *last_component = httpfs_url_argz;
+	char *entry_ptr = NULL;
+
+	while ((entry_ptr = argz_next(httpfs_url_argz, httpfs_url_argz_len, entry_ptr))) {
+	    last_component = entry_ptr;
+	}
 	
 	/* Create a file for base url */
 	if ( list_of_entries == NULL )
 	{
 		/* The node of the url entered */
 		list_of_entries = (struct files *)malloc(sizeof(struct files));
-		list_of_entries->f_name = strdup(dir_tok[no_of_slashes-1]);
+		list_of_entries->f_name = strdup(last_component);
 		list_of_entries->parent = strdup("tmp");
 		list_of_entries->f_size = 0;//content_len;
 		list_of_entries->f_type = HTTP_FILE;
