upload domain handler

colin Wed, 12 Jul 2000 20:07:48 -0700

Hi,

I've finished the upload domain handler, and it occurred to me that I ought to 
explain the motivation (given that tclhttpd already handles POST quite well.)

I have an application which requires the upload of large files, say much 
greater than 10Mb in length.  tclhttpd attempts to write the whole POST 
content into tcl variables.  While this is possible, it's unlikely (IMHO) to 
scale well over a large number of connections all trying to upload large files.

So the intention of this domain handler is to keep the uploaded file as a 
file, as much as possible.  To do this, it uses a /tmp/POST/ dir, and a unix 
program called csplit to do the MIME munging outside RAM as far as possible.

It now works (in the sense of creating a file with the right name, and the 
same checksum, in /tmp/POST) well enough, although to try it you'll have to 
modify the call to AAPformat as a return value.

Here's the html I'm using to provoke it, only tried it from Netscape:

<form ENCtype=multipart/form-data action="/upload/mime" method=post>

<input type=hidden name=hide_me value="hello, world">
<input type=submit value="Send">
<label for="file">File </label><input type=file name=the_file>

</form>

Colin.

# Handler for file Uploads via POST
#
# This domain handles uploads as files, instead of attempting to contain them in
# variables - useful for handling files which are too large to comfortably fit into
# memory

package provide upload 1.0
Url_PrefixInstall /upload UploadDomain

# we need some space in /tmp to hold our POST files
catch {
    file mkdir /tmp/POST/
    file delete -force /tmp/POST/*
}

proc UploadDomain {sock suffix} {
    upvar #0 Httpd$sock data

    # Set up the environment a-la CGI
    global env
    Cgi_SetEnv $sock upload/$suffix
    
    set cmd upload$suffix
    if {![iscommand $cmd]} {
	Httpd_Error $sock 403
	return
    }

    set valuelist {}
    if [info exists data(query)] {
	# search for comma separeted pair of numbers
	# as generated from server side map
	#      e.g 190,202
	# Bjorn Ruff.
        
	if { [regexp {^([0-9]+),([0-9]+)$} $data(query) match x y]} {
	    set data(query) x=$x&y=$y
	}
        
	# Honor content type of the query data
	if {[info exist data(mime,content-type)]} {
	    set type $data(mime,content-type)
	} else {
	    set type application/x-www-urlencoded
	}

        # fetch the POST data into a file
        if {$data(proto) == "POST"} {
            set channel [open /tmp/POST/${suffix}.$sock w+]
            fconfigure $channel -translation {binary binary}

            catch {fileevent $sock readable {}}

            Url_PostHook $sock 0
            fcopy $sock $channel -size $data(count)
            set data(count) 0	;# stop tclhttpd intervening in our read
            #close $channel

            seek $channel 0	;# rewind the channel
        }
        
	set valuelist [Url_DecodeQuery $data(query) -type $type]

	# Parse form parameters into the cgi array
	# If the parameter is listed twice, the array becomes a list
	# of the values.
        
	foreach {name value} $valuelist {
	    if [info exists list($name)] {
		set cgi($name) [list $cgi($name) $value]
		unset list($name)
	    } elseif [info exists cgi($name)] {
		lappend cgi($name) $value
	    } else {
		set cgi($name) $value
		set list($name) 1	;# Need to listify if more values are added
	    }
	}
    }

    if {[string length [info command $cmd]] == 0} {
	auto_load $cmd
    }
    
    # Compare built-in command's parameters with the form data.
    # Form fields with names that match arguments have that value
    # passed for the corresponding argument.
    # Form fields with no corresponding parameter are collected into args.
    
    set cmdOrig $cmd
    set params [info args $cmdOrig]
    if {$data(proto) == "POST"} {
        lappend cmd /tmp/POST/${suffix}.$sock $channel
    }
    foreach arg $params {
	if ![info exists cgi($arg)] {
	    if [info default $cmdOrig $arg value] {
		lappend cmd $value
	    } elseif {[string compare $arg "args"] == 0} {
		set needargs yes
	    } else {
		lappend cmd {}
	    }
	} else {
	    lappend cmd $cgi($arg)
	}
    }
    if [info exists needargs] {
	foreach {name value} $valuelist {
	    if {[lsearch $params $name] < 0} {
		lappend cmd $name $value
	    }
	}
    }
    # Eval the command.  Errors can be used to trigger redirects.
    
    set code [catch $cmd result]
    switch $code {
	0	{ # fall through to Httpd_ReturnData
        }
        302	{ # redirect 
            Httpd_Redirect $result $sock
            return ""
        }
        default {
            global errorInfo errorCode
            return -code $code -errorinfo $errorInfo -errorcode $errorCode $result
        }
    }
    
    # See if a content type has been registered for the URL
    
    set type text/html
    upvar #0 $cmdOrig aType
    if {[info exist aType]} {
        set type $aType
    }

    # See if any cookies have been set
    # This works with the Doc_SetCookie procedure that populates
    # the global page array
    
    global page
    if {[info exist page(set-cookie)]} {
        foreach c $page(set-cookie) {
            Httpd_SetCookie $sock $c
        }
        unset page(set-cookie)
    }
    
    Httpd_ReturnData $sock $type $result
    return ""
}

# discover the boundary string
proc get_boundary {type} {
    set parsedType [ncgi::parseMimeValue $type]
    if {![string match multipart/* [lindex $parsedType 0]]} {
	return -code error "Not a multipart Content-Type: [lindex $parsedType 0]"
    }
    array set options [lindex $parsedType 1]
    if {![info exists options(boundary)]} {
	return -code error "No boundary given for multipart document"
    }
    return $options(boundary)
}

# part_header
#
#	This parses multipart form data headers from a file
#
#	Based on ncgi multipart parsing, based on work by Steve Ball for TclHttpd,
#	but re-written to use gets on a file to iterate through the data
# Arguments:
#	file	an open file containing the multipart body
#
# Results:
#	An list of headers as name, value pairs
#	The header name/value pairs come primarily from the MIME headers
#	like Content-Type that appear in each part.  However, the
#	Content-Disposition header is handled specially.  It has several
#	parameters like "name" and "filename" that are important, so they
#	are promoted to to the same level as Content-Type.  Otherwise,
#	if a header like Content-Type has parameters, they appear as a list
#	after the primary value of the header.  For example, if the
#	part has these two headers:
#
#	Content-Disposition: form-data; name="Foo"; filename="/a/b/C.txt"
#	Content-Type: text/html; charset="iso-8859-1"; mumble='extra'
#	
#	Then the header list will have this structure:
#	{
#		content-disposition form-data
#		name Foo
#		filename /a/b/C.txt
#		content-type {text/html {charset iso-8859-1 mumble extra}}
#	}
#	Note that the header names are mapped to all lowercase.  You can
#	use "array set" on the header list to easily find things like the
#	filename or content-type.  You should always use [lindex $value 0]
#	to account for values that have parameters, like the content-type
#	example above.  Finally, not that if the value has a second element,
#	which are the parameters, you can "array set" that as well.
#
#	The file is left at the beginning of content.
#
#	an empty string return implies end of file
proc part_header {file} {
    # Split headers out from content
    # The headers become a nested list structure:
    #	{header-name {
    #		value {
    #			paramname paramvalue ... }
    #		}
    #	}
    set headers ""
    while {[gets $file line] != -1} {
        if {$line == ""} {
            return $headers
        }
        if {[regexp {([^:	 ]+):[ ]*(.*)$} $line x hdrname value]} {
            set hdrname [string tolower $hdrname]
            set valueList [ncgi::parseMimeValue $value]
            if {[string equal $hdrname "content-disposition"]} {
                # Promote Conent-Disposition parameters up to headers,
                # and look for the "name" that identifies the form element
                
                lappend headers $hdrname [lindex $valueList 0]
                foreach {n v} [lindex $valueList 1] {
                    lappend headers $n $v
                    if {[string equal $n "name"]} {
                        set formName $v
                    }
                }
            } else {
                lappend headers $hdrname $valueList
            }
        }
    }
    return $headers
}

# when the upload/ proc is called, it has a file argument,
# being the name of an open channel pointing to the beginning of POST data

proc upload/test {filename file args} {
    set sock [uplevel set sock]
    upvar #0 Httpd$sock data
    set result "<p>[uplevel set cmdOrig] | [info level -1]<p>"
    append result "FILE: $file ARGS: $args"
    foreach name [array names data] {
        append result "<p>${name}: $data($name)"
    }
    append result "<p>ENV:[array get env]"
    append result "<p>MIME: [ncgi::parseMimeValue $data(mime,content-type)]"
    #append result "<p>[ncgi::multipart $data(mime,content-type)]"
    close $file
    return [AAPformat Upload $result]
}

proc upload/mime0 {filename file args} {
    close $file
    return [AAPformat Mime "Done"]
}

proc upload/mime {filename file args} {
    close $file	;# don't need the open file anymore.

    upvar 1 data data
    set boundary [get_boundary $data(mime,content-type)] ;# get boundary string

    # split the MIME file up into chunks in /tmp/POST/
    exec csplit -z --prefix=${filename}. $filename /$boundary/+0 \{*\}

    set pattern [file join [file dirname $filename] ${filename}.* ]
    foreach chunk [glob -nocomplain $pattern] {
        set size [file size $chunk]	;# how long is this chunk?
        set chnum [string range [file extension $chunk] 1 end]	;# ordinal chunk
        set f [open $chunk r]
        gets $f	;# skip boundary line
        array set header [part_header $f]	;# grab the header
        fconfigure $f -translation {binary binary}
    
        if {[info exists header(filename)]} {
            # this is our file - open the real name
            set to [open [file join /tmp POST $header(filename)] w]

            # the magic length 2 is because MIME adds cr/lf to the end of the file
            fcopy $f $to -size [expr $size - [tell $f] - 2]
            close $to
        }
        close $f
        file delete $chunk	;# clean up the chunk
        unset header
    }
    file delete $filename	;# clean up the original POST file
    return [AAPformat Mime "Done"]
}

upload domain handler

Reply via email to