Hi,
I've finished the upload domain handler, and it occurred to me that I ought to
explain the motivation (given that tclhttpd already handles POST quite well.)
I have an application which requires the upload of large files, say much
greater than 10Mb in length. tclhttpd attempts to write the whole POST
content into tcl variables. While this is possible, it's unlikely (IMHO) to
scale well over a large number of connections all trying to upload large files.
So the intention of this domain handler is to keep the uploaded file as a
file, as much as possible. To do this, it uses a /tmp/POST/ dir, and a unix
program called csplit to do the MIME munging outside RAM as far as possible.
It now works (in the sense of creating a file with the right name, and the
same checksum, in /tmp/POST) well enough, although to try it you'll have to
modify the call to AAPformat as a return value.
Here's the html I'm using to provoke it, only tried it from Netscape:
<form ENCtype=multipart/form-data action="/upload/mime" method=post>
<input type=hidden name=hide_me value="hello, world">
<input type=submit value="Send">
<label for="file">File </label><input type=file name=the_file>
</form>
Colin.
# Handler for file Uploads via POST
#
# This domain handles uploads as files, instead of attempting to contain them in
# variables - useful for handling files which are too large to comfortably fit into
# memory
package provide upload 1.0
Url_PrefixInstall /upload UploadDomain
# we need some space in /tmp to hold our POST files
catch {
file mkdir /tmp/POST/
file delete -force /tmp/POST/*
}
proc UploadDomain {sock suffix} {
upvar #0 Httpd$sock data
# Set up the environment a-la CGI
global env
Cgi_SetEnv $sock upload/$suffix
set cmd upload$suffix
if {![iscommand $cmd]} {
Httpd_Error $sock 403
return
}
set valuelist {}
if [info exists data(query)] {
# search for comma separeted pair of numbers
# as generated from server side map
# e.g 190,202
# Bjorn Ruff.
if { [regexp {^([0-9]+),([0-9]+)$} $data(query) match x y]} {
set data(query) x=$x&y=$y
}
# Honor content type of the query data
if {[info exist data(mime,content-type)]} {
set type $data(mime,content-type)
} else {
set type application/x-www-urlencoded
}
# fetch the POST data into a file
if {$data(proto) == "POST"} {
set channel [open /tmp/POST/${suffix}.$sock w+]
fconfigure $channel -translation {binary binary}
catch {fileevent $sock readable {}}
Url_PostHook $sock 0
fcopy $sock $channel -size $data(count)
set data(count) 0 ;# stop tclhttpd intervening in our read
#close $channel
seek $channel 0 ;# rewind the channel
}
set valuelist [Url_DecodeQuery $data(query) -type $type]
# Parse form parameters into the cgi array
# If the parameter is listed twice, the array becomes a list
# of the values.
foreach {name value} $valuelist {
if [info exists list($name)] {
set cgi($name) [list $cgi($name) $value]
unset list($name)
} elseif [info exists cgi($name)] {
lappend cgi($name) $value
} else {
set cgi($name) $value
set list($name) 1 ;# Need to listify if more values are added
}
}
}
if {[string length [info command $cmd]] == 0} {
auto_load $cmd
}
# Compare built-in command's parameters with the form data.
# Form fields with names that match arguments have that value
# passed for the corresponding argument.
# Form fields with no corresponding parameter are collected into args.
set cmdOrig $cmd
set params [info args $cmdOrig]
if {$data(proto) == "POST"} {
lappend cmd /tmp/POST/${suffix}.$sock $channel
}
foreach arg $params {
if ![info exists cgi($arg)] {
if [info default $cmdOrig $arg value] {
lappend cmd $value
} elseif {[string compare $arg "args"] == 0} {
set needargs yes
} else {
lappend cmd {}
}
} else {
lappend cmd $cgi($arg)
}
}
if [info exists needargs] {
foreach {name value} $valuelist {
if {[lsearch $params $name] < 0} {
lappend cmd $name $value
}
}
}
# Eval the command. Errors can be used to trigger redirects.
set code [catch $cmd result]
switch $code {
0 { # fall through to Httpd_ReturnData
}
302 { # redirect
Httpd_Redirect $result $sock
return ""
}
default {
global errorInfo errorCode
return -code $code -errorinfo $errorInfo -errorcode $errorCode $result
}
}
# See if a content type has been registered for the URL
set type text/html
upvar #0 $cmdOrig aType
if {[info exist aType]} {
set type $aType
}
# See if any cookies have been set
# This works with the Doc_SetCookie procedure that populates
# the global page array
global page
if {[info exist page(set-cookie)]} {
foreach c $page(set-cookie) {
Httpd_SetCookie $sock $c
}
unset page(set-cookie)
}
Httpd_ReturnData $sock $type $result
return ""
}
# discover the boundary string
proc get_boundary {type} {
set parsedType [ncgi::parseMimeValue $type]
if {![string match multipart/* [lindex $parsedType 0]]} {
return -code error "Not a multipart Content-Type: [lindex $parsedType 0]"
}
array set options [lindex $parsedType 1]
if {![info exists options(boundary)]} {
return -code error "No boundary given for multipart document"
}
return $options(boundary)
}
# part_header
#
# This parses multipart form data headers from a file
#
# Based on ncgi multipart parsing, based on work by Steve Ball for TclHttpd,
# but re-written to use gets on a file to iterate through the data
# Arguments:
# file an open file containing the multipart body
#
# Results:
# An list of headers as name, value pairs
# The header name/value pairs come primarily from the MIME headers
# like Content-Type that appear in each part. However, the
# Content-Disposition header is handled specially. It has several
# parameters like "name" and "filename" that are important, so they
# are promoted to to the same level as Content-Type. Otherwise,
# if a header like Content-Type has parameters, they appear as a list
# after the primary value of the header. For example, if the
# part has these two headers:
#
# Content-Disposition: form-data; name="Foo"; filename="/a/b/C.txt"
# Content-Type: text/html; charset="iso-8859-1"; mumble='extra'
#
# Then the header list will have this structure:
# {
# content-disposition form-data
# name Foo
# filename /a/b/C.txt
# content-type {text/html {charset iso-8859-1 mumble extra}}
# }
# Note that the header names are mapped to all lowercase. You can
# use "array set" on the header list to easily find things like the
# filename or content-type. You should always use [lindex $value 0]
# to account for values that have parameters, like the content-type
# example above. Finally, not that if the value has a second element,
# which are the parameters, you can "array set" that as well.
#
# The file is left at the beginning of content.
#
# an empty string return implies end of file
proc part_header {file} {
# Split headers out from content
# The headers become a nested list structure:
# {header-name {
# value {
# paramname paramvalue ... }
# }
# }
set headers ""
while {[gets $file line] != -1} {
if {$line == ""} {
return $headers
}
if {[regexp {([^: ]+):[ ]*(.*)$} $line x hdrname value]} {
set hdrname [string tolower $hdrname]
set valueList [ncgi::parseMimeValue $value]
if {[string equal $hdrname "content-disposition"]} {
# Promote Conent-Disposition parameters up to headers,
# and look for the "name" that identifies the form element
lappend headers $hdrname [lindex $valueList 0]
foreach {n v} [lindex $valueList 1] {
lappend headers $n $v
if {[string equal $n "name"]} {
set formName $v
}
}
} else {
lappend headers $hdrname $valueList
}
}
}
return $headers
}
# when the upload/ proc is called, it has a file argument,
# being the name of an open channel pointing to the beginning of POST data
proc upload/test {filename file args} {
set sock [uplevel set sock]
upvar #0 Httpd$sock data
set result "<p>[uplevel set cmdOrig] | [info level -1]<p>"
append result "FILE: $file ARGS: $args"
foreach name [array names data] {
append result "<p>${name}: $data($name)"
}
append result "<p>ENV:[array get env]"
append result "<p>MIME: [ncgi::parseMimeValue $data(mime,content-type)]"
#append result "<p>[ncgi::multipart $data(mime,content-type)]"
close $file
return [AAPformat Upload $result]
}
proc upload/mime0 {filename file args} {
close $file
return [AAPformat Mime "Done"]
}
proc upload/mime {filename file args} {
close $file ;# don't need the open file anymore.
upvar 1 data data
set boundary [get_boundary $data(mime,content-type)] ;# get boundary string
# split the MIME file up into chunks in /tmp/POST/
exec csplit -z --prefix=${filename}. $filename /$boundary/+0 \{*\}
set pattern [file join [file dirname $filename] ${filename}.* ]
foreach chunk [glob -nocomplain $pattern] {
set size [file size $chunk] ;# how long is this chunk?
set chnum [string range [file extension $chunk] 1 end] ;# ordinal chunk
set f [open $chunk r]
gets $f ;# skip boundary line
array set header [part_header $f] ;# grab the header
fconfigure $f -translation {binary binary}
if {[info exists header(filename)]} {
# this is our file - open the real name
set to [open [file join /tmp POST $header(filename)] w]
# the magic length 2 is because MIME adds cr/lf to the end of the file
fcopy $f $to -size [expr $size - [tell $f] - 2]
close $to
}
close $f
file delete $chunk ;# clean up the chunk
unset header
}
file delete $filename ;# clean up the original POST file
return [AAPformat Mime "Done"]
}