On Tue, 20 Jul 2004, Albert Reiner wrote:

> Hi,
> 
> I wonder whether there is a simple, scriptable way of putting pages
> into the cache under a given URI.

What I do :-

[ background: I scoop in one place, ship the data somewhere else
  via UUCP, unpack into the cache, browse from there ]

I create a complete virtual instance of wwwoffle in /tmp, with
config file pointing to an upstream cache if necessary, run
wget --delete-after URL, tar up what was the virgin cache, and
carry that to the other side.

I do it with shell scripting.

Hey - it's only 5K - here it is - a bit wizzy'ish.

Some assembly required.

If there is more interest, I could tidy it up.

Cheers,    Andy!

http://wizzy.org.za/

#! /bin/sh

# Script to do scoops for a remote requestor, and pass the
# results back to the requestor.

# Andy Rabagliati <[EMAIL PROTECTED]>

# Wizzy Digital Courier  http://www.wizzy.org.za/

# Overview
# --------
# Every regular file in $SCOOPDIR is one request made of "Key: value"
# lines.  The keys read are Url:, Priority:, Host:, Level:, Span: and
# When:.  For each complete request this script
#   1. starts a private, throw-away wwwoffle instance with an empty cache,
#   2. fetches the URL through it with wget (pages are deleted by wget,
#      but stay in the wwwoffle cache),
#   3. drops cache entries that wwwoffle itself refused,
#   4. tars up the http/ and ftp/ cache directories and pipes them via
#      uux to "woffle_unpack" on the route LDAP gives for the Host: value.
# Priority: and When: must be present but are not otherwise used here.

DOMAIN=wizzy.org.za
SCOOPDIR=/var/spool/uucp/.scoops
LOCKDIR=/var/lock/uucp

# the upstream proxy - should be online !!
PROXY=localhost:8080

# Per-request state that cleanup() has to know about.
LOCK=
WOFFLEDIR=
WOFFLECONFIG=
DAEMON_UP=

# Stop the private wwwoffled (if running) and remove the temporary cache
# and the lock.  Safe to call more than once.
cleanup() {
  if [ -n "$DAEMON_UP" ]; then
    /usr/bin/wwwoffle -c "$WOFFLECONFIG" -kill
    DAEMON_UP=
  fi
  if [ -n "$WOFFLEDIR" ]; then
    /bin/rm -rf -- "$WOFFLEDIR"
    WOFFLEDIR=
  fi
  if [ -n "$LOCK" ]; then
    /bin/rm -f -- "$LOCK"
    LOCK=
  fi
}

# Print the host[:port] part of URL $1 (scheme and path stripped).
url_site() {
  printf '%s\n' "$1" | sed -e 's%[a-z]*://%%' -e 's%/.*%%'
}

# Print an even port number between 8090 and 8888 derived from PID $1.
# The control port is this value plus one.
proxy_port() {
  echo $(( $1 % 400 * 2 + 8090 ))
}

# Succeed if $1 is non-empty and contains none of the LDAP search-filter
# metacharacters * ( ) \ so it can be pasted into a filter verbatim.
ldap_safe() {
  case $1 in
    '' | *'*'* | *'('* | *')'* | *'\'*) return 1 ;;
  esac
  return 0
}

# Claim the first free slot among woffle1..woffle4 in $LOCKDIR (at most
# 4 concurrent fetches).  noclobber (set -C) makes "test and create" one
# step, so two runs cannot claim the same slot.  Sets LOCK on success.
# NOTE(review): a lock left by a killed run is never reclaimed.
acquire_lock() {
  for slot in 1 2 3 4
  do
    candidate=$LOCKDIR/woffle$slot
    if ( set -C; echo $$ > "$candidate" ) 2>/dev/null
    then
      LOCK=$candidate
      return 0
    fi
  done
  return 1
}

# Run the wwwoffle control program against the private instance.
woffle() {
  /usr/bin/wwwoffle -c "$WOFFLECONFIG" "$@"
}

# Write the wwwoffle configuration for the private instance to stdout.
# Reads PROXYPORT, CONTROLPORT, WOFFLEDIR, SELF, DOMAIN and PROXY.
write_config() {
  cat <<END_OF_CONFIG
# WWWOFFLE Configuration file

StartUp
{
   bind-ipv4         = 0.0.0.0
   http-port         = $PROXYPORT
   wwwoffle-port     = $CONTROLPORT
   spool-dir         = $WOFFLEDIR
   run-uid           = uucp
   run-gid           = uucp
   use-syslog        = yes
   password          = none
   max-servers       = 8
   max-fetch-servers = 4
}

Options
{
  dir-perm              = 0775
  file-perm             = 0664
}

LocalHost
{
   ${SELF}.${DOMAIN}
   localhost
}

MIMETypes
{
   default  = text/plain

   .Z       = application/x-compress
   .au      = audio/basic
   .avi     = video/x-msvideo
   .class   = application/java
   .cpio    = application/x-cpio
   .css     = text/css
   .deb     = application/octet-stream
   .dtd     = application/xml
   .dvi     = application/x-dvi
   .eps     = application/postscript
   .gif     = image/gif
   .gz      = application/x-gzip
   .htm     = text/html
   .html    = text/html
   .jpeg    = image/jpeg
   .jpg     = image/jpeg
   .js      = application/x-javascript
   .latex   = application/x-latex
   .man     = application/x-troff-man
   .me      = application/x-troff-me
   .mov     = video/quicktime
   .mpeg    = video/mpeg
   .mpg     = video/mpeg
   .ms      = application/x-troff-ms
   .pac     = application/x-ns-proxy-autoconfig
   .pbm     = image/x-portable-bitmap
   .pdf     = application/pdf
   .pgm     = image/x-portable-graymap
   .png     = image/png
   .pnm     = image/x-portable-anymap
   .ppm     = image/x-portable-pixmap
   .ps      = application/postscript
   .ras     = image/x-cmu-raster
   .rgb     = image/x-rgb
   .rpm     = application/octet-stream
   .rtf     = application/rtf
   .snd     = audio/basic
   .tar     = application/x-tar
   .tcl     = application/x-tcl
   .tex     = application/x-tex
   .texi    = application/x-texinfo
   .texinfo = application/x-texinfo
   .tif     = image/tiff
   .tiff    = image/tiff
   .tr      = application/x-troff
   .txt     = text/plain
   .vr      = model/vrml
   .wav     = audio/x-wav
   .wrl     = model/vrml
   .xbm     = image/x-xbitmap
   .xml     = application/xml
   .xpm     = image/x-xpixmap
   .xwd     = image/x-xwindowdump
   .zip     = application/zip
}

Proxy
{
   <http://*> proxy = $PROXY
}
END_OF_CONFIG
}

# Clean up on every exit path; signals are turned into a normal exit so
# the EXIT trap runs and the script does not carry on after cleanup.
trap cleanup 0
trap 'exit 1' 1 2 15

for request in "$SCOOPDIR"/*
do
  # Also skips the literal pattern when the glob matches nothing.
  [ -f "$request" ] || continue

  # Start every request from a clean slate so nothing (in particular
  # --span-hosts) leaks over from the previous request file.
  URL= PRIORITY= HOST= LEVEL= WHEN= SPAN=

  while read -r key value || [ -n "$key" ]
  do
    case $key in
      Url:)
        URL=$value
        ;;
      Priority:)
        PRIORITY=$value
        ;;
      Host:)
        HOST=$value
        ;;
      Level:)
        LEVEL=$value
        ;;
      Span:)
        if [ "$value" = yes ] ; then
          SPAN=--span-hosts
        fi
        ;;
      When:)
        WHEN=$value
        ;;
    esac
  done < "$request"

  printf '%s\n' "$URL $PRIORITY $HOST $LEVEL $WHEN"
  if [ -z "$URL" ] || [ -z "$PRIORITY" ] || [ -z "$HOST" ] ||
     [ -z "$LEVEL" ] || [ -z "$WHEN" ] || ! ldap_safe "$HOST"
  then
    echo "$request busted"
    continue
  fi

  # Skip the request for now if the web server cannot be reached.
  # NOTE(review): always probes port 80, so URLs with an explicit port
  # or a non-http scheme never pass this check - confirm that is wanted.
  site=$(url_site "$URL")
  if ! echo Xquit | telnet -e X "$site" 80 ; then
    continue
  fi

  # check for lock - allow for 4 concurrent fetches
  if ! acquire_lock
  then
    exit 0          # not this time
  fi

  UUROUTE=$(/usr/bin/ldapsearch -x -LLL -b ou=mboxsvrs,dc=wizzy,dc=org,dc=za \
      "uuHost=$HOST" uuRoute) || exit 1
  # Keep only the value of the (last) uuRoute line of the LDIF output.
  UUROUTE=$(printf '%s\n' "$UUROUTE" |
      /bin/sed -ne '/uuRoute/s/.*: //p' | tail -n 1)

  if [ -z "$UUROUTE" ]
  then
    echo "no route to $HOST in ldap"
    cleanup         # give the lock slot back
    continue
  fi

  PROXYPORT=$(proxy_port $$)
  CONTROLPORT=$((PROXYPORT + 1))

  # Private cache directory with an unpredictable name.
  WOFFLEDIR=$(mktemp -d /tmp/woffle.XXXXXX) || exit 1
  # mktemp creates the directory 0700; open it up to what a plain mkdir
  # would typically have produced.
  # NOTE(review): only matters if wwwoffled (run-uid uucp) runs as a
  # different user than this script - confirm.
  chmod 755 "$WOFFLEDIR"
  WOFFLECONFIG=$WOFFLEDIR/wwwoffle.conf

  mv -- "$request" /var/spool/uucp/.Temp
  SELF=$(/usr/bin/uuname -l)

  cd "$WOFFLEDIR" || exit 1
  mkdir ftp http lasttime outgoing temp

  write_config > "$WOFFLECONFIG"

  #PERLSCRIPT_FILES='($h, $s)=($_=~ m%^(\S+).*http://([^/]+)/%); print
  #  "./http/$s/$h"; substr($h,0,1,"U"); print "./http/$s/$h";'
  if /usr/sbin/wwwoffled -c "$WOFFLECONFIG"
  then
    DAEMON_UP=yes
  fi
  # put freshly online
  woffle -online

  cd /tmp || exit 1
  logger -t scooping "$URL"
  # $SPAN is deliberately unquoted: it is either empty or one option.
  # "--" keeps a hostile Url: value from being read as a wget option.
  http_proxy="http://localhost:$PROXYPORT/" wget --quiet --recursive \
      --page-requisites --no-directories --delete-after \
      --level "$LEVEL" $SPAN -- "$URL"

  # offline again
  woffle -offline
  woffle -kill                # cause wwwoffled to exit
  DAEMON_UP=

  # Drop cache entries (D* file plus matching U* file) whose first line
  # shows that wwwoffle refused the request.
  for sitedir in "$WOFFLEDIR"/http/*
  do
    [ -d "$sitedir" ] || continue
    for page in "$sitedir"/D*
    do
      [ -f "$page" ] || continue
      if head -n 1 "$page" | grep -q '^HTTP.1.0 404 WWWOFFLE Refused Request'
      then
        name=${page##*/}
        rm -f -- "$page" "$sitedir/U${name#D}"
      fi
    done
    # rmdir only succeeds on an empty directory; failure on a non-empty
    # one is expected, hence the silenced stderr.
    rmdir "$sitedir" 2>/dev/null
  done

  # send requested files back to client
  /bin/tar --create --gzip --dereference --directory "$WOFFLEDIR" http ftp |
    uux --stdin "$UUROUTE"'!woffle_unpack'

  # remove lock and temporary woffle instance
  cleanup
done

Reply via email to