On Fri, 9 Jun 2000, Petru Paler wrote:

> > I'd love some sort of benchmarking tool for this (and possibly other 
> > Storages). I guess the best way would be a Python script that uses urllib.
> > Something that would algorithmically pump up the DB to > 1GB in size
> > and retrieve the URLs. Any volunteers or am I doing it in my
> > copious spare time (tm)?
> 
> It would be great if you could do it, but beware that you will be
> benchmarking a lot of overhead if you only plan to measure storage
> performance. Why not use ZODB directly ?

If I talk HTTP, it measures things fully - Python's interpreter lock
will mean a storage system written in python will benchmark better
without having to compete with ZServer, and vice versa for storage
systems with non-pythonic bits.

> > I've got a nice NetApp here to run some tests on.
> 
> What filesystem does that use ?

No idea :-) Something log based that is very fast and handles huge
directories happily. It also appears that another member of this
list has an EMC Symmetrix box to test on, which I believe is the next (and 
highest) level up from a Netapp.

I've attached a prerelease alpha of zouch.py for giggles. Not even a
command line yet, so you will need to edit some code at the bottom.
The current settings generate about 360 directories and about 36000 files,
and proceed to make about 180000 reads. This bloated my test ZODB
to just over 200MB and took about 2.6 hours attacking my development Zope
server from another host on my LAN.

Todo:
    tidy and vet ugly code
    command line interface
    dynamic option (do more intensive DTML stuff - currently just 
        standard_html_header/standard_html_footer)
    catalog option (since DTML Documents aren't catalog aware, will need
        to make two calls to make a new document)
    upload larger documents and some binaries (200MB isn't great for 
        benchmarking when you might have a gig of ram doing caching for you)
    standard test suite
    better reporting
    spinning doohickey so we know it hasn't hung without having to look
        at log files

-- 
Stuart Bishop                          Work: [EMAIL PROTECTED]
Senior Systems Alchemist               Play: [EMAIL PROTECTED]
Computer Science, RMIT University

#!/bin/env python
'''
$Id: zouch.py,v 1.3 2000/06/12 04:23:01 zen Exp $

Zouch - the Zope torture tester
'''

import whrandom
import sha
import threading
import ftplib
import httplib

from string import split,join,replace
from time import time,strftime,localtime,sleep
from StringIO import StringIO
from Queue import Queue
from threading import Thread,RLock
from urllib import urlencode
from urlparse import urlparse
from base64 import encodestring

retries = 10
retrysleep = 1

def debug(msg):
    'Write msg to stdout on one line, tagged with the calling thread name.'
    caller = threading.currentThread().getName()
    print('D: %s - %s' % (caller,msg))

# Fatal exceptions are not meant to be caught - they should end the run
class FatalException(Exception):
    'Root of the errors that are supposed to abort Zouch.'

class UnsupportedProtocol(FatalException):
    'Raised for a URL scheme that has no maker classes.'

class FolderLock:
    '''
    Registry of per-path locks shared by the maker threads.

    Every locked path (its components joined with '/') maps to an RLock
    held by the thread that locked it. lock() takes the entry for the
    path itself and then the entry for its parent, so two threads adding
    things to the same folder are serialised on the parent entry.
    '''

    def __init__(self):
        self.locks = {}         # path string -> RLock held by its owner
        self.sync = RLock()     # guards every access to self.locks

    def lock(self,dirs):
        '''
        Block until both dirs and its parent are locked by this thread.
        dirs is a list of path components.
        '''
        me = self._mypath(dirs)
        self._lock(me)
        try:
            self._lock(self._parentpath(dirs))
        except:
            # Never got the parent: hand the child back, otherwise it
            # stays locked forever and blocks every later caller
            self._unlock(me)
            raise

    def unlock(self,dirs):
        '''
        Release what lock(dirs) took. Raises KeyError if either path is
        not locked. The underlying RLocks have to be released by the
        thread that acquired them, so call this from the locking thread.
        '''
        try:
            self._unlock(self._parentpath(dirs))
        finally:
            # Release our own path even when the parent entry was missing
            self._unlock(self._mypath(dirs))

    def _parentpath(self,dirs):
        'Key for the parent of dirs; a single component hangs off "root".'
        if len(dirs) == 1:
            return 'root'
        return '/'.join(dirs[:-1])

    def _mypath(self,dirs):
        'Key for dirs itself.'
        return '/'.join(dirs)

    def _lock(self,d):
        'Block until no other thread holds d, then register it as ours.'
        locks = self.locks
        sync = self.sync

        while 1:
            sync.acquire()
            try:
                held = locks.get(d)
                if held is None:
                    mine = RLock()
                    mine.acquire()
                    locks[d] = mine
                    return
            finally:
                sync.release()
            # Somebody else owns d. Wait (without holding sync) until
            # they let go, then go round again and compete for it.
            held.acquire()
            held.release()

    def _unlock(self,d):
        'Drop the registry entry for d and release its lock.'
        sync = self.sync

        sync.acquire()
        try:
            held = self.locks[d]
            del self.locks[d]
            held.release()
        finally:
            sync.release()

# The single registry used by every maker class in this module
folderlock = FolderLock()

class HTTPMaker:
    'Baseclass for HTTP Maker classes'

    def __init__(self,queue,url,username,password):
        '''
        queue    - queue the subclass run() method takes its work from
        url      - base URL; only the host[:port] and path parts are used
        username - user for HTTP basic authentication, or None for no
                   Authorization header
        password - password for username; None is sent as ''
        '''

        purl = urlparse(url)

        # The network location is 'host' or 'host:port'. Unpacking the
        # split result directly blew up whenever the port was left out.
        netloc = purl[1].split(':',1)
        host = netloc[0]
        if len(netloc) > 1 and netloc[1]:
            port = int(netloc[1])
        else:
            port = 80

        # Requests are built as self.path + relative path, so make sure
        # it ends in a slash (an empty path becomes '/')
        path = purl[2]
        if path[-1:] != '/':
            path = path + '/'
        self.path = path

        self.queue = queue
        self.ops = 0            # operations completed by this maker

        if username is None:
            self.auth = None
        else:
            if password is None: password = ''
            # encodestring appends (and for long input inserts) newlines,
            # which must not end up inside a header value
            self.auth = 'Basic %s' % \
                replace(encodestring('%s:%s' % (username,password)),'\012','')

        #self.http = httplib.HTTP(host,port)
        self.host = host
        self.port = port

    def doget(self,getcmd):
        '''
        GET getcmd from the server on a fresh connection.

        Returns the response body as a list of lines, or None if no
        connection could be made within the module-level 'retries'
        attempts or the status was anything other than 200 or 302.
        '''
        http = None
        for attempt in range(0,retries):
            try:
                http = httplib.HTTP(self.host,self.port)
                break
            except:
                # Best effort: whatever went wrong, pause and try again
                sleep(retrysleep)
        if http is None:
            debug('Failed %s' % getcmd)
            return None
        auth = self.auth
        http.putrequest('GET', getcmd)
        http.putheader('Accept','text/html')
        http.putheader('Accept','text/plain')
        if auth is not None:
            http.putheader('Authorization',auth)
        http.endheaders()
        errcode,errmsg,headers = http.getreply()
        if errcode not in (200,302):
            debug('%s ret %d %s' % (getcmd,errcode,errmsg))
            return None
        f = http.getfile()      # Suck in the data since we are torturing
        junk = f.readlines()
        f.close()
        return junk

class HTTPFolderMaker(HTTPMaker):
    'Maker that creates folders by requesting manage_addFolder.'

    def run(self):
        '''
        Take folder paths (lists of components) off the queue and create
        each one inside its parent, until a None arrives. Gives up at
        the first request that doget() reports as failed.
        '''
        while 1:
            dirs = self.queue.get()
            if dirs is None:
                break
            name = dirs[-1]
            query = urlencode({'id': name,'title': 'Zouch folder ' + name})
            # Hold the folder and its parent while the server adds it
            folderlock.lock(dirs)
            try:
                target = self.path+join(dirs[:-1],'/')+'/manage_addFolder?' + query
                reply = self.doget(target)
            finally:
                folderlock.unlock(dirs)
            if reply is None:
                return
            self.ops = self.ops + 1


class HTTPFileMaker(HTTPMaker):
    'Maker that creates DTML Documents by requesting manage_addDTMLDocument.'

    def run(self):
        '''
        Take (filepath,data) pairs off the queue until a None arrives.
        filepath is a list of path components, the last one being the
        id of the new document; data is its source text. Gives up at
        the first request that doget() reports as failed.
        '''
        queue = self.queue
        path = self.path

        l = queue.get()
        while l is not None:
            try:
                filepath,data = l
            except:
                # Wrap the item in a 1-tuple: a bare tuple on the right
                # of % is taken as the argument list and would raise a
                # TypeError that hides the unpacking error
                debug('WTF?: %s' % (l,))
                raise
            d = join(filepath[:-1],'/')
            name = filepath[-1]
            parms = urlencode({
                'id': name,
                'title': 'Zouch generated document ' + name,
                'file': data})
            # Hold the document path and its folder during the request
            folderlock.lock(filepath)
            try:
                p = path + d + '/manage_addDTMLDocument?' + parms
                if self.doget(p) is None:
                    return
            finally:
                folderlock.unlock(filepath)
            self.ops = self.ops + 1
            l = queue.get()

class HTTPFileReader(HTTPMaker):
    'Maker that only reads: it fetches the paths it is handed.'

    def run(self):
        '''
        Take paths (lists of components) off the queue and GET each one
        until a None arrives. Gives up at the first request that
        doget() reports as failed.
        '''
        while 1:
            filepath = self.queue.get()
            if filepath is None:
                break
            if self.doget(self.path + join(filepath,'/')) is None:
                return
            self.ops = self.ops + 1

class FTPMaker:
    'Baseclass for FTP Maker classes'

    def __init__(self,queue,url,username,password):
        '''
        queue    - queue the subclass feed() method takes its work from
        url      - base URL; only the host[:port] and path parts are used
        username - login name; a false value means ftplib's default login
        password - password for username; None is sent as ''

        Nothing is connected here - that happens in run().
        '''

        purl = urlparse(url)

        # The network location is 'host' or 'host:port'. Unpacking the
        # split result directly blew up whenever the port was left out.
        netloc = purl[1].split(':',1)
        host = netloc[0]
        if len(netloc) > 1 and netloc[1]:
            port = int(netloc[1])
        else:
            port = 21

        path = purl[2]

        ftp = ftplib.FTP()
        #ftp.set_debuglevel(1)
        #ftp.set_pasv(1)

        if password is None: password = ''

        if path and path[-1] != '/':
            path = path + '/'   # Don't use os.path.join - this is FTP

        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.path = path
        self.ftp = ftp
        self.queue = queue

        self.ops = 0            # operations completed by this maker

    def __del__(self):
        # Best-effort cleanup; the connection may never have been made
        # or may be gone already, so every failure is ignored
        try:
            self.ftp.quit()
            self.ftp.close()
        except:
            pass

    def run(self):
        '''
        Connect, log in, let the subclass feed() do the work, then quit.

        Any ftplib error that gets this far - from connecting, logging
        in, feed() or the final quit - sets ops to -1.
        '''
        try:
            ftp = self.ftp
            ftp.connect(self.host,self.port)
            if self.username:
                ftp.login(self.username,self.password)
            else:
                ftp.login()

            try:
                self.feed()
            finally:
                ftp.quit()
        except ftplib.all_errors:
            self.ops = -1       # Never got a chance to connect

class FTPFolderMaker(FTPMaker):
    'Maker that creates directories with the FTP MKD command.'

    def feed(self):
        '''
        Take folder paths (lists of components) off the queue and
        create each one below self.path until a None arrives. A
        permanent FTP error is reported through debug() and ends the
        loop.
        '''
        queue = self.queue
        path = self.path
        ftp = self.ftp

        dirs = queue.get()
        try:
            while dirs is not None:
                # Take the lock before entering the try block, so the
                # finally clause never unlocks something that was not
                # locked
                folderlock.lock(dirs)
                try:
                    ftp.mkd(path + join(dirs,'/'))
                finally:
                    folderlock.unlock(dirs)
                self.ops = self.ops + 1
                dirs = queue.get()
        except ftplib.error_perm:
            debug('Failed to create directory %s%s' % (path,join(dirs,'/')))

class FTPFileMaker(FTPMaker):
    'Maker that uploads documents with the FTP STOR command.'

    def feed(self):
        '''
        Take (filepath,data) pairs off the queue and upload data to
        filepath (a list of components below self.path) until a None
        arrives. A permanent FTP error is reported through debug() and
        ends the loop.
        '''
        try:
            while 1:
                job = self.queue.get()
                if job is None:
                    break
                filepath,data = job
                body = StringIO(data)
                folderlock.lock(filepath)
                try:
                    self.ftp.storlines(
                        'STOR %s%s' % (self.path,join(filepath,'/')),body)
                    self.ops = self.ops + 1
                finally:
                    # Closing the buffer must never get in the way of
                    # giving the lock back
                    try:
                        body.close()
                    except:
                        pass
                    folderlock.unlock(filepath)
        except ftplib.error_perm:
            debug('Failed to upload file %s%s' % (self.path,join(filepath,'/')))


class Zouch:
    'Feeds commands to the clients'

    def __init__(self,url,username=None,password=None):
        '''
        Create a feeder to feed our consumers.
        '''

        # Setup and seed our random number generator - seed to known
        # value to sequence repeats identically each run
        self._rng = whrandom.whrandom()
        self._rng.seed(1,5,2)

        self.rootfolder = strftime('%Y%m%d_%H%M%S',localtime(time()))

        if url[-1] <> '/':
            url = url + '/'

        self.url = url
        self.numthreads = 4
        self.folders = 5
        self.depth = 3
        self.files = 20
        self.username = username
        self.password = password

    def run(self):
        self.purl = urlparse(self.url)

        protocol = self.purl[0]

        #if protocol == 'ftp':
        #    FolderMaker = FTPFolderMaker
        #    FileMaker = FTPFileMaker
        if protocol == 'http':
            FolderMaker = HTTPFolderMaker
            FileMaker = HTTPFileMaker
            FileReader = HTTPFileReader
        else:
            raise UnsupportedProtocol(protocol)

        print "Base URL is %s%s" % (self.url,self.rootfolder)

        self.timeit('Generating folders',FolderMaker,self.makefolders)
        self.timeit('Generating files',FileMaker,self.makefiles)
        self.timeit('Reading files',FileReader,self.readfiles)
        #self.timeit('Trashing files',FolderMaker,self.trashfiles)
        #self.timeit('Trashing folders',FolderMaker,self.trashfolders)
    
    def timeit(self,txt,MakerClass,fillmethod):

        numthreads = self.numthreads
        queue = Queue(self.queuesize)
        self.queue = queue
        url = self.url
        username = self.username
        password = self.password

        print txt,
        threads = []
        makers = []
        starttime = time()
        
        for i in range(0,numthreads):
            m = MakerClass(queue,url,username,password)
            makers.append(m)

            t = Thread(target=m.run,name='Zouch maker %d' % i)
            t.setDaemon(1)
            threads.append(t)

        for i in range(0,numthreads):
            threads[i].start()

        t = Thread(target=fillmethod,name='Zouch filler')
        t.setDaemon(1)
        t.start()

        for t in threads:
            if t.isAlive():
                t.join()

        endtime = time()
        print ' %f seconds' % (endtime - starttime)
        threads = None
        print 'Thread[Operations] -',
        tot = 0
        for i in range(0,len(makers)):
            print ' %d[%d]' % (i,makers[i].ops),
            tot = tot + makers[i].ops
        print
        print 'Total operations = %d' % tot
            
    def makefolders(self):
        depth = self.depth
        queue = self.queue
        i = -1
        while 1:
            p = self.folderpath(i)
            if len(p) > depth+1: # depth+1 for top folder
                break
            queue.put(self.folderpath(i))
            i = i + 1
        self.numfolders = i
        for i in range(0,self.numthreads):
            queue.put(None)     # Make sure our makers know to terminate

    def makefiles(self):
        queue = self.queue

        # standard_html_header
        filepath = self.folderpath(-1)
        filepath.append('standard_html_header')
        data = '''
            <HTML><HEAD><TITLE>&dtml-title_or_id;</TITLE></HEAD>
            <BODY BGCOLOR="#FFFFFF">
        '''
        queue.put((filepath,data))

        # standard_html_footer
        filepath = self.folderpath(-1)
        filepath.append('standard_html_footer')
        data = '</BODY></HTML>'
        queue.put((filepath,data))

        # standard_error_message
        filepath = self.folderpath(-1)
        filepath.append('standard_error_message')
        data = '''
            <dtml-var standard_html_header>
            <dtml-if error_message>&dtml-error_message;
            <dtml-else>
                <H2>Zope Error</H2>
                  <P>Zope has encountered an error while publishing this resource.
                  </P>
      
                <P><STRONG>Error Type: <!--#var error_type--></STRONG><BR>
                   <STRONG>Error Value: <!--#var error_value--></STRONG><BR> 
                </P>
     
                <P>For more detailed information about the error, please
                refer to the HTML source for this page.
                </P>
            </dtml-if>
            <dtml-var standard_html_footer>
        '''
        queue.put((filepath,data))

        i = 0
        for filenum in range(0,self.files):
            for foldnum in range(0,self.numfolders):
                filepath = self.folderpath(foldnum)
                filepath.append('f%03d' % filenum)
                queue.put((filepath,self.genfile()))
                i = i + 1
        self.numfiles = i
        for i in range(0,self.numthreads):
            queue.put(None)     # Make sure our makers know to terminate

    def readfiles(self):
        files = self.files
        folders = self.folders
        rng = self._rng
        queue = self.queue
        for i in range(0,self.numfiles * self.reads):
            foldnum = rng.randint(0,folders-1)
            filenum = rng.randint(0,files-1)
            queue.put(self.filepath(foldnum,filenum))
            
        for i in range(0,self.numthreads):
            queue.put(None)     # Make sure our makers know to terminate

    def folderpath(self,num):
        '''
        Generate the full path to folder number 'num'. Returns a list
        of path components. Root folder is num=-1. First folder is num=0.
        '''
        folders = self.folders
        l = [self.rootfolder,]
        num = num + 1
        while num > 0:
            n = (num-1) % folders
            l.append('d%03d' % n)
            num = (num - n) / folders
        return l

    def filepath(self,foldnum,filenum):
        filepath = self.folderpath(foldnum)
        filepath.append('f%03d' % filenum)
        return filepath


    def genfile(self):
        return '''
        <dtml-var standard_html_header>
        <h2><dtml-var title_or_id></h2>
        <p>
        This is the <dtml-var id> Document.
        </p>
        <dtml-var standard_html_footer>
        '''

if __name__ == '__main__':
    print __doc__

    url = 'http://your.site.here:8080/dumparea'
    uname = 'zouch'
    passwd = 'zouch'

    f = Zouch(url,uname,passwd)
    f.folders = 3 
    f.depth = 5
    f.files = 100
    f.reads = 5 # This is a multiplier - readops = totalfiles * reads
    f.numthreads = 10

    f.queuesize = 100
    f.retries = 10
    f.retrysleep = 1

    f.run()

Reply via email to