On 12.03.2012 23:23, Abhishek Pratap wrote:
Super awesome. I love how the python community in general keeps the
recordings available for free.

@Adam : I do have some problems that I can hit numpy with, mainly
bigData based. So in summary I have millions/billions of rows of
biological data on which I want to run some computation but at the
same time have a capability to do quick lookup. I am not sure if numpy
will be applicable for quick lookups by a string-based key, right?


Jason Kinser's book on Python for bioinformatics might be of interest. Though I don't always agree with his NumPy coding style.

As for "big data", it is a problem regardless of language. The HDF5 library might be of help (cf. PyTables or h5py, I actually prefer the latter).

With a 64-bit system it is also possible to memory-map a temporary file and tell the OS to keep as much of it in memory as possible. That way we can "fake" more RAM than we actually have. (The Linux equivalent of the code in bigmem.c would be to mmap from tmpfs.) A use case for bigmem.c is, e.g., if you need to use 10 tables that are each 1-2 GB in size but only have 4 GB of RAM on the desktop computer.


Sturla










/* (C) Sturla Molden, University of Oslo */


#include <Windows.h>
#include <tchar.h>
#include <Lmcons.h>
#include <stdio.h>
#include <stdlib.h>

/* Bookkeeping record for one file-backed allocation. bigmem_alloc fills it
 * in and stores a copy immediately in front of the buffer it returns;
 * bigmem_free reads that copy back to release the resources. */
typedef struct {
    TCHAR   filename [MAX_PATH + 1];    /* name of the backing temporary file */
    HANDLE  hFile;                      /* handle returned by CreateFile */
    HANDLE  hMap;                       /* handle returned by CreateFileMapping */
    SIZE_T  size;                       /* byte count requested by the caller */
    void    *data;                      /* base address returned by MapViewOfFile */
} blob;

/* Unit, in bytes, used to size the metadata region: bigmem_alloc reserves a
 * whole number of PAGE_SIZE chunks in front of the buffer it returns.
 * NOTE(review): presumably meant to match the system page size -- confirm. */
#define PAGE_SIZE 4096
/* EXPORT decorates the public functions. It currently expands to nothing;
 * the commented-out definition is the DLL-export alternative. */
/* #define EXPORT __declspec(dllexport) */
#define EXPORT

EXPORT
void *bigmem_alloc(size_t n)
{
    TCHAR temppath [MAX_PATH + 1];
    TCHAR prefix [MAX_PATH + 1];
    TCHAR username [UNLEN + 1]; /* UNLEN is defined in Lmcons.h */
    DWORD dwHigh, dwLow, dwPid, dwMaxUsername;
    UINT unique;
    size_t metapages = 1;
    char *buffer;
            
    /* allocate memory for the metadata */
    blob *b = (blob*)malloc(sizeof(blob));
    if (b == (blob*)NULL) goto error;
    ZeroMemory(b,sizeof(blob));
    ZeroMemory(temppath, sizeof(temppath));
    ZeroMemory(prefix, sizeof(prefix));
    ZeroMemory(username, sizeof(username));
    b->hFile = INVALID_HANDLE_VALUE;
    b->size = n;   

    /* setup prefix from PID and UID */
    dwPid = GetCurrentProcessId();
    dwMaxUsername = (DWORD)UNLEN;
    if(GetUserName(username,&dwMaxUsername)==0) goto error;
    _sntprintf(prefix, MAX_PATH, _T("bigmem-swapfile-%s-process%ul"), username, 
dwPid);
            
    /* get extra pages to store blob struct */
    while(metapages*PAGE_SIZE < sizeof(blob)) metapages++; 
        
    /* create temporary file backed by memory as much as possible, 
     * unique name, and automatically deleted on close */
    
    if (GetTempPath(MAX_PATH, temppath)==0) goto error;

    for (unique=1; unique<65535; unique++) {
        
        /* create a tempfile name without creating the file */
        if (GetTempFileName(temppath, prefix, unique, b->filename)==0) goto 
error;
        
        /* try to create the file, fail if it already exists
         * we don't need a mutex due to the CREATE_NEW flag */
        b->hFile = CreateFile(b->filename, GENERIC_READ | GENERIC_WRITE, 0, 
NULL, CREATE_NEW, 
                    FILE_ATTRIBUTE_TEMPORARY|FILE_FLAG_DELETE_ON_CLOSE, NULL);
        
        /* if the file was created, we can continue */                
        if (b->hFile != INVALID_HANDLE_VALUE) break;
        
        /* make sure the next call to GetTempFileName produce a
         * NULL terminated string */
        ZeroMemory(b->filename, sizeof(b->filename));
    }                
    if (b->hFile == INVALID_HANDLE_VALUE) goto error;                
                                        
    /* create file mapping object */
    dwLow = (DWORD)(0x00000000FFFFFFFF & (n + metapages*PAGE_SIZE));
    dwHigh = (DWORD)((0xFFFFFFFF00000000 & (n + metapages*PAGE_SIZE)) >> 32);
    b->hMap = CreateFileMapping(b->hFile, NULL, PAGE_READWRITE, dwHigh, dwLow, 
NULL);
    if (b->hMap == (HANDLE)NULL) goto error;
    
    /* map view of file */  
    b->data = MapViewOfFile(b->hMap, FILE_MAP_ALL_ACCESS, 0, 0, (n + 
metapages*PAGE_SIZE));
    if (b->data == NULL) goto error;
    
    /* copy the blob struct to the page before the 
     * buffer we will return */
    buffer = (char*)b->data + metapages*PAGE_SIZE;
    CopyMemory(buffer-sizeof(blob), b, sizeof(blob));
    FlushViewOfFile(b->data, metapages*PAGE_SIZE);
    free(b);    
    return (void*)buffer;

error:    
    if (b == (blob*)NULL) return NULL;
    if (b->hMap) CloseHandle(b->hMap);
    if (b->hFile != INVALID_HANDLE_VALUE) CloseHandle(b->hFile);
    free(b);
    return NULL;    
}


EXPORT
void bigmem_flush(void *base, size_t n)
{
    FlushViewOfFile((LPCVOID)base,(SIZE_T)n);
}


/*
 * Release a buffer obtained from bigmem_alloc.
 *
 * Reads the blob header stored immediately in front of buffer, unmaps the
 * view and closes the mapping and file handles. The backing file was
 * created with FILE_FLAG_DELETE_ON_CLOSE in bigmem_alloc.
 *
 * Passing NULL is a no-op, mirroring free(), so the result of a failed
 * bigmem_alloc can be handed back safely.
 */
EXPORT
void bigmem_free(void *buffer)
{
    blob *header;
    HANDLE hFile, hMap;
    void *view;

    if (buffer == NULL) {
        return;
    }

    /* the header lives inside the mapping, so copy out everything we need
     * before the view is unmapped */
    header = (blob*)((char*)buffer - sizeof(blob));
    hFile = header->hFile;
    hMap = header->hMap;
    view = header->data;

    UnmapViewOfFile(view);
    CloseHandle(hMap);
    CloseHandle(hFile);
}




_______________________________________________
NumPy-Discussion mailing list
NumPy-Discussion@scipy.org
http://mail.scipy.org/mailman/listinfo/numpy-discussion

Reply via email to