Sorry for late reply.
My implementation as follow:
I modified the original algorithm, when cur state is root, using the 2-gram 
hash table to accelerate filtering.
In following implementation, building dat table dependent on the Trie structure 
that had been builded.

References:
http://ieeexplore.ieee.org/xpl/login.jsp?tp=&arnumber=31365&url=http%3A%2F%2Fieeexplore.ieee.org%2Fxpls%2Fabs_all.jsp%3Farnumber%3D31365

---------------------------------------
#include <stdio.h>

#include <stdlib.h>

#include "common.h"

#include "my_dat_ac.h"



#define T_SIZE 655360

#define printf(fmt,arg...)







struct input_list {

        unsigned char node;

        struct bfs_list *next;

};



enum {

        T_MUL_ST = 1,   // multi-state

        T_SIN_ST,   // sigle-state

        T_TER_ST,   // terminal-state

        T_SEP_ST        // separate-state

};



struct _state_node {

        struct my_ac_patt * list;

        uint32_t fail;

};



struct _state_build {

        struct input_list *valid_input_bfs;

        struct input_list *valid_input_bfs_last;

        uint16_t type;

        uint8_t from_char;

        struct _state_node state;

};



struct _state_build build_new[T_SIZE] = {};

static uint32_t new[T_SIZE] = {};          



static uint32_t base[T_SIZE] = {};

static uint32_t check[T_SIZE] = {};

static struct _state_node check_match[T_SIZE] = {};



static uint32_t old[T_SIZE] = {};       



static uint32_t dat_hash[65536] = {};       



static int my_input_enqueue(struct input_list **bfs, struct input_list **last, 
unsigned char n)

{

        struct input_list *new;



        new = (struct input_list *) malloc(sizeof(struct input_list));

        if(!new) {

                cli_errmsg("bfs_enqueue: Can't allocate memory for 
input_list\n");

                return CL_EMEM;

        }

        new->next = NULL;

        new->node = n;



        if(*last) {

                (*last)->next = new;

                *last = new;

        } else {

                *bfs = *last = new;

        }



        return CL_SUCCESS;

}



static int my_input_dequeue(struct input_list **bfs, struct input_list **last)

{

        struct input_list *lpt;

        int pt;



        if(!(lpt = *bfs)) {

                return -1;

        } else {

                *bfs = (*bfs)->next;

                pt = lpt->node;

                if(lpt == *last)

                        *last = NULL;

                free(lpt);

                return pt;

        }

}



static int my_input_queue_foreach(struct input_list **bfs)

{

        struct input_list *lpt;

        int pt;



        if(!(lpt = *bfs)) {

                return -1;

        } else {

                *bfs = (*bfs)->next;

                pt = lpt->node;

                return pt;

        }

}



extern struct bitmap_state *get_bit_popcnt_next_state(struct bitmap_state 
*pcur_state, unsigned char a);

static int max_check_index = 0;

int my_dat_ac_build(struct my_matcher *root)

{

        int count1 =0;

        struct bfs_list *multi_bfs = NULL, *multi_bfs_last = NULL;

        struct bfs_list *sep_bfs = NULL, *sep_bfs_last = NULL;

        int res;

        uint8_t a;

        struct input_list *tmp;

        //init value------s : = 1 ; new(s) : = 1 ; queue := { s } ;

        int i;

        for(i=0;i<T_SIZE;i++)

                base[i] = 2;

        struct bitmap_state *bitmap_st = root->ac_root;

        struct bitmap_state *bitmap_st1;



        uint32_t s = bitmap_st->state_id , s1;

        new[s] = bitmap_st->state_id;

        old[bitmap_st->state_id] = s;

        //base[new[s]] = bitmap_st->state_id;

        my_bfs_enqueue(&multi_bfs, &multi_bfs_last, (void*)bitmap_st);

        /* Routine for multistates */

        while((bitmap_st = my_bfs_dequeue(&multi_bfs, &multi_bfs_last)) != 
(void*)queue_NULL) {

                s = (uint32_t)bitmap_st->state_id;

                printf("-----M-----%03d(%03d)----------\n", s, new[s]);

                /* Let s be the next state in queue */

abcd:

                tmp = build_new[s].valid_input_bfs;

                while((res = my_input_queue_foreach(&tmp)) != -1) {

                        a = (uint8_t)res;

                        if(check[base[new[s]] + a] != 0){

                                base[new[s]] = base[new[s]] + 1;

                                goto abcd;

                        }

                }



                while((res = my_input_dequeue(&build_new[s].valid_input_bfs, 
&build_new[s].valid_input_bfs_last)) != -1) {

                        /*for each a in list[s]*/

                        a = (uint8_t)res;

//                      while(check[base[new[s]] + a] != 0){

//                              base[new[s]] = base[new[s]] + 1;

//                      }

                        check[base[new[s]] + a] = new[s];

                        bitmap_st1 = get_bit_popcnt_next_state(bitmap_st, a);

                        s1 = bitmap_st1->state_id;

                        new[s1] = base[new[s]] + a;

                        old[base[new[s]] + a] = s1;

                        if(new[s1] > max_check_index){

                                max_check_index = new[s1];

                        }

                        printf("o_stat %03d -%02x- new_stat %03d;;;;%d\n", s1, 
a, new[s1], ++count1);



                        //build hash

                        if(bitmap_st1->level == 2){

                                uint8_t hash_str[2];

                                hash_str[0] = build_new[s].from_char;

                                hash_str[1] = build_new[s1].from_char;

                                uint16_t hash_val = *(uint16_t*)hash_str;

                                dat_hash[hash_val] = new[s1];

                        }



//                      check_match[new[s1]].fail = 
new[build_new[s1].state.fail];

//                      check_match[new[s1]].list = build_new[s1].state.list;



                        if(build_new[s1].type != T_SEP_ST){

                                my_bfs_enqueue(&multi_bfs, &multi_bfs_last, 
(void*)bitmap_st1);

                        }

                        else {

                                my_bfs_enqueue(&sep_bfs, &sep_bfs_last, 
(void*)bitmap_st1);

                        }

                }

        }



        /* Routine for single state */

        while((bitmap_st = my_bfs_dequeue(&sep_bfs, &sep_bfs_last)) != 
queue_NULL) {

                s = (uint32_t)bitmap_st->state_id;

                printf("-----S-----%03d(%03d)----------\n", s, new[s]);

                while(build_new[s].type != T_TER_ST){

                        a = 
(uint8_t)my_input_dequeue(&build_new[s].valid_input_bfs, 
&build_new[s].valid_input_bfs_last);

                        while(check[base[new[s]] + a] != 0){

                                base[new[s]] = base[new[s]] + 1;

                        }

                        check[base[new[s]] + a] = new[s];

                        bitmap_st1 = get_bit_popcnt_next_state(bitmap_st, a);

                        s1 = bitmap_st1->state_id;

                        new[s1] = base[new[s]] + a;

                        old[base[new[s]] + a] = s1;

                        if(new[s1] > max_check_index){

                                max_check_index = new[s1];

                        }



                        //build hash

                        if(bitmap_st1->level == 2){

                                uint8_t hash_str[2];

                                hash_str[0] = build_new[s].from_char;

                                hash_str[1] = build_new[s1].from_char;

                                uint16_t hash_val = *(uint16_t*)hash_str;

                                dat_hash[hash_val] = new[s1];

                        }



                        printf("o_stat %03d -%02x- new_stat %03d;;;;%d\n", s1, 
a, new[s1], ++count1);

//                      check_match[new[s1]].fail = 
new[build_new[s1].state.fail];

//                      check_match[new[s1]].list = build_new[s1].state.list;

                        s = s1;

                        bitmap_st = bitmap_st1;

                }

        }

        printf("max_check_index %d\n", max_check_index);

        for(i = 0;i<=max_check_index;i++)

        {

                check_match[i].fail = new[build_new[old[i]].state.fail];

                check_match[i].list = build_new[old[i]].state.list;

        }



        return 0;

}

static unsigned int fail_count[32] = {};

uint32_t my_dat_ac_fail_proc(unsigned char a, uint32_t s)

{

#if 0

//      return 1;

//      uint32_t t;

//      s = check_match[s].fail;

//      t = a + base[s];

//      if(check[t] != s)

//              return 1;

//      else

//              return t;



        static int tdfsdfasdf = 1;

        tdfsdfasdf++;

        return 1;

#else

        uint32_t t;

        int i = 0;

        do{

                fail_count[i++]++;

                if(s == 1) return s;

                s = check_match[s].fail;

                t = a + base[s];

        }while(check[t] != s);

        return t;

#endif

}



static uint32_t st_hash_fail_count = 0;

static uint32_t st_hash_ref_count = 0;

uint32_t my_dat_ac_fail_proc_hash(uint32_t s, uint8_t *buffer, uint32_t *i, 
uint32_t length)

{

        uint32_t t;

        int k = 0;

        uint32_t j;

        do{

                fail_count[k++]++;

                if(s == 1){//root

loop:

                        j = *i;

                        //hash search

                        for(;j<length-1;j++)

                        {

                                st_hash_ref_count++;

                                if(dat_hash[*(uint16_t*)(buffer+j)]){

                                        *i = j+1;

                                        return dat_hash[*(uint16_t*)(buffer+j)];

                                }

                                st_hash_fail_count++;

                        }

                        *i = j+1;

                        return 0;

                }

                s = check_match[s].fail;

                if(s == 1)

                        goto loop;

                t = buffer[*i] + base[s];

        }while(check[t] != s);

        return t;

}



int my_dat_ac_scanbuff(const unsigned char *buffer, uint32_t length,const 
struct my_matcher *root)

{

        int i;

//      for(i = 1;i< root->ac_nodes+1;i++)

//      {

//              //printf("old s %03d 's type is %d\n", i, build_new[i].type);

//              printf("old s %03d  new_stat is %d\n", i, new[i]);

//      }

//      for(i = 0;i< T_SIZE;i++)

//      {

//              if(check_match[i].fail == 0)

//              printf("stat %05d fail null[%d]\n", old[i], i);

//      }

//      fprintf(stdout, "trans num: %d\n", my_ac_trans_sum(root));

        //return 0;

        memset((void*)fail_count, 0x00, sizeof(fail_count));

        uint32_t s = 1, t;

        printf("my_dat_ac_scanbuff\n", i, build_new[i].type);

        int count = 0;

        for (i = 0; i < length; i++) {

                t = buffer[i] + base[s];

                if(check[t] != s){

                        //fail

                        //t = my_dat_ac_fail_proc(buffer[i], s);

                        t = my_dat_ac_fail_proc_hash(s, buffer, &i, length);

                        //continue;

                }

//              if(s == 1)

//                      st_hash_ref_count++;

                printf("%05d  --%02x-->  %05d\n", old[s], buffer[i], old[t]);

                s=t;

                if(check_match[s].list){

                        //final

                        //fprintf(stdout, "hit:sigid %d:pos %d:\n", 
check_match[s].list->sigid, i);

                        count++;

                }

        }

//      fprintf(stdout, "hit count %d, st_hash_fail_count %u st_hash_ref_count 
%u\n", count, st_hash_fail_count, st_hash_ref_count);

//      for(i = 0;i<8;i++)

//              fprintf(stdout, "%u,", fail_count[i]);

//      fprintf(stdout, "\n");

        return s;

}



int my_dat_update_input(uint32_t s, unsigned char a, uint32_t t)

{

        printf("id:%03d -- %02x -- id:%03d\n", s, a, t);

        my_input_enqueue(&build_new[s].valid_input_bfs, 
&build_new[s].valid_input_bfs_last, a);

        build_new[t].from_char = a;

        return 0;

}



int my_dat_update_patlist(uint32_t s, struct my_ac_patt *list)

{

        build_new[s].state.list = list;

        return 0;

}



int my_dat_update_fail(uint32_t s,uint32_t fail)

{

        build_new[s].state.fail = fail;

        return 0;

}



int my_dat_update_terminal(uint32_t s)

{

        build_new[s].type = T_TER_ST;

        return 0;

}



int my_dat_update_type(struct bitmap_state **st_array, uint16_t array_len)

{

        int i = array_len-1;



        if(build_new[st_array[i]->state_id].type !=0)

                return 0;



        int flag = 0;

        for(; i>=0;i--)

        {

                if(st_array[i]->sumary[_bitmap_group] > 1 || flag)

                {

                        build_new[st_array[i]->state_id].type = T_MUL_ST;

                        flag = 1;

                }

                else

                        build_new[st_array[i]->state_id].type = T_SEP_ST;

        }

        return 0;

}





_______________________________________________
http://lurker.clamav.net/list/clamav-devel.html
Please submit your patches to our Bugzilla: http://bugs.clamav.net

Reply via email to