#include <string.h>

#include <algorithm>
#include <cmath>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <stdexcept>
#include <unordered_map>
#include <vector>

#include "BOBHash.h"
#include "UseSketch.h"

using namespace std;
//base of the exponential-decay probability: the minimum counter c of a full level
//is decayed/replaced with probability upper^(-log2(c))
#define upper 1.08          //exponential decay parameter
#define KEY_SIZE 4      //size of key
//frequently used value
//A bucket holds cells of three sizes (level 1, 2 and 3). Every cell stores its
//fingerprint in the low bits, immediately followed by its counter.
//fingerprint width in bits of a level 1 / 2 / 3 cell
#define FP_LEN1 8
#define FP_LEN2 12
#define FP_LEN3 16

//masks selecting the fingerprint bits of a level 1 / 2 / 3 cell
#define FP_MASK1 0xFF
#define FP_MASK2 0xFFF
#define FP_MASK3 0xFFFF

//masks selecting the counter bits (7, 12 and 15 bits) of a level 1 / 2 / 3 cell
#define CT_MASK1 0x7F
#define CT_MASK2 0xFFF
#define CT_MASK3 0x7FFF

//total width in bits of a level 1 / 2 / 3 cell (fingerprint + counter)
#define CELL_LEN1 15
#define CELL_LEN2 24
#define CELL_LEN3 31

//masks covering a whole level 1 / 2 / 3 cell
#define CELL_MASK1 0x7FFF
#define CELL_MASK2 0xFFFFFF
#define CELL_MASK3 0x7FFFFFFF

//width in bits and mask of the layout code stored in the lowest bits of a bucket;
//the first cell starts right after it
#define META_LENGTH 4
#define META_MASK 0xF
//number of bits of a 128-bit bucket left for cells once the layout code is removed
#define BUCKET_CELLS_LEGTH 124

//level 3 counters above this value are not decayed when a level-up fails
//NOTE(review): presumably bit 14 flags the exponential counting mode - confirm
#define EXP_MODE_MASK 0x4000
//counters >= this threshold cannot be directly decayed
#define EXP_MODE_T 0x4001
//mask for reset the counting part of the two mode active counter
#define EXP_CT_MASK 0x3FF0
//mask to check if the counting part will overflow
#define EXP_CT_OVFL_MASK 0x7FF0

//largest number of level 1 / 2 / 3 cells a single bucket can hold
#define MAX_NUM_LV1 8
#define MAX_NUM_LV2 5
#define MAX_NUM_LV3 4

//raw counter value written for a flow when it is moved into a level 2 / level 3 cell
//(the bits stored in the counter field, not the count those bits represent)
#define MIN_C_LV_2 256
#define MIN_C_LV_3 4096

// BKDR polynomial hash over a byte buffer.
//   str  : first byte of the key
//   len  : number of bytes to hash
//   seed : multiplier applied before each byte is added (defaults to 1000)
// Arithmetic wraps modulo 2^32; the top bit of the result is cleared, so the
// returned value always fits in 31 bits.
inline uint32_t BKDRHash(const uint8_t *str, uint32_t len, uint32_t seed=1000) {
    uint32_t acc = 0;
    const uint8_t *const stop = str + len;

    for (const uint8_t *cursor = str; cursor != stop; ++cursor) {
        acc *= seed;
        acc += *cursor;
    }

    return acc & 0x7FFFFFFF;
}

// Scrambles a 32-bit hash with alternating xor-shift and multiply rounds
// (shifts 16, 13, 16; multipliers 0x85ebca6b, 0xc2b2ae35) and returns the
// low 16 bits of the result as the fingerprint.
inline uint16_t finger_print_16(uint32_t hash) {
    uint32_t mixed = hash ^ (hash >> 16);
    mixed = mixed * 0x85ebca6b;
    mixed = mixed ^ (mixed >> 13);
    mixed = mixed * 0xc2b2ae35;
    mixed = mixed ^ (mixed >> 16);
    return static_cast<uint16_t>(mixed & 0xFFFF);
}

// Decodes a 4-bit bucket layout code into the number of level 1, level 2 and
// level 3 cells of that layout.
//   metaCode : layout code, valid range 0..15
//   num_lv_1, num_lv_2, num_lv_3 : receive the cell counts
// For a code outside 0..15 the three outputs are left exactly as they were.
inline void metaCodeToData(uint8_t metaCode, int &num_lv_1, int &num_lv_2, int &num_lv_3) {
    // One row per code: {level 1 cells, level 2 cells, level 3 cells}.
    static const int layouts[16][3] = {
            {8, 0, 0}, {6, 1, 0}, {5, 2, 0}, {3, 3, 0}, {1, 4, 0}, {0, 5, 0},
            {6, 0, 1}, {4, 1, 1}, {3, 2, 1}, {1, 3, 1},
            {4, 0, 2}, {2, 1, 2}, {0, 2, 2},
            {2, 0, 3}, {0, 1, 3},
            {0, 0, 4}
    };

    if (metaCode > 15) {
        return;
    }

    const int *row = layouts[metaCode];
    num_lv_3 = row[2];
    num_lv_2 = row[1];
    num_lv_1 = row[0];
}

// Encodes a bucket layout, given as its number of level 2 and level 3 cells,
// into the matching 4-bit layout code.
//   new_num_lv_2 : number of level 2 cells
//   new_num_lv_3 : number of level 3 cells
// Returns the code (0..15), or -1 when the combination has no code. Only the
// combination "four level 3 cells plus a non-zero level 2 count" additionally
// prints "encode error!" to stdout.
int getMetaCode(int new_num_lv_2, int new_num_lv_3) {
    if (new_num_lv_3 == 4) {
        if (new_num_lv_2 != 0) {
            std::cout << "encode error!";
            return -1;
        }
        return 15;
    }

    if (new_num_lv_3 < 0 || new_num_lv_3 > 3) {
        return -1;
    }

    // For each level 3 count: code of the layout with zero level 2 cells, and
    // the largest level 2 count that still has a code.
    static const int base_code[4] = {0, 6, 10, 13};
    static const int max_lv_2[4] = {5, 3, 2, 1};

    if (new_num_lv_2 < 0 || new_num_lv_2 > max_lv_2[new_num_lv_3]) {
        return -1;
    }
    return base_code[new_num_lv_3] + new_num_lv_2;
}

// Loads the bytes of a 16-byte (2 x uint64_t) bucket starting at the byte that
// contains bit `bit_idx` - at most four bytes and never past the end of the
// bucket - and shifts them so that bit `bit_idx` becomes bit 0 of the result.
// A bounded memcpy is used instead of a `*(uint32_t *)` cast: the cast is an
// unaligned, type-punned load, and for a field in the last bytes of the bucket
// it reads beyond the bucket (a counter starting at bit 117 would load bytes
// 14..17 of a 16-byte object).
static inline uint32_t findMinCellLoadBits(const uint64_t *bucket, uint32_t bit_idx) {
    const uint32_t bucket_bytes = 2 * sizeof(uint64_t);
    const uint32_t byte_idx = bit_idx >> 3;
    uint32_t word = 0;
    if (byte_idx < bucket_bytes) {
        const uint32_t remaining = bucket_bytes - byte_idx;
        memcpy(&word, (const uint8_t *) bucket + byte_idx,
               remaining < sizeof(word) ? remaining : sizeof(word));
    }
    return word >> (bit_idx & 0x7);
}

// Finds the cell with the smallest counter among the cells of one level.
//   bucket      : the bucket to scan; must point to two uint64_t words
//   level       : 1, 2 or 3; any other value leaves all outputs untouched
//   num_lv_1/2/3: number of level 1 / 2 / 3 cells in the bucket's current layout
//   min_counter : receives the smallest counter found. It starts at 0xFFFF for
//                 level 1 and 2 and at EXP_MODE_T for level 3, so level 3 cells
//                 whose counter is >= EXP_MODE_T are never selected.
//   min_index   : receives the bit offset of the selected cell inside the
//                 bucket, or 0xFFFF when no cell was selected
//   min_cell    : receives the position of the selected cell, counting level 3
//                 cells first, then level 2, then level 1; when no cell was
//                 selected it is the position of the first cell of the level
// On ties the first cell with the smallest counter wins.
void findMinCell(uint64_t *bucket, int level, const int &num_lv_1, const int &num_lv_2, const int &num_lv_3,
                 uint16_t &min_counter, uint16_t &min_index,uint16_t &min_cell) {
    uint32_t first_bit = 0;   // bit offset of the first cell of the level
    uint32_t end_bit = 0;     // bit offset just past the last cell of the level
    uint32_t fp_len = 0;      // fingerprint width of the level
    uint32_t cell_len = 0;    // cell width of the level
    uint32_t ct_mask = 0;     // counter mask of the level
    uint16_t first_cell = 0;  // position of the first cell of the level
    uint16_t initial_min = 0; // exclusive upper bound for a selectable counter

    switch (level) {
        case 1:
            first_bit = META_LENGTH + num_lv_3 * CELL_LEN3 + num_lv_2 * CELL_LEN2;
            end_bit = first_bit + num_lv_1 * CELL_LEN1;
            fp_len = FP_LEN1;
            cell_len = CELL_LEN1;
            ct_mask = CT_MASK1;
            first_cell = static_cast<uint16_t>(num_lv_3 + num_lv_2);
            initial_min = 0xFFFF;
            break;
        case 2:
            first_bit = META_LENGTH + num_lv_3 * CELL_LEN3;
            end_bit = first_bit + num_lv_2 * CELL_LEN2;
            fp_len = FP_LEN2;
            cell_len = CELL_LEN2;
            ct_mask = CT_MASK2;
            first_cell = static_cast<uint16_t>(num_lv_3);
            initial_min = 0xFFFF;
            break;
        case 3: //only find the counters in normal mode
            first_bit = META_LENGTH;
            end_bit = META_LENGTH + num_lv_3 * CELL_LEN3;
            fp_len = FP_LEN3;
            cell_len = CELL_LEN3;
            ct_mask = CT_MASK3;
            first_cell = 0;
            initial_min = EXP_MODE_T;
            break;
        default:
            return;
    }

    min_counter = initial_min;
    min_index = 0xFFFF;
    min_cell = first_cell;

    // j walks over the counter field of every cell of the level.
    uint16_t cell = first_cell;
    for (uint32_t j = first_bit + fp_len; j < end_bit; j += cell_len, ++cell) {
        const uint16_t counter = static_cast<uint16_t>(findMinCellLoadBits(bucket, j) & ct_mask);
        if (counter < min_counter) {
            min_counter = counter;
            min_index = static_cast<uint16_t>(j - fp_len);
            // Record the position of this cell. Previously min_cell was bumped
            // on every iteration, so it always ended one past the last cell of
            // the level instead of identifying the minimum.
            min_cell = cell;
        }
    }
}



class SwitchSketch
{
private:
uint64_t **B;//array of buckets,128bits(2*64)
int bucket_num,m2;
BOBHash* bobhash;
BOBHash* bobhash1;
UseSketch* test;

public:
    SwitchSketch(int memory,int sketch){
		switch(sketch){
			case 1:{
				bucket_num=int(memory/(128+8+32));
				int m2=int(memory*0.75/8);
				cout<<"SwitchSketch_Elastic: table num"<<1<<"bucket num"<<bucket_num<<"light part"<<m2<<"cell num"<<8<<endl;
				test=new Elastic(1,bucket_num,8,m2);
				break;
			}
			case 2:{
				bucket_num=int(memory/(128+2*8+32));
				cout<<"SwitchSketch_Waving: table num"<<1<<"bucket num"<<bucket_num<<" cell num"<<8<<endl;
				test=new Waving(1,bucket_num,8);
				break;
			}
			case 3:{
				bucket_num=int(memory/128);
				cout<<"SwitchSketch_RAP: table num"<<1<<"bucket num"<<bucket_num<<" cell num"<<8<<endl;
				test=new RAP();
				break;
			}
			//default:{
			//	int bucket_len=64;
			//	bucket_num=memory/bucket_len/2;//two tables
			//}
		}
        bobhash = new BOBHash(1001);
        bobhash1 = new BOBHash(1213);
        B = new uint64_t*[bucket_num];
        for (int i = 0; i < bucket_num; ++i) {
            B[i] = new uint64_t[2];
            B[i][0]=0;
            B[i][1]=0;
        }
    }

    void Switch(uint64_t *bucket, int level, const int &num_lv_1, const int &num_lv_2, const int &num_lv_3, uint16_t fp16,
                uint32_t cell_start_bit_idx,int slot_idx,int cell_idx) {
        int long_fp=smallhash((fp16&FP_MASK1),slot_idx);
        int new_num_lv_1 = 0, new_num_lv_2 = 0, new_num_lv_3 = 0; //new number of level_1,level_2,level_3 cells
        int start_lv2 = 0, start_lv1 = 0, end_lv1 = 0;            //start index in bit
        int new_start_lv2 = 0, new_start_lv1 = 0;      //new start index in bit


        uint8_t cell_lv_1[MAX_NUM_LV1][2] = {0}; //Temporary variable to store flow in the bucket
        uint16_t cell_lv_2[MAX_NUM_LV2][2] = {0};
        uint16_t cell_lv_3[MAX_NUM_LV3][2] = {0};

        int usage_lv_1 = 0, usage_lv_2 = 0, usage_lv_3 = 0;        //number of used level_1,level_2,level_3 cells
        int i = 0, j = 0;
        double ranf = 0; //random number for exponential decay
        uint16_t tmp_fp = 0, tmp_counter = 0;

        uint16_t min_counter = 0;
        uint16_t min_index = -1;
        uint16_t min_cell;

        switch (level) {
            case 2: {
                new_num_lv_2 = num_lv_2 + 1; //get new_num_lv_2
                new_num_lv_3 = num_lv_3;     //get new_num_lv_3
                int bits_remain = BUCKET_CELLS_LEGTH - CELL_LEN3 * new_num_lv_3 - CELL_LEN2 * new_num_lv_2;
                //levelup success
                if (bits_remain >= 0) {
                    int pos=0;
                    new_num_lv_1 = bits_remain / CELL_LEN1;

                    start_lv2 = META_LENGTH + num_lv_3 * CELL_LEN3;
                    start_lv1 = start_lv2 + num_lv_2 * CELL_LEN2;
                    end_lv1 = start_lv1 + num_lv_1 * CELL_LEN1;

                    //Temporary store level_3 flows
                    for (usage_lv_3 = 0, j = META_LENGTH; j < start_lv2; j += CELL_LEN3) {
                        tmp_fp = ((*(uint32_t * )((uint8_t *) bucket + (j >> 3))) >> (j & 0x7)) & FP_MASK3;
                        tmp_counter =
                                ((*(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN3) >> 3))) >> ((j + FP_LEN3) & 0x7)) &
                                CT_MASK3;

                        if (tmp_counter > 0) {//pos is bogger than 0
                            cell_lv_3[usage_lv_3][0] = tmp_fp;
                            cell_lv_3[usage_lv_3][1] = tmp_counter;
                            test->write_temp(0,slot_idx,pos,usage_lv_3);
                            usage_lv_3++;
                        }
                        pos++;
                    }
                    //Temporary store level_2 flows
                    for (usage_lv_2 = 0, j = start_lv2; j < start_lv1; j += CELL_LEN2) {
                        tmp_fp = ((*(uint32_t * )((uint8_t *) bucket + (j >> 3))) >> (j & 0x7)) & FP_MASK2;
                        tmp_counter =
                                ((*(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN2) >> 3))) >> ((j + FP_LEN2) & 0x7)) &
                                CT_MASK2;

                        if (tmp_counter > 0) {
                            cell_lv_2[usage_lv_2][0] = tmp_fp;
                            cell_lv_2[usage_lv_2][1] = tmp_counter;
                            test->write_temp(0,slot_idx,pos,usage_lv_3+usage_lv_2);
                            usage_lv_2++;
                        }
                        pos++;
                    }
                    int start=usage_lv_2;
                    //Temporary store  fp16
                    cell_lv_2[usage_lv_2][0] = fp16 & FP_MASK2;
                    cell_lv_2[usage_lv_2][1] = MIN_C_LV_2;
                    test->write_temp(0,slot_idx,cell_idx,usage_lv_3+usage_lv_2);
                    usage_lv_2++;
                    //<1><2><><4><5><6> pos=1 true=2
                    //<><2><1><4><5><6> pos=2,min=3,true=3
                    //<><2><1><4><5><6> pos=3 min=3 jump
                    //<><2><1><4><5><6> pos=4 min=3 jump
                    uint16_t min_f = 0,minpos=0; 
                    //Temporary store level_1 flows except fp16 and min_f
                    for (j = start_lv1; j < end_lv1; j += CELL_LEN1) {
                        tmp_fp = ((*(uint32_t * )((uint8_t *) bucket + (j >> 3))) >> (j & 0x7)) & FP_MASK1;
                        tmp_counter =
                                ((*(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN1) >> 3))) >> ((j + FP_LEN1) & 0x7)) &
                                CT_MASK1;
                        if (tmp_counter && (tmp_fp != (fp16 & FP_MASK1))) {
                            if (!min_counter) {
                                min_f = tmp_fp;
                                min_counter = tmp_counter;
                                minpos=pos;
                            } else if (tmp_counter < min_counter) {
                                cell_lv_1[usage_lv_1][0] = min_f;
                                cell_lv_1[usage_lv_1][1] = min_counter;
                                test->write_temp(0,slot_idx,minpos,usage_lv_3+usage_lv_2+usage_lv_1);
                                min_f = tmp_fp;
                                min_counter = tmp_counter;
                                minpos=pos;
                                usage_lv_1++;
                            } else {
                                cell_lv_1[usage_lv_1][0] = tmp_fp;
                                cell_lv_1[usage_lv_1][1] = tmp_counter;
                                test->write_temp(0,slot_idx,pos,usage_lv_3+usage_lv_2+usage_lv_1);
                                usage_lv_1++;
                            }
                        }
                        pos++;
                    }

                    //if new level_1 cell is not full, store min_f
                    if (min_counter > 0 && usage_lv_1 < new_num_lv_1) {
                        cell_lv_1[usage_lv_1][0] = min_f;
                        cell_lv_1[usage_lv_1][1] = min_counter;
                        test->write_temp(0,slot_idx,minpos,usage_lv_3+usage_lv_2+usage_lv_1);
                        usage_lv_1++;
                    }else{
                        int old_fp = min_f & FP_MASK1;
                        int old_long_fp=smallhash(old_fp,slot_idx);
                        test->kickout(0,slot_idx,minpos,min_counter,long_fp,old_long_fp,false);
                    }
                    //Temporary store finished
                    //flush bucket
                    //change when bucket structure changes
                    bucket[0] = 0;
                    bucket[1] = 0;

                    new_start_lv2 = META_LENGTH + new_num_lv_3 * CELL_LEN3;
                    new_start_lv1 = new_start_lv2 + new_num_lv_2 * CELL_LEN2;


                    //insert stored level_3 flows
                    for (j = META_LENGTH, i = 0; i < usage_lv_3; i++, j += CELL_LEN3) {
                        *(uint32_t * )((uint8_t *) bucket + (j >> 3)) |= ((uint32_t) cell_lv_3[i][0]) << (j & 0x7);
                        *(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN3) >> 3)) |=
                                ((uint32_t) cell_lv_3[i][1]) << ((j + FP_LEN3) & 0x7);
                    }
                    //insert stored level_2 flows
                    for (j = new_start_lv2, i = 0; i < usage_lv_2; i++, j += CELL_LEN2) {
                        *(uint32_t * )((uint8_t *) bucket + (j >> 3)) |= ((uint32_t) cell_lv_2[i][0]) << (j & 0x7);
                        *(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN2) >> 3)) |=
                                ((uint32_t) cell_lv_2[i][1]) << ((j + FP_LEN2) & 0x7);
                    }
                    //insert stored level_1 flows
                    for (j = new_start_lv1, i = 0; i < usage_lv_1; i++, j += CELL_LEN1) {
                        *(uint32_t * )((uint8_t *) bucket + (j >> 3)) |= ((uint32_t) cell_lv_1[i][0]) << (j & 0x7);
                        *(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN1) >> 3)) |=
                                ((uint32_t) cell_lv_1[i][1]) << ((j + FP_LEN1) & 0x7);
                    }
                    test->reslove_temp(0,slot_idx);
                    int metaCode = getMetaCode(new_num_lv_2, new_num_lv_3);
                    bucket[0] |= metaCode & META_MASK;
                    return;
                } else//levelup not success,exponential decay, change when bucket structure changes
                {
                    findMinCell(bucket, 2, num_lv_1, num_lv_2, num_lv_3, min_counter, min_index,min_cell);
                    int old_fp = ((*(uint32_t * )((uint8_t *) bucket + (min_index >> 3))) >> (min_index & 0x7)) & FP_MASK1;
                    int old_long_fp=smallhash(old_fp,slot_idx);
                    //exponential decay,min
                    ranf = 1.0 * rand() / RAND_MAX;
                    if (ranf < pow(upper, log2(min_counter) * -1)) {
                        if (min_counter <= MIN_C_LV_2) {
                            *(uint32_t * )((uint8_t *) bucket + (cell_start_bit_idx >> 3)) &= ~(((uint32_t) CELL_MASK1)
                                    << (cell_start_bit_idx & 0x7));//clear original cell

                            //replace fp
                            *(uint32_t * )((uint8_t *) bucket + (min_index >> 3)) &= ~(((uint32_t) FP_MASK2)
                                    << (min_index & 0x7));
                            *(uint32_t * )((uint8_t *) bucket + (min_index >> 3)) |=
                                    ((uint32_t) fp16 & FP_MASK2) << (min_index & 0x7);
                            *(uint32_t * )((uint8_t *) bucket + ((min_index + FP_LEN2) >> 3)) &= ~(((uint32_t) FP_MASK2)
                                    << ((min_index + FP_LEN2) & 0x7));
                            *(uint32_t * )((uint8_t *) bucket + ((min_index + FP_LEN2) >> 3)) |=
                                    ((uint32_t) MIN_C_LV_2) << ((min_index + FP_LEN2) & 0x7);
                            return;
                        } else//decay
                        {
                            test->kickout(0,slot_idx,min_cell,min_counter,long_fp,old_long_fp,true);
                            *(uint32_t * )((uint8_t *) bucket + ((min_index + FP_LEN2) >> 3)) -=
                                    ((uint32_t) 1) << ((min_index + FP_LEN2) & 0x7);
                            return;
                        }
                    }
                    test->kickout(0,slot_idx,min_cell,min_counter,long_fp,old_long_fp,false);
                }
                break;
            }
            case 3: {
                new_num_lv_2 = num_lv_2 - 1; //get new_num_lv_2
                new_num_lv_3 = num_lv_3 + 1;     //get new_num_lv_3
                int bits_remain = BUCKET_CELLS_LEGTH - CELL_LEN3 * new_num_lv_3 - CELL_LEN2 * new_num_lv_2;

                if (bits_remain >= 0) {//remove a CELL_LEN1-bit cell and the original CELL_LEN2-bit cell
                    new_num_lv_1 = bits_remain / CELL_LEN1;
                    int pos=0;
                    start_lv2 = META_LENGTH + num_lv_3 * CELL_LEN3;
                    start_lv1 = start_lv2 + num_lv_2 * CELL_LEN2;
                    end_lv1 = start_lv1 + num_lv_1 * CELL_LEN1;

                    for (usage_lv_3 = 0, j = META_LENGTH; j < start_lv2; j += CELL_LEN3) {

                        tmp_fp = ((*(uint32_t * )((uint8_t *) bucket + (j >> 3))) >> (j & 0x7)) & FP_MASK3;
                        tmp_counter =
                                ((*(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN3) >> 3))) >> ((j + FP_LEN3) & 0x7)) &
                                CT_MASK3;

                        if (tmp_counter > 0) {
                            cell_lv_3[usage_lv_3][0] = tmp_fp;
                            cell_lv_3[usage_lv_3][1] = tmp_counter;
                            test->write_temp(0,slot_idx,pos,usage_lv_3);
                            usage_lv_3++;
                        }
                        pos++;
                    }
                    //store fp16
                    int start=usage_lv_3;

                    cell_lv_3[usage_lv_3][0] = fp16;
                    cell_lv_3[usage_lv_3][1] = MIN_C_LV_3;
                    test->write_temp(0,slot_idx,cell_idx,usage_lv_3);
                    usage_lv_3++;

                    //Temporary store level_2 flows except fp16
                    for (usage_lv_2 = 0, j = start_lv2; j < start_lv1; j += CELL_LEN2) {
                        tmp_fp = ((*(uint32_t * )((uint8_t *) bucket + (j >> 3))) >> (j & 0x7)) & FP_MASK2;
                        tmp_counter =
                                ((*(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN2) >> 3))) >> ((j + FP_LEN2) & 0x7)) &
                                CT_MASK2;

                        if (tmp_counter && (tmp_fp != (fp16 & FP_MASK2))) {
                            cell_lv_2[usage_lv_2][0] = tmp_fp;
                            cell_lv_2[usage_lv_2][1] = tmp_counter;
                            test->write_temp(0,slot_idx,pos,usage_lv_3+usage_lv_2);
                            usage_lv_2++;
                        }
                        pos++;
                    }     

                    uint16_t min_f = 0,minpos; //Minimum flow
                    //Temporary store level_1 flows except the min
                    for (j = start_lv1; j < end_lv1; j += CELL_LEN1) {
                        tmp_fp = ((*(uint32_t * )((uint8_t *) bucket + (j >> 3))) >> (j & 0x7)) & FP_MASK1;
                        tmp_counter =
                                ((*(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN1) >> 3))) >> ((j + FP_LEN1) & 0x7)) &
                                CT_MASK1;
                        if (tmp_counter) {
                            if (!min_counter) {
                                minpos=pos;
                                min_f = tmp_fp;
                                min_counter = tmp_counter;
                            } else if (tmp_counter < min_counter) {
                                cell_lv_1[usage_lv_1][0] = min_f;
                                cell_lv_1[usage_lv_1][1] = min_counter;
                                test->write_temp(0,slot_idx,minpos,usage_lv_3+usage_lv_2+usage_lv_1);
                                min_f = tmp_fp;
                                minpos=pos;
                                min_counter = tmp_counter;
                                usage_lv_1++;
                            } else {
                                cell_lv_1[usage_lv_1][0] = tmp_fp;
                                cell_lv_1[usage_lv_1][1] = tmp_counter;
                                test->write_temp(0,slot_idx,pos,usage_lv_3+usage_lv_2+usage_lv_1);
                                usage_lv_1++;
                            }
                        }
                        pos++;
                    }
    
                    //if new level_1 cell is not full, store min_f
                    if (usage_lv_1 < new_num_lv_1) {
                        cell_lv_1[usage_lv_1][0] = min_f;
                        cell_lv_1[usage_lv_1][1] = min_counter;
                        test->write_temp(0,slot_idx,minpos,usage_lv_3+usage_lv_2+usage_lv_1);
                        usage_lv_1++;
                    }else{
                        int old_fp = min_f & FP_MASK1;
                        int old_long_fp=smallhash(old_fp,slot_idx);
                        test->kickout(0,slot_idx,minpos,min_counter,long_fp,old_long_fp,false);
                    }
                    //flush bucket
                    //change when bucket structure changes
                    bucket[0] = 0;
                    bucket[1] = 0;

                    new_start_lv2 = META_LENGTH + new_num_lv_3 * CELL_LEN3;
                    new_start_lv1 = new_start_lv2 + new_num_lv_2 * CELL_LEN2;


                    //insert stored level_3 flows
                    for (j = META_LENGTH, i = 0; i < usage_lv_3; i++, j += CELL_LEN3) {
                        *(uint32_t * )((uint8_t *) bucket + (j >> 3)) |= ((uint32_t) cell_lv_3[i][0]) << (j & 0x7);
                        *(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN3) >> 3)) |=
                                ((uint32_t) cell_lv_3[i][1]) << ((j + FP_LEN3) & 0x7);
                    }
                    //insert stored level_2 flows
                    for (j = new_start_lv2, i = 0; i < usage_lv_2; i++, j += CELL_LEN2) {
                        *(uint32_t * )((uint8_t *) bucket + (j >> 3)) |= ((uint32_t) cell_lv_2[i][0]) << (j & 0x7);
                        *(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN2) >> 3)) |=
                                ((uint32_t) cell_lv_2[i][1]) << ((j + FP_LEN2) & 0x7);
                    }
                    //insert stored level_1 flows
                    for (j = new_start_lv1, i = 0; i < usage_lv_1; i++, j += CELL_LEN1) {
                        *(uint32_t * )((uint8_t *) bucket + (j >> 3)) |= ((uint32_t) cell_lv_1[i][0]) << (j & 0x7);
                        *(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN1) >> 3)) |=
                                ((uint32_t) cell_lv_1[i][1]) << ((j + FP_LEN1) & 0x7);
                    }
                    test->reslove_temp(0,slot_idx);
                    int metaCode = getMetaCode(new_num_lv_2, new_num_lv_3);
                    bucket[0] |= metaCode & META_MASK;
                    return;
                }

                new_num_lv_2 = num_lv_2 - 2; //get new_num_lv_2
                new_num_lv_3 = num_lv_3 + 1;     //get new_num_lv_3
                bits_remain = BUCKET_CELLS_LEGTH - CELL_LEN3 * new_num_lv_3 - CELL_LEN2 * new_num_lv_2;

                if (bits_remain >= 0) {//remove another CELL_LEN2-bit cell and the original CELL_LEN2-bit cell
                    new_num_lv_1 = bits_remain / CELL_LEN1;
                    int pos=0;
                    start_lv2 = META_LENGTH + num_lv_3 * CELL_LEN3;
                    start_lv1 = start_lv2 + num_lv_2 * CELL_LEN2;
                    end_lv1 = start_lv1 + num_lv_1 * CELL_LEN1;

                    //Temporary store level_3 flows
                    for (usage_lv_3 = 0, j = META_LENGTH; j < start_lv2; j += CELL_LEN3) {
                        tmp_fp = ((*(uint32_t * )((uint8_t *) bucket + (j >> 3))) >> (j & 0x7)) & FP_MASK3;
                        tmp_counter =
                                ((*(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN3) >> 3))) >> ((j + FP_LEN3) & 0x7)) &
                                CT_MASK3;

                        if (tmp_counter > 0) {
                            cell_lv_3[usage_lv_3][0] = tmp_fp;
                            cell_lv_3[usage_lv_3][1] = tmp_counter;
                            test->write_temp(0,slot_idx,pos,usage_lv_3);
                            usage_lv_3++;
                        }
                        pos++;
                    }
                    int start=usage_lv_3;

                    //store fp16
                    cell_lv_3[usage_lv_3][0] = fp16;
                    cell_lv_3[usage_lv_3][1] = MIN_C_LV_3;
                    test->write_temp(0,slot_idx,cell_idx,usage_lv_3);
                    usage_lv_3++;

                    uint16_t min_f = 0,minpos; //Minimum flow
                    //Temporary store level_2 flows except fp16 and min
                    for (usage_lv_2 = 0, j = start_lv2; j < start_lv1; j += CELL_LEN2) {
                        tmp_fp = ((*(uint32_t * )((uint8_t *) bucket + (j >> 3))) >> (j & 0x7)) & FP_MASK2;
                        tmp_counter =
                                ((*(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN2) >> 3))) >> ((j + FP_LEN2) & 0x7)) &
                                CT_MASK2;

                        if (tmp_counter && (tmp_fp != (fp16 & FP_MASK2))) {

                            if (!min_counter) {
                                min_f = tmp_fp;
                                min_counter = tmp_counter;
                                minpos=pos;
                            } else if (tmp_counter < min_counter) {
                                cell_lv_2[usage_lv_2][0] = min_f;
                                cell_lv_2[usage_lv_2][1] = min_counter;
                                test->write_temp(0,slot_idx,minpos,usage_lv_3+usage_lv_2);
                                min_f = tmp_fp;
                                minpos=pos;
                                min_counter = tmp_counter;
                                usage_lv_2++;
                            } else {
                                cell_lv_2[usage_lv_2][0] = tmp_fp;
                                cell_lv_2[usage_lv_2][1] = tmp_counter;
                                test->write_temp(0,slot_idx,pos,usage_lv_3+usage_lv_2);
                                usage_lv_2++;
                            }
                        }
                        pos++;
                    }

                    //if new level_2 cell is not full, store min_f
                    if (min_counter > 0 && usage_lv_2 < new_num_lv_2) {
                        cell_lv_2[usage_lv_1][0] = min_f;
                        cell_lv_2[usage_lv_1][1] = min_counter;
                        test->write_temp(0,slot_idx,minpos,usage_lv_3+usage_lv_2);
                        usage_lv_2++;
                    }

                    //Temporary store level_1 flows except the min
                    for (j = start_lv1; j < end_lv1; j += CELL_LEN1) {
                        tmp_fp = ((*(uint32_t * )((uint8_t *) bucket + (j >> 3))) >> (j & 0x7)) & FP_MASK1;
                        tmp_counter =
                                ((*(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN1) >> 3))) >> ((j + FP_LEN1) & 0x7)) &
                                CT_MASK1;

                        if (tmp_counter) {
                            cell_lv_1[usage_lv_1][0] = tmp_fp;
                            cell_lv_1[usage_lv_1][1] = tmp_counter;
                            test->write_temp(0,slot_idx,pos,usage_lv_3+usage_lv_2+usage_lv_1);
                            usage_lv_1++;
                        }
                        pos++;
                    }
                    //Temporary store finished

                    //flush bucket
                    //change when bucket structure changes
                    bucket[0] = 0;
                    bucket[1] = 0;

                    new_start_lv2 = META_LENGTH + new_num_lv_3 * CELL_LEN3;
                    new_start_lv1 = new_start_lv2 + new_num_lv_2 * CELL_LEN2;


                    //insert stored level_3 flows
                    for (j = META_LENGTH, i = 0; i < usage_lv_3; i++, j += CELL_LEN3) {
                        *(uint32_t * )((uint8_t *) bucket + (j >> 3)) |= ((uint32_t) cell_lv_3[i][0]) << (j & 0x7);
                        *(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN3) >> 3)) |=
                                ((uint32_t) cell_lv_3[i][1]) << ((j + FP_LEN3) & 0x7);
                    }
                    //insert stored level_2 flows
                    for (j = new_start_lv2, i = 0; i < usage_lv_2; i++, j += CELL_LEN2) {
                        *(uint32_t * )((uint8_t *) bucket + (j >> 3)) |= ((uint32_t) cell_lv_2[i][0]) << (j & 0x7);
                        *(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN2) >> 3)) |=
                                ((uint32_t) cell_lv_2[i][1]) << ((j + FP_LEN2) & 0x7);
                    }
                    //insert stored level_1 flows
                    for (j = new_start_lv1, i = 0; i < usage_lv_1; i++, j += CELL_LEN1) {
                        *(uint32_t * )((uint8_t *) bucket + (j >> 3)) |= ((uint32_t) cell_lv_1[i][0]) << (j & 0x7);
                        *(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN1) >> 3)) |=
                                ((uint32_t) cell_lv_1[i][1]) << ((j + FP_LEN1) & 0x7);
                    }
                    test->reslove_temp(0,slot_idx);
                    int metaCode = getMetaCode(new_num_lv_2, new_num_lv_3);
                    bucket[0] |= metaCode & META_MASK;

                    return;
                } else//levelup not success,exponential decay, change when bucket structure changes
                {
                    findMinCell(bucket, 3, num_lv_1, num_lv_2, num_lv_3, min_counter, min_index,min_cell);
                    int old_fp = ((*(uint32_t * )((uint8_t *) bucket + (min_index >> 3))) >> (min_index & 0x7)) & FP_MASK1;
                    int old_long_fp=smallhash(old_fp,slot_idx);
                    if (min_counter > EXP_MODE_MASK) {
                        test->kickout(0,slot_idx,min_cell,min_counter,long_fp,old_long_fp,false);
                        return;
                    }

                    //exponential decay
                    ranf = 1.0 * rand() / RAND_MAX;
                    if (ranf < pow(upper, log2(min_counter) * -1)) {
                        if (min_counter == MIN_C_LV_3)//only replace fingerprints
                        {
                            *(uint32_t * )((uint8_t *) bucket + (cell_start_bit_idx >> 3)) &= ~(((uint32_t) CELL_MASK2)
                                    << (cell_start_bit_idx & 0x7));//clear original cell

                            //replace fp
                            *(uint32_t * )((uint8_t *) bucket + (min_index >> 3)) &= ~(((uint32_t) FP_MASK3)
                                    << (min_index & 0x7));
                            *(uint32_t * )((uint8_t *) bucket + (min_index >> 3)) |=
                                    ((uint32_t) fp16 & FP_MASK3) << (min_index & 0x7);
                            return;
                        } else//decay
                        {
                            test->kickout(0,slot_idx,min_cell,min_counter,long_fp,old_long_fp,true);
                            *(uint32_t * )((uint8_t *) bucket + ((min_index + FP_LEN3) >> 3)) -=
                                    ((uint32_t) 1) << ((min_index + FP_LEN3) & 0x7);
                            return;
                        }
                    }
                    test->kickout(0,slot_idx,min_cell,min_counter,long_fp,old_long_fp,false);
                }
            }
        }
    }

    /**
     * Record one occurrence of `key` in the bucket selected by its hash.
     *
     * Bucket layout as used below: the low META_LENGTH bits of bucket[0] hold a meta code that
     * metaCodeToData() expands into the number of level-1/2/3 cells. Level-3 cells
     * (CELL_LEN3 bits) come first, then level-2 (CELL_LEN2), then level-1 (CELL_LEN1).
     * Inside a cell the low bits are the fingerprint and the high bits are the counter.
     *
     * Flow of the method:
     *  1. Look for a cell whose fingerprint matches and whose counter is non-zero, scanning
     *     level 3, then level 2, then level 1, and update that counter (or call Switch() when
     *     a level-2 / level-1 counter is saturated).
     *  2. If nothing matches, ask findMinCell() for the smallest cell on the lowest level that
     *     currently has cells (1, else 2, else 3). An empty cell is claimed directly;
     *     otherwise test->conflict() decides whether the cell is taken over.
     *
     * @param key     key bytes; KEY_LEN bytes are hashed.
     * @param key_len not used by this method.
     */
    void Insert(const char *key,uint key_len=0) {
        uint32_t hash;
        uint16_t bucket_index, fp16;
        int16_t final_key_len = KEY_LEN;
        hash=bobhash->run(key, final_key_len);
        //hash = BKDRHash(key, KEY_SIZE);
        bucket_index = hash % bucket_num;         //get bucket index
        fp16 = bobhash->run(key, final_key_len)%65535;//finger_print_16(hash);                //get fp
        // Only the low 8 fingerprint bits are fed to smallhash, whatever level the flow lives on.
        int long_fp=smallhash((fp16&FP_MASK1),bucket_index);
        uint8_t meta;//get meta
        int num_lv_1, num_lv_2, num_lv_3;
        int start_lv2, start_lv1, end_lv1;
        uint64_t *bucket = B[bucket_index];
        meta = bucket[0] & META_MASK;
        metaCodeToData(meta, num_lv_1, num_lv_2, num_lv_3);
        // Bit offsets of the level boundaries inside the bucket.
        start_lv2 = META_LENGTH + num_lv_3 * CELL_LEN3;
        start_lv1 = start_lv2 + num_lv_2 * CELL_LEN2;
        end_lv1 = start_lv1 + num_lv_1 * CELL_LEN1;
        int pos=0;   // running cell position across all levels, passed on to `test` and Switch
        int j;       // bit offset of the cell currently examined
        //All cells are stored from the lower addresses. In each cell, lower bits are the fingerprint, and higher bits are the counter.
        //NOTE(review): this comment used to describe a 256-bit bucket made of 4 64-bit words, but
        //META_LENGTH + BUCKET_CELLS_LEGTH is 128 bits -- confirm the real bucket size.
        uint16_t tmp_fp, tmp_counter;
        uint32_t ran;
        //if exists a flow in level_3
        for (j = META_LENGTH; j < start_lv2; j += CELL_LEN3) {
            // Fields are read through a 32-bit load at the containing byte, then shifted and masked.
            tmp_fp = ((*(uint32_t * )((uint8_t *) bucket + (j >> 3))) >> (j & 0x7)) & FP_MASK3;
            tmp_counter =
                    ((*(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN3) >> 3))) >> ((j + FP_LEN3) & 0x7)) & CT_MASK3;
            if (tmp_fp == fp16 && tmp_counter > 0) {
                test->insert(0,bucket_index,pos,true,long_fp);
                if (!(tmp_counter & EXP_MODE_MASK))//normal mode
                {
                    *(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN3) >> 3)) +=
                            ((uint32_t) 1) << ((j + FP_LEN3) & 0x7);
                } else//exponential mode
                {
                    // The low 4 counter bits act as an exponent: the larger it is, the less
                    // likely the random draw is to pass and bump the coefficient part.
                    ran = rand() & 0x7fff;
                    if (ran <= (1 << (15 - 4 - (tmp_counter & 0xF)))) {
                        if (tmp_counter < EXP_CT_OVFL_MASK) {
                            // +0x10 increments the coefficient that sits above the 4 exponent bits.
                            *(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN3) >> 3)) +=
                                    ((uint32_t) 0x10) << ((j + FP_LEN3) & 0x7);
                        } else//coefficient part is overflow
                        {
                            // Clear the EXP_CT_MASK bits and add 1 to the low (exponent) bits.
                            *(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN3) >> 3)) &= ~(((uint32_t) EXP_CT_MASK)
                                    << ((j + FP_LEN3) & 0x7));
                            *(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN3) >> 3)) +=
                                    ((uint32_t) 1) << ((j + FP_LEN3) & 0x7);
                        }
                    }
                }
                goto pkt_done;
            }
            pos++;
        }

        //if existing flow in level_2
        for (j = start_lv2; j < start_lv1; j += CELL_LEN2) {
            tmp_fp = ((*(uint32_t * )((uint8_t *) bucket + (j >> 3))) >> (j & 0x7)) & FP_MASK2;
            tmp_counter =
                    ((*(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN2) >> 3))) >> ((j + FP_LEN2) & 0x7)) & CT_MASK2;

            if (tmp_fp == (fp16 & FP_MASK2) && tmp_counter > 0) {
                if (tmp_counter != CT_MASK2) {
                    test->insert(0,bucket_index,pos,true,long_fp);
                    *(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN2) >> 3)) +=
                            ((uint32_t) 1) << ((j + FP_LEN2) & 0x7);
                } else {
                    // Counter is saturated: hand the cell at bit offset j to Switch for level 3.
                    Switch(bucket, 3, num_lv_1, num_lv_2, num_lv_3, fp16, j,bucket_index,pos);
                }
                goto pkt_done;
            }
            pos++;
        }
        //if existing flow in level_1
        for (j = start_lv1; j < end_lv1; j += CELL_LEN1) {
            tmp_fp = ((*(uint32_t * )((uint8_t *) bucket + (j >> 3))) >> (j & 0x7)) & FP_MASK1;
            tmp_counter =
                    ((*(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN1) >> 3))) >> ((j + FP_LEN1) & 0x7)) & CT_MASK1;

            if (tmp_fp == (fp16 & FP_MASK1) && tmp_counter > 0) {
                if (rand()%2) {//0.5 prob to update
                    test->insert(0,bucket_index,pos,true,long_fp);
                    if (tmp_counter != 0x7F) {
                        *(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN1) >> 3)) +=
                                ((uint32_t) 1) << ((j + FP_LEN1) & 0x7);
                    } else {
                        // Counter is saturated: hand the cell at bit offset j to Switch for level 2.
                        Switch(bucket, 2, num_lv_1, num_lv_2, num_lv_3, fp16, j,bucket_index,pos);
                    }
                }
                goto pkt_done;
            }
            pos++;
        }

        // No cell holds this fingerprint. From here on j == end_lv1 and pos == total cell count.
        if (num_lv_1 > 0) {
            uint16_t min_counter = -1;
            uint16_t min_index = -1;
            uint16_t min_cell;
            findMinCell(bucket, 1, num_lv_1, num_lv_2, num_lv_3, min_counter, min_index,min_cell);
            if (min_counter == 0) {//find empty cell
                if (rand()%2) {
                    // The cell is empty, so OR-ing is enough to store the fingerprint and counter 1.
                    *(uint32_t * )((uint8_t *) bucket + (min_index >> 3)) |=
                            ((uint32_t) fp16 & FP_MASK1) << (min_index & 0x7);
                    *(uint32_t * )((uint8_t *) bucket + ((min_index + FP_LEN1) >> 3)) |=
                            ((uint32_t) 1) << ((min_index + FP_LEN1) & 0x7);
                    test->insert(0,bucket_index,min_cell,false,long_fp);
                }
            } else {//exp decay
                int old_fp = ((*(uint32_t * )((uint8_t *) bucket + (min_index >> 3))) >> (min_index & 0x7)) & FP_MASK1;
                int old_long_fp=smallhash(old_fp,bucket_index);
                int newnum=test->conflict(0,bucket_index,min_cell,min_counter,true,long_fp,old_long_fp);
                if(newnum!=0){
                    // Non-zero result: overwrite the victim's fingerprint with the new flow's.
                    *(uint32_t * )((uint8_t *) bucket + (min_index >> 3)) &= ~(((uint32_t) FP_MASK1)
                            << (min_index & 0x7));
                    *(uint32_t * )((uint8_t *) bucket + (min_index >> 3)) |=
                            ((uint32_t) fp16 & FP_MASK1) << (min_index & 0x7);
                    if(newnum==1){
                        // NOTE(review): this ORs 1 into the existing counter without clearing it --
                        // presumably conflict() only returns 1 when that is harmless; confirm.
                        *(uint32_t * )((uint8_t *) bucket + ((min_index + FP_LEN1) >> 3)) |=
                                ((uint32_t) newnum) << ((min_index + FP_LEN1) & 0x7);                        
                    }else{
                        if(newnum != 0x7F){
                            // NOTE(review): level 1 decrements here while levels 2 and 3 increment
                            // in the same situation -- confirm which one is intended.
                            *(uint32_t * )((uint8_t *) bucket + ((min_index + FP_LEN1) >> 3)) -=
                                    ((uint32_t) 1) << ((min_index + FP_LEN1) & 0x7);
                        }else{
                            // The lookup loops left j/pos past the last cell; Switch has to work on
                            // the cell that was just taken over, so point them at the victim first.
                            j = min_index;
                            pos = min_cell;
                            Switch(bucket, 2, num_lv_1, num_lv_2, num_lv_3, fp16, j,bucket_index,pos);
                        }
                    }
                }
            }
        } else if (num_lv_2 > 0) {
            uint16_t min_counter = -1;
            uint16_t min_index = -1;
            uint16_t min_cell;
            findMinCell(bucket, 2, num_lv_1, num_lv_2, num_lv_3, min_counter, min_index,min_cell);
            if (min_counter == 0) {//find empty cell
                *(uint32_t * )((uint8_t *) bucket + (min_index >> 3)) |=
                        ((uint32_t) fp16 & FP_MASK2) << (min_index & 0x7);
                *(uint32_t * )((uint8_t *) bucket + ((min_index + FP_LEN2) >> 3)) |=
                        ((uint32_t) 1) << ((min_index + FP_LEN2) & 0x7);
                test->insert(0,bucket_index,min_cell,false,long_fp);
            } else {//exp decay
                // FP_MASK1 (not FP_MASK2) matches how long_fp is derived from fp16 above.
                int old_fp = ((*(uint32_t * )((uint8_t *) bucket + (min_index >> 3))) >> (min_index & 0x7)) & FP_MASK1;
                int old_long_fp=smallhash(old_fp,bucket_index);
                int newnum=test->conflict(0,bucket_index,min_cell,min_counter,true,long_fp,old_long_fp);
                if(newnum!=0){
                    *(uint32_t * )((uint8_t *) bucket + (min_index >> 3)) &= ~(((uint32_t) FP_MASK2)
                            << (min_index & 0x7));
                    *(uint32_t * )((uint8_t *) bucket + (min_index >> 3)) |=
                            ((uint32_t) fp16 & FP_MASK2) << (min_index & 0x7);
                    if(newnum==1){
                        *(uint32_t * )((uint8_t *) bucket + ((min_index + FP_LEN2) >> 3)) |=
                                ((uint32_t) newnum) << ((min_index + FP_LEN2) & 0x7);                        
                    }else{
                        if(newnum !=CT_MASK2){
                            *(uint32_t * )((uint8_t *) bucket + ((min_index + FP_LEN2) >> 3)) +=
                                    ((uint32_t) 1) << ((min_index + FP_LEN2) & 0x7);
                        }else{
                            // Same redirection as on level 1: Switch must see the victim cell,
                            // not the end-of-bucket values left behind by the lookup loops.
                            j = min_index;
                            pos = min_cell;
                            Switch(bucket, 3, num_lv_1, num_lv_2, num_lv_3, fp16,j,bucket_index,pos);
                        }
                    }
                }
            }
        } else {
            // Only level-3 cells exist. Starting the minimum at EXP_MODE_T means that a result
            // still >= EXP_MODE_T falls through both branches below and nothing is changed.
            uint16_t min_counter = EXP_MODE_T;
            uint16_t min_index = -1;
            uint16_t min_cell=0;
            findMinCell(bucket, 3, num_lv_1, num_lv_2, num_lv_3, min_counter, min_index,min_cell);
            if (min_counter == 0) {//find empty cell
                *(uint32_t * )((uint8_t *) bucket + (min_index >> 3)) |=
                        ((uint32_t) fp16 & FP_MASK3) << (min_index & 0x7);
                *(uint32_t * )((uint8_t *) bucket + ((min_index + FP_LEN3) >> 3)) |=
                        ((uint32_t) 1) << ((min_index + FP_LEN3) & 0x7);
                test->insert(0,bucket_index,min_cell,false,long_fp);
            } else if (min_counter < EXP_MODE_T) {//exp decay
                int old_fp = ((*(uint32_t * )((uint8_t *) bucket + (min_index >> 3))) >> (min_index & 0x7)) & FP_MASK1;
                int old_long_fp=smallhash(old_fp,bucket_index);
                int newnum=test->conflict(0,bucket_index,min_cell,min_counter,true,long_fp,old_long_fp);
                if(newnum!=0){
                    *(uint32_t * )((uint8_t *) bucket + (min_index >> 3)) &= ~(((uint32_t) FP_MASK3)
                            << (min_index & 0x7));
                    *(uint32_t * )((uint8_t *) bucket + (min_index >> 3)) |=
                            ((uint32_t) fp16 & FP_MASK3) << (min_index & 0x7);
                    if(newnum==1){
                        *(uint32_t * )((uint8_t *) bucket + ((min_index + FP_LEN3) >> 3)) |=
                                ((uint32_t) newnum) << ((min_index + FP_LEN3) & 0x7);                        
                    }else{
                        if(!(newnum & EXP_MODE_MASK)){
                            *(uint32_t * )((uint8_t *) bucket + ((min_index + FP_LEN3) >> 3)) +=
                                    ((uint32_t) 1) << ((min_index + FP_LEN3) & 0x7);
                        }else{
                            // Exponential-mode value: clear the EXP_CT_MASK bits, then add 1.
                            *(uint32_t * )((uint8_t *) bucket + ((min_index + FP_LEN3) >> 3)) &= ~(((uint32_t) EXP_CT_MASK)
                                    << ((min_index + FP_LEN3) & 0x7));
                            *(uint32_t * )((uint8_t *) bucket + ((min_index + FP_LEN3) >> 3)) +=
                                    ((uint32_t) 1) << ((min_index + FP_LEN3) & 0x7);
                        }
                    }
                }
            }
        }
        pkt_done:;
    }

    double Query(const char *key,uint key_len=0) {
        int16_t final_key_len = KEY_LEN;
        uint32_t hash;
        uint16_t bucket_index, fp16;
        hash=bobhash->run(key, final_key_len);
        //hash = BKDRHash(key, KEY_SIZE);
        bucket_index = hash % bucket_num;         //get bucket index
        //if(bucket_index>12000)
        //cout<<bucket_index<<endl;
        fp16 =bobhash->run(key, final_key_len)%65535; //finger_print_16(hash); 
        int long_fp=smallhash((fp16&FP_MASK1),bucket_index);
        int num_lv_1, num_lv_2, num_lv_3;
        uint64_t *bucket = B[bucket_index];
        uint8_t meta = bucket[0] & META_MASK;
        metaCodeToData(meta, num_lv_1, num_lv_2, num_lv_3);
        int start_lv2, start_lv1, end_lv1;
        start_lv2 = META_LENGTH + num_lv_3 * CELL_LEN3;
        start_lv1 = start_lv2 + num_lv_2 * CELL_LEN2;
        end_lv1 = start_lv1 + num_lv_1 * CELL_LEN1;


        int j;
        uint16_t tmp_fp, tmp_counter,pos=0;
        //each 256-bit bucket is composed of 4 64-bit words, all cells are stored from the lower addresses. In each cell, lower bits are the fingerprint, and higher bits are the counter.
        //if exists a flow in level_3
        for (j = META_LENGTH; j < start_lv2; j += CELL_LEN3) {
            tmp_fp = ((*(uint32_t * )((uint8_t *) bucket + (j >> 3))) >> (j & 0x7)) & FP_MASK3;
            tmp_counter = ((*(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN3) >> 3))) >> ((j + FP_LEN3) & 0x7)) & CT_MASK3;
            if (tmp_fp == fp16 && tmp_counter > 0) {
                if (tmp_counter <= EXP_MODE_MASK) {
                    return tmp_counter;
                } else//exponential mode
                {
                    return ((uint32_t) tmp_counter >> 4) * ((uint32_t) 1 << (4 + (tmp_counter & 0xF)));
                }
            }
            pos++;
        }
        //change when bucket structure changes
        //if existing flow in level_2
        for (j = start_lv2; j < start_lv1; j += CELL_LEN2) {
            tmp_fp = ((*(uint32_t * )((uint8_t *) bucket + (j >> 3))) >> (j & 0x7)) & FP_MASK2;
            tmp_counter = ((*(uint32_t * )((uint8_t *) bucket + ((j + FP_LEN2) >> 3))) >> ((j + FP_LEN2) & 0x7)) & CT_MASK2;

            if (tmp_fp == (fp16 & FP_MASK2) && tmp_counter > 0) {
                return tmp_counter;
            }
            pos++;
        }

        //if existing flow in level_1
        for (j = start_lv1; j < end_lv1; j += CELL_LEN1) {
            tmp_fp = ((*(uint16_t * )((uint8_t *) bucket + (j >> 3))) >> (j & 0x7)) & FP_MASK1;
            tmp_counter = ((*(uint16_t * )((uint8_t *) bucket + ((j + FP_LEN1) >> 3))) >> ((j + FP_LEN1) & 0x7)) & CT_MASK1;

            if (tmp_fp == (fp16 & FP_MASK1) && tmp_counter > 0) {
                return ((uint32_t) tmp_counter) * 2;
            }
            pos++;
        }
        return 0;
    }
};
