#ifndef _BitMatcher_H
#define _BitMatcher_H

#include <iostream>
#include <stdio.h>
#include <algorithm>
#include <cstring>
#include <string.h>
#include <stdlib.h>
#include "BOBHash.h"
#include "UseSketch.h"
#include <stdint.h>
#define NDEBUG
#include <cassert>
#include <unordered_map>
#define ENABLE_CM_SKETCH 0
// Runtime switch for the diagnostic output that BitMatcher writes to cout / debugfile.
bool debug=false;
// NOTE(review): not referenced anywhere in the visible part of this file.
int debug_f=0;
/* Bucket layout parameters.
 * The initial values are defaults only: the BitMatcher constructor overwrites
 * all of them from its arguments.
 * NOTE(review): these are non-inline definitions living in a header, so the
 * header can only be included from a single translation unit. */
// Bucket length in bits; also the number of bool elements allocated per bucket.
int BUCKET_LEN=64;
// Width in bits of one stored fingerprint.
int FINGERPRINT_LENGTH=8;
// Width in bits of the bucket-type id stored at the start of each bucket.
int TYPE_ID_LENGTH=4;
// Number of bucket types (layouts) the tables below are iterated over.
int BUCKET_TYPE_NUM=12;
// Largest number of fingerprint/counter entries a bucket type may hold.
int FINGERPRINT_MAX_NUM=5;
// Number of columns in one BUCKET_ITEM_SIZE row: id, entry count, then one width per entry.
int CONFIG_LENGTH=(2+FINGERPRINT_MAX_NUM);
// Number of hash tables (first dimension of BitMatcher::bucket).
int LEVEL_NUM=2;
// BUCKET_ITEM_SIZE[t] = { type id, entry count, counter width of entry 0, entry 1, ... }.
uint8_t** BUCKET_ITEM_SIZE;
// RIGHT[t]: type id found one column to the right of t in Locate_table (0 = none).
int16_t* RIGHT; 
// DOWN[t]: type id found one row below t in Locate_table (0 = none).
int16_t* DOWN;
// Locate_table[row][entry_num - n]: id of the row-th generated type that has n entries.
// Cells are zero-initialised, so 0 doubles as "empty cell".
int16_t** Locate_table;
// COUNT_LEN[t][k]: width in bits of counter k in bucket type t.
uint8_t** COUNT_LEN;
// COUNT_LOC[t][k]: bit offset inside the bucket at which counter k of type t starts.
uint8_t** COUNT_LOC;
// FINGERPRINT_LOC[k]: bit offset inside the bucket at which fingerprint k starts.
uint8_t* FINGERPRINT_LOC;
// ENTRY_NUM[t]: number of entries in bucket type t (copy of BUCKET_ITEM_SIZE[t][1]).
uint8_t* ENTRY_NUM;

/* Bit masks selecting the low TYPE_ID_LENGTH / FINGERPRINT_LENGTH bits of a word.
 * They are recomputed by the BitMatcher constructor after the lengths are set. */ 

uint32_t TYPE_ID_BITMASK= ((1UL << TYPE_ID_LENGTH) - 1);
uint32_t FINGERPRINT_BITMASK=((1UL << FINGERPRINT_LENGTH) - 1);

// Debug log, opened in append mode during static initialisation.
ofstream debugfile("debug.txt", ios::app);
// FINGERPRINT_LOC[k] is the first bit of fingerprint k; the fingerprint occupies
// FINGERPRINT_LENGTH bits starting there.
// FINGERPRINT_BITMASK is the mask applied after shifting a fingerprint down to bit 0.



static inline void init_bucket_parameters() {
	// Check whether the config is correct!

	for (int i = 0; i < BUCKET_TYPE_NUM; i++) {
		const int fingerprint_num = BUCKET_ITEM_SIZE[i][1];
		int total_bit = 0;
		total_bit += TYPE_ID_LENGTH;
		total_bit += FINGERPRINT_LENGTH * fingerprint_num;
		for (int j = 2; j < 2+fingerprint_num; j++) {
			total_bit += BUCKET_ITEM_SIZE[i][j];
		}
		for (int j = 2+fingerprint_num; j < CONFIG_LENGTH; j++) {
			assert(BUCKET_ITEM_SIZE[i][j] == 0);
		}
	}

	for(int i = 0; i < FINGERPRINT_MAX_NUM; i++) {
		FINGERPRINT_LOC[i] = i * FINGERPRINT_LENGTH + TYPE_ID_LENGTH;
	}


	for(int i = 0; i < BUCKET_TYPE_NUM; i++) {
		const int fingerprint_num = BUCKET_ITEM_SIZE[i][1];
		for (int j = 2; j < 2 + fingerprint_num; j++) {
			const int count_idx = j - 2;
			COUNT_LEN[i][count_idx] = BUCKET_ITEM_SIZE[i][j];
			if ( count_idx == 0 ) {
				COUNT_LOC[i][count_idx] = TYPE_ID_LENGTH + FINGERPRINT_LENGTH * fingerprint_num;
			} else {
				COUNT_LOC[i][count_idx] = COUNT_LOC[i][count_idx-1] + COUNT_LEN[i][count_idx-1];
			}
		}
	}
}

//Basic actions based on bit-level operations
// Return the bucket-type id kept in the lowest TYPE_ID_LENGTH bits of the
// first 32-bit word of the bucket.
static inline __attribute__((always_inline)) uint32_t get_bucket_type_id(uint64_t* bkt) {
	uint32_t word;
	// memcpy rather than a uint32_t* cast: the bucket storage is not a uint32_t
	// object, so dereferencing a casted pointer breaks the aliasing rules.
	std::memcpy(&word, bkt, sizeof(word));
	return word & TYPE_ID_BITMASK;
}

// Overwrite the bucket-type id (lowest TYPE_ID_LENGTH bits of the first 32-bit
// word); every other bit of the bucket is left untouched.
static inline __attribute__((always_inline)) void set_bucket_type_id(uint64_t* bkt, uint32_t type_id) {
	uint32_t word;
	// memcpy in and out instead of writing through a casted uint32_t*
	// (avoids the aliasing violation of the pointer cast).
	std::memcpy(&word, bkt, sizeof(word));
	word = (word & ~(TYPE_ID_BITMASK)) | (type_id & TYPE_ID_BITMASK);
	std::memcpy(bkt, &word, sizeof(word));
}

// Look up how many fingerprint/counter entries a bucket of type `type_id` holds.
static inline __attribute__((always_inline)) uint8_t get_item_num_in_bucket_type(uint32_t type_id) {
	const uint8_t entry_count = *(ENTRY_NUM + type_id);
	return entry_count;
}

// Read fingerprint number `fingerprint_index` from the bucket.
// The fingerprint starts at bit FINGERPRINT_LOC[fingerprint_index]; a 32-bit
// window beginning at the containing byte is loaded and shifted down.
static inline __attribute__((always_inline)) uint32_t get_bucket_fingerprint(uint64_t* bkt, int fingerprint_index) {
	const uint32_t bit_loc = FINGERPRINT_LOC[fingerprint_index];
	uint32_t window;
	// memcpy handles the unaligned byte offset and avoids the aliasing
	// violation of dereferencing a casted uint32_t*.
	std::memcpy(&window, (const uint8_t *)bkt + (bit_loc >> 3), sizeof(window));
	return (window >> (bit_loc & 0x7)) & FINGERPRINT_BITMASK;
}

// Store `fingerprint` (truncated to FINGERPRINT_LENGTH bits) into fingerprint
// slot `fingerprint_index` of the bucket, leaving neighbouring bits unchanged.
static inline __attribute__((always_inline)) void set_bucket_fingerprint(uint64_t* bkt, int fingerprint_index, uint32_t fingerprint) {
	const uint32_t bit_loc = FINGERPRINT_LOC[fingerprint_index];
	const uint32_t shift = bit_loc & 0x7;
	uint8_t *window_ptr = (uint8_t *)bkt + (bit_loc >> 3);
	uint32_t window;
	// Single read-modify-write through memcpy: safe for the unaligned byte
	// offset and free of the aliasing violation of a uint32_t* cast.
	std::memcpy(&window, window_ptr, sizeof(window));
	window &= ~(((uint32_t)FINGERPRINT_BITMASK) << shift);
	window |= ((uint32_t)fingerprint & FINGERPRINT_BITMASK) << shift;
	std::memcpy(window_ptr, &window, sizeof(window));
}

// Read counter number `count_index` of a bucket whose layout is `type_id`.
// The counter starts at bit COUNT_LOC[type_id][count_index] and is
// COUNT_LEN[type_id][count_index] bits wide.
// NOTE(review): only a 32-bit window is loaded, so bits of a counter that lie
// beyond (32 - start_bit % 8) are not visible here.
static inline __attribute__((always_inline)) uint32_t get_bucket_count(uint64_t* bkt, int count_index, const uint32_t type_id) {
	const uint32_t bit_loc = COUNT_LOC[type_id][count_index];
	uint32_t window;
	// memcpy handles the unaligned byte offset and avoids the aliasing
	// violation of dereferencing a casted uint32_t*.
	std::memcpy(&window, (const uint8_t *)bkt + (bit_loc >> 3), sizeof(window));
	return (window >> (bit_loc & 0x7)) & ((1UL << COUNT_LEN[type_id][count_index]) - 1UL);
}

// Store `count_value` (truncated to the counter width) into counter number
// `count_index` of a bucket whose layout is `type_id`; other bits are preserved.
// NOTE(review): only a 32-bit window is rewritten, so counter bits beyond
// (32 - start_bit % 8) are never modified.
static inline __attribute__((always_inline)) void set_bucket_count(uint64_t* bkt, int count_index, uint32_t count_value, const uint32_t type_id) {
	const uint32_t bit_loc = COUNT_LOC[type_id][count_index];
	const uint32_t shift = bit_loc & 0x7;
	const uint32_t value_mask = (uint32_t)((1UL << COUNT_LEN[type_id][count_index]) - 1UL);
	uint8_t *window_ptr = (uint8_t *)bkt + (bit_loc >> 3);
	uint32_t window;
	// Single read-modify-write through memcpy: safe for the unaligned byte
	// offset and free of the aliasing violation of a uint32_t* cast.
	std::memcpy(&window, window_ptr, sizeof(window));
	window &= ~(value_mask << shift);
	window |= (count_value & value_mask) << shift;
	std::memcpy(window_ptr, &window, sizeof(window));
}

/* Hash operations */
/*
 * GET_HASH_VALUE_SENTENCE(key) expands to a run of declarations in the
 * enclosing scope. It expects `bobhash`, `bucket_num` and KEY_LEN (defined
 * outside this file) to be visible and introduces:
 *   final_key_len  key length handed to the hash functions (always KEY_LEN)
 *   h1             bobhash[0] of the key, reduced modulo bucket_num
 *   s1..s4         the four bytes of the unreduced h1, most significant first
 *   fp             fingerprint: bobhash[1] of the key masked by FINGERPRINT_BITMASK,
 *                  never 0 because 0 marks an empty slot
 *   h2             (h1 ^ fp) % bucket_num
 *   hash[2]        { h1, h2 }
 *
 * When the masked fingerprint is 0 a replacement is derived from the bytes of
 * h1. The replacement is taken from the unsigned byte value and masked again:
 * assigning the plain `char` OR sign-extended negative bytes, which produced a
 * fingerprint wider than FINGERPRINT_BITMASK and therefore an h2 that did not
 * match the index computed later from the stored (masked) fingerprint.
 *
 * NOTE(review): the assert below only holds for particular bucket_num values;
 * it is compiled out whenever NDEBUG is defined.
 */
#define GET_HASH_VALUE_SENTENCE(key) int16_t final_key_len = KEY_LEN;\
		uint h1 = (bobhash[0]->run(key, final_key_len)); \
		char s1 = (char) (h1 >> 24); \
		char s2 = (char) (h1 >> 16); \
		char s3 = (char) (h1 >> 8); \
		char s4 = (char) (h1); \
		uint32_t fp = (bobhash[1]->run(key, final_key_len))&FINGERPRINT_BITMASK; \
		if (fp == 0) { \
			const uint32_t fp_from_high = ((uint32_t)(unsigned char)(s1 | s2)) & FINGERPRINT_BITMASK; \
			const uint32_t fp_from_low = ((uint32_t)(unsigned char)(s3 | s4)) & FINGERPRINT_BITMASK; \
			if ( fp_from_high != 0 ) { fp = fp_from_high; } \
			else if ( fp_from_low != 0 ) { fp = fp_from_low; } \
			else { fp = 1; } \
			}\
		h1 = h1 % bucket_num; \
		assert( (h1^(fp)) <= bucket_num - 1 );\
		uint h2 = ( h1 ^ (fp) ) % bucket_num; \
		uint hash[2] = {h1, h2};

using namespace std;
class BitMatcher
{
private:
	// bucket_num: number of buckets in each table.
	// maxloop:    kick budget; Insert sets it to 1 and kick_to consumes it.
	// light_num:  only assigned when the constructor is called with sketch == 1.
	uint bucket_num, maxloop,light_num;	//bucket_num indicates the number of buckets in each array
	// bucket[table][bucket index] points to BUCKET_LEN bools holding one packed bucket.
	bool ***bucket;		//two arrays
	// bobhash[0] gives the bucket index, bobhash[1] the fingerprint (see GET_HASH_VALUE_SENTENCE).
	BOBHash * bobhash[2];		//Bob hash function
	// Auxiliary sketch object notified of inserts, moves, swaps and conflicts.
	UseSketch* test;
	// The `sketch` selector passed to the constructor.
	int sketchnow;

public:
	BitMatcher(uint memory,int sketch,int bucket_len=64,int type_bit=4,int fingerprint_len=8,int entry_num=5,int type_num=12,int level_num=2) {
		sketchnow=sketch;
		switch(sketch){
			case 1:{
				bucket_num=int(memory/(bucket_len+entry_num+32)/level_num);
				int m2=int(memory*0.75/8);
				light_num=m2;
				cout<<"Bitmatcher_Elastic: table num"<<level_num<<"bucket num "<<bucket_num<<"light part "<<m2<<"cell num "<<entry_num<<endl;
				test=new Elastic(level_num,bucket_num,entry_num,m2);
				break;
			}
			case 2:{
				bucket_num=int(memory/(bucket_len+2*entry_num+32)/level_num);
				cout<<"Bitmatcher_Waving: table num"<<level_num<<"bucket num"<<bucket_num<<" cell num"<<entry_num<<endl;
				test=new Waving(level_num,bucket_num,entry_num);
				break;
			}
			case 3:{
				bucket_num=int(memory/bucket_len/level_num);
				cout<<"Bitmatcher_RAP: table num"<<level_num<<"bucket num"<<bucket_num<<" cell num"<<entry_num<<endl;
				test=new RAP();
				break;
			}
			default:{
				bucket_num=memory/bucket_len/level_num;//two tables
				cout<<"Bitmatcher_normal: table num"<<level_num<<"bucket num "<<bucket_num<<"cell num "<<entry_num<<endl;
				test=new RAP();
			}
		}
		
		BUCKET_LEN=bucket_len;
		LEVEL_NUM=level_num;
		FINGERPRINT_MAX_NUM = entry_num;
		CONFIG_LENGTH=(2+FINGERPRINT_MAX_NUM);
		TYPE_ID_LENGTH=type_bit;
		FINGERPRINT_LENGTH=fingerprint_len;
		TYPE_ID_BITMASK=((1UL << TYPE_ID_LENGTH) - 1);
		FINGERPRINT_BITMASK=((1UL << FINGERPRINT_LENGTH) - 1);
		BUCKET_TYPE_NUM=type_num;
		int id=get_index(bucket_len,FINGERPRINT_LENGTH,type_bit,entry_num,type_num,false);
		assert(id+1!=BUCKET_TYPE_NUM);
		//init 

		BUCKET_ITEM_SIZE=new uint8_t*[id+1];
	    for (int i = 0; i < id+1; ++i) {
        	BUCKET_ITEM_SIZE[i] = new uint8_t[entry_num+2]{0};
    	}
    	Locate_table=new int16_t*[id+1];
    	for (int i = 0; i < id+1; ++i) {
    	    Locate_table[i] = new int16_t[entry_num+1]{0};
    	}
		//init other tables
    	RIGHT = new int16_t[id+1]{0};
    	DOWN = new int16_t[id+1]{0};
		FINGERPRINT_LOC=new uint8_t[FINGERPRINT_MAX_NUM];

		COUNT_LEN= new uint8_t* [BUCKET_TYPE_NUM];
    	for (int i = 0; i < BUCKET_TYPE_NUM; ++i) {
    	    COUNT_LEN[i] = new uint8_t[FINGERPRINT_MAX_NUM]{0};
    	}

		COUNT_LOC= new uint8_t* [BUCKET_TYPE_NUM];
    	for (int i = 0; i < BUCKET_TYPE_NUM; ++i) {
    	    COUNT_LOC[i] = new uint8_t[FINGERPRINT_MAX_NUM]{0};
    	}
	    //write BUCKET_ITEM_SIZE;
		id=get_index(bucket_len,FINGERPRINT_LENGTH,type_bit,entry_num,type_num,true);

		ENTRY_NUM=new uint8_t[BUCKET_TYPE_NUM];
		for (int i = 0; i < BUCKET_TYPE_NUM; ++i) {
    	    ENTRY_NUM[i] = BUCKET_ITEM_SIZE[i][1];
    	}

		//write Locate_table
		int level=entry_num,row_id=0;
    	for(int i = 0; i < id+1; ++i){
    	    if(BUCKET_ITEM_SIZE[i][1]<level){
    	        level=BUCKET_ITEM_SIZE[i][1];
    	        row_id=0;
    	    }
    	    Locate_table[row_id][entry_num-level]=BUCKET_ITEM_SIZE[i][0];
    	    row_id++;
    	}   
		//write Right and down
    	for(int i = 0; i < id+1; ++i){
    	    for(int j=0;j<entry_num+1;j++){
    	        cout<<Locate_table[i][j]<<" ";
    	        if(Locate_table[i][j]!=0){
    	            RIGHT[Locate_table[i][j]]=Locate_table[i][j+1];
    	            DOWN[Locate_table[i][j]]=Locate_table[i+1][j];
    	        }
    	    }
    	    cout<<endl;
    	}   
		//special situation
    	RIGHT[0]=1;
    	DOWN[0]=0;
    	//for(int i=0;i<id+1;i++){//use this code to debug
    	//    cout<<RIGHT[i]<<" ";
    	//}
    	//cout<<endl;
    	//for(int i=0;i<id+1;i++){
    	//    cout<<DOWN[i]<<" ";
    	//}
		//cout<<endl;

		for (int i = 0; i < 2; i++) {
			bobhash[i] = new BOBHash(i + 1000);
		}

		bucket=new bool**[LEVEL_NUM];
		for (int i = 0; i < LEVEL_NUM; i++) {	//initialize two arrays 
			bucket[i] = new bool*[bucket_num];
	    	for (int j = 0; j < bucket_num; ++j) {
        		bucket[i][j] = new bool[BUCKET_LEN]{0};
    		}	
		}
		init_bucket_parameters();
	}
	
	/**
	 * Enumerate the bucket types (layouts) that fit the given geometry.
	 *
	 * @param bit     bucket length in bits
	 * @param fq_len  fingerprint width in bits
	 * @param wei     width of the bucket-type id in bits
	 * @param num     maximum number of entries per bucket (1..10)
	 * @param max_id  stop once this many types have been produced
	 * @param opera   false: only count the types; true: also write each type into
	 *                BUCKET_ITEM_SIZE (which must already be allocated) and print it
	 * @return id of the last generated type (number of types minus one)
	 *
	 * Prints a message and calls exit(1) when the configuration cannot be laid out.
	 */
	int get_index(int bit,int fq_len,int wei,int num,int max_id,bool opera){
		// The scratch arrays below are fixed-size; larger entry counts used to
		// overflow them and num == 0 never left the minnum search loop.
		const int kMaxEntries=10;
		if(num<1||num>kMaxEntries){
			cout<<"entry num must be between 1 and "<<kMaxEntries<<" but it is "<<num<<" change it!"<<endl;
			exit(1);
		}
		int id=-1;
		const int useful_bit=bit-wei-fq_len*num;//bits left for the counters
		const int spread=((num-1)*num)/2;//extra bits of the widths m, m+1, ..., m+num-1
		int minnum=1;//width of the smallest counter
		if(useful_bit<minnum*num+spread){
			cout<<"fp too big or bit num too small"<<"the useful bit is "<<useful_bit<<" but we need "<<minnum*num+spread<<" change them!"<<endl;
			exit(1);
		}
		while(minnum*num+spread<=useful_bit){
			minnum++;
		}
		--minnum;
		int array[kMaxEntries]={0};
		int temp[kMaxEntries]={0};
		for(int k=0;k<num;k++){//from high to low
			array[k]=minnum+num-1-k;
		}
		const int spare_bit=useful_bit-(minnum*num+spread);
		for(int k=num-1;k>num-1-spare_bit;k--){//Allocate the remaining bits
			array[k]+=1;
		}
		// One counter shared by the loop and the code after it. The loop used to
		// declare its own `i`, so the check after the loop read an uninitialised int.
		int i;
		for(i=num;i>1;i--){
			for(int j=0;j<i;j++){
				temp[j]=array[j];
			}
			while(temp[0]>=temp[1]){
				id++;
				if(opera){//we don't use BUCKET_ITEM_SIZE before define it(opera==false)
					BUCKET_ITEM_SIZE[id][0]=id;
					BUCKET_ITEM_SIZE[id][1]=i;
					cout<<id<<" "<<i<<" ";
					for(int j=0;j<i;j++){
						// widths are stored from the narrowest to the widest counter
						BUCKET_ITEM_SIZE[id][i+2-1-j]=temp[j];
						cout<<temp[j]<<" ";
					}
					cout<<endl;
				}
				// next type with the same entry count: shrink the widest counter by
				// i-1 bits and widen every other counter by one bit
				temp[0]-=i-1;
				for(int j=1;j<i;j++){
					temp[j]+=1;
				}
				if(id==max_id-1){
					return max_id-1;
				}
			}
			// dropping the narrowest entry hands its counter and fingerprint bits
			// to the widest counter
			array[0]+=fq_len+array[i-1];
		}
		if(i==1){
			// Single-entry type, appended after all multi-entry types.
			// NOTE(review): the width written here is useful_bit-fq_len although
			// array[0] holds the accumulated width at this point - confirm intent.
			id++;
			if(opera){
				BUCKET_ITEM_SIZE[id][0]=id;
				BUCKET_ITEM_SIZE[id][1]=1;
				BUCKET_ITEM_SIZE[id][2]=useful_bit-fq_len;
			}
		}
		return id;
	}
	
	// Copy every entry of `src` into the slot with the same index in `dst`:
	// a[0] --> b[0], a[1] --> b[1], ..., a[item_num-1] --> b[item_num-1]
	// The number of copied entries and the destination counter widths come from
	// the type id already stored in `dst`; `src` is decoded with its own type id.
	// The auxiliary sketch (`test`) is not notified because no entry changes slot.
	inline void copy_items_one_by_one(uint64_t *dst, uint64_t *src) {//no need to use test
		const uint32_t src_type_id = get_bucket_type_id(src);
		const uint32_t dst_type_id = get_bucket_type_id(dst);
		const uint32_t item_num = get_item_num_in_bucket_type(dst_type_id);
		for (uint32_t idx = 0; idx < item_num; idx++) {
			const uint32_t src_count = get_bucket_count(src, idx, src_type_id);
			set_bucket_fingerprint(dst, idx, get_bucket_fingerprint(src, idx));
			set_bucket_count(dst, idx, src_count, dst_type_id);
			// Guarantee the copy is lossless: the destination counter must be able
			// to hold the source value.
			const unsigned long dst_max = (1UL << COUNT_LEN[dst_type_id][idx]) - 1;
			if ( dst_max < src_count ) {
				// %u / %lu match the unsigned argument types (the old %d did not).
				printf("The dst_type_id is %u, the idx is %u, the src_type_id is %u, %lu<%u\n", dst_type_id, idx, src_type_id, dst_max, src_count);
				assert(false);
			}
		}
	}

	// Shift-copy: entry k+1 of `src` lands in slot k of `dst`
	// (a[1] --> b[0], a[2] --> b[1], ..., a[item_num] --> b[item_num-1]).
	// The entry count and destination widths are taken from the type id stored in
	// `dst`; each move is reported to the auxiliary sketch before it is performed.
	inline void copy_items_upflow(uint64_t *dst, uint64_t *src,int table_idx,int slot_idx) {
		const uint32_t from_type = get_bucket_type_id(src);
		const uint32_t to_type = get_bucket_type_id(dst);
		const uint32_t slots_to_fill = get_item_num_in_bucket_type(to_type);
		int to_slot = 0;
		while (to_slot < slots_to_fill) {
			const int from_slot = to_slot + 1;
			test->write(table_idx,slot_idx,from_slot,table_idx,slot_idx,to_slot);
			const uint32_t moved_fingerprint = get_bucket_fingerprint(src, from_slot);
			set_bucket_fingerprint(dst, to_slot, moved_fingerprint);
			const uint32_t moved_count = get_bucket_count(src, from_slot, from_type);
			set_bucket_count(dst, to_slot, moved_count, to_type);
			// The destination counter must be at least as wide as the source one.
			const bool too_narrow = COUNT_LEN[to_type][to_slot] < COUNT_LEN[from_type][from_slot];
			if (too_narrow) {
				printf("The dst_type_id is %d, the idx is %d, the src_type_id is %d\n", to_type, to_slot, from_type);
				assert(false);
			}
			++to_slot;
		}
	}

	inline bool kick_to(int origin_hash_table_idx, uint32_t origin_bucket_item_idx, uint32_t fingerprint_value, uint32_t count_value,int origin_cell_id) {
		if (fingerprint_value == NULL) {
			return true; // The kick out is NULL
		}
		if(LEVEL_NUM<2){
			return false;
		}
		// printf("kick: origin_hash_table_idx is %d, origin_bucket_item_idx is %d, fingerprint_value is %d, count_value is %d\n", origin_hash_table_idx, origin_bucket_item_idx, fingerprint_value, count_value);

		uint64_t *origin_bucket = (uint64_t*)bucket[origin_hash_table_idx][origin_bucket_item_idx];
		uint32_t new_bucket_idx = ( origin_bucket_item_idx ^ (fingerprint_value) ) % bucket_num;
		uint64_t *dst_bucket = (uint64_t*)bucket[1 - origin_hash_table_idx][new_bucket_idx];

		if (maxloop == 1) {
			maxloop--;
			uint32_t new_bucket_type_id = get_bucket_type_id(dst_bucket);
			uint8_t slot_num = get_item_num_in_bucket_type(new_bucket_type_id);
			for (int i = 0; i < slot_num; i++) {
				if (get_bucket_fingerprint(dst_bucket, i) == NULL &&  ( 1UL << COUNT_LEN[new_bucket_type_id][i]) - 1 > count_value) {
					set_bucket_count(dst_bucket, i, count_value, new_bucket_type_id);
					set_bucket_fingerprint(dst_bucket, i, fingerprint_value);
					test->write(origin_hash_table_idx,origin_bucket_item_idx,origin_cell_id,1 - origin_hash_table_idx,new_bucket_idx,i);
					return true;
				}
			}
		}
		return false; // The kicking fails
	}

	
	// Resolve a saturated counter inside one bucket.
	//
	// The callers in this file invoke this right after the counter in slot
	// `finger_idx` reached the largest value its width can represent.
	//
	// Parameters:
	//   bit_bucket  storage of the bucket (BUCKET_LEN bools, accessed as packed bits)
	//   finger_idx  slot whose counter is saturated
	//   type_id     current layout of the bucket
	//   table_idx   table the bucket belongs to
	//   slot_idx    index of the bucket inside that table
	//   long_fp     not used in this function
	//
	// Strategy, in order:
	//   1. move the entry to a later empty slot whose counter can hold the value;
	//   2. swap it with a later slot that holds a smaller count and is wide enough;
	//   3. switch the bucket to another layout chosen through RIGHT[] / DOWN[],
	//      possibly pushing one entry to the other table with kick_to().
	// Returns true when one of the steps succeeded, false otherwise.
	//
	// NOTE(review): the assert() calls below are compiled out when NDEBUG is
	// defined before <cassert> is included.
	inline bool solve_overflow_locally(bool* bit_bucket, const int finger_idx, const uint32_t type_id, const uint32_t table_idx, const uint32_t slot_idx,uint32_t long_fp) { //overflow occur in entry_j in the bucket
		//Search for blank spaces
		uint64_t*b=(uint64_t*)bit_bucket;
		uint32_t src_count = get_bucket_count(b, finger_idx, type_id);
		uint32_t src_fingerprint = get_bucket_fingerprint(b, finger_idx);
	    if(debug){
			cout<<"change! finger:"<<src_fingerprint<<" count:"<<src_count<<endl;	
		}
		for ( int i = finger_idx + 1; i < get_item_num_in_bucket_type(type_id); i++ ) {
			// Try to find an empty entry with more bits
			if ( get_bucket_fingerprint(b, i) == NULL &&( 1UL << COUNT_LEN[type_id][i]) - 1 > src_count) {
				// Find an empty entry: move the data and do not change the type
				set_bucket_fingerprint(b, i, src_fingerprint);
				set_bucket_count(b, i, src_count, type_id);
				set_bucket_fingerprint(b, finger_idx, NULL);
				set_bucket_count(b, finger_idx, 0, type_id);
				test->write(table_idx,slot_idx,finger_idx,table_idx,slot_idx,i);
				return true;
			}
		}
		// Try to exchange with other slot in the same bucket
		// NOTE(review): src_count is unsigned, so `src_count - 1` wraps when src_count is 0.
		for ( int dst_idx = finger_idx + 1; dst_idx < get_item_num_in_bucket_type(type_id); dst_idx++ ) {
			if ( get_bucket_count(b, dst_idx, type_id) < src_count - 1&&( 1UL << COUNT_LEN[type_id][dst_idx]) - 1 > src_count ) {
				// Upper slot can be exchanged
				uint32_t dst_fingerprint = get_bucket_fingerprint(b, dst_idx);
				uint32_t dst_count = get_bucket_count(b, dst_idx, type_id);
				// Exchange
				set_bucket_fingerprint(b, dst_idx, src_fingerprint );
				set_bucket_count(b, dst_idx, src_count, type_id);
				set_bucket_fingerprint(b, finger_idx, dst_fingerprint);
				set_bucket_count(b, finger_idx, dst_count, type_id);
				test->change(table_idx,slot_idx,finger_idx,table_idx,slot_idx,dst_idx);
				return true;
			}
		}
		// No empty entry found, we try to change it to other type of bucket
		// new_bkt is a zeroed scratch bucket the new layout is assembled in.
		// NOTE(review): BUCKET_LEN is a run-time int, so this is a variable-length
		// array, which is a compiler extension in C++.
		bool new_bkt[BUCKET_LEN]={0};
		uint32_t least_finger; uint32_t least_count;
		// Case A: a layout exists to the right of the current one but none below it.
		if(RIGHT[type_id]!=0&&DOWN[type_id]==0){
			bool flag_need_up = false;
			// next_type_id stays unset when finger_idx == 0; it is only read when
			// flag_need_up is true.
			uint32_t next_type_id;
			if ( finger_idx != 0 ) {//If it is not the smallest
				flag_need_up = true;
				if(finger_idx == ENTRY_NUM[type_id]-1){
					next_type_id=RIGHT[type_id];
				}else{
					if(DOWN[RIGHT[type_id]]==0){
						return false;
					}
					next_type_id=DOWN[RIGHT[type_id]];
				}
			}
			// It is the bottom type! We only kick out the least entry
			least_finger = get_bucket_fingerprint(b, 0);
			least_count = get_bucket_count(b, 0, type_id);
			// Find the slot holding the smallest count.
			uint32_t min_pos=0,min_count=least_count,min_finger=least_finger;
			for ( int i = 1; i < get_item_num_in_bucket_type(type_id); i++ ) {
				if(min_count>get_bucket_count(b, i, type_id)){
					min_pos=i;
					min_count=get_bucket_count(b, i, type_id);
					min_finger=get_bucket_fingerprint(b, i);
				}
			}
			// Park the former slot-0 entry in the minimum's slot; the minimum itself
			// becomes the victim (least_*), as if it had been sitting in slot 0.
			if(min_pos!=0){//The smallest value isn't located at the position with id 0.
				set_bucket_fingerprint(b, min_pos, least_finger);
				set_bucket_count(b,min_pos,least_count, type_id);
				test->change(table_idx,slot_idx,0,table_idx,slot_idx,min_pos);
				min_pos=0;
				least_count=min_count;
				least_finger=min_finger;
			}//
			// NOTE(review): old_long_fp is computed but never read afterwards.
			uint32_t old_long_fp;
			if(table_idx==0){
				old_long_fp=smallhash(least_finger,slot_idx);
			}else{
				old_long_fp=smallhash(least_finger,(least_finger^slot_idx)%bucket_num);
			}
			// Empty slot 0, then try to rehome the victim in the other table.
			set_bucket_fingerprint(b, 0, NULL);
			set_bucket_count(b, 0, 0, type_id);
			bool kick=kick_to(table_idx, slot_idx, least_finger, least_count,min_pos);
			
			if (flag_need_up) {
				// Go to new
				// The bucket is rebuilt in the new layout from slots 1.. of the old
				// one; this happens whether or not the kick succeeded.
				set_bucket_type_id((uint64_t*)new_bkt, next_type_id);
				copy_items_upflow((uint64_t*)new_bkt, b,table_idx,slot_idx);
				for(int i=0;i<BUCKET_LEN;i++){
					bit_bucket[i] = new_bkt[i];
				}
				return true;
			} else if(!kick) {
				// Kick failed and the layout stays: put the victim back into slot 0.
				set_bucket_fingerprint(b, 0, least_finger);
				set_bucket_count(b, 0, least_count, type_id);
				return false;
			}else{
				return true;
			}
		// Case B: layouts exist both to the right of and below the current one.
		}else if(RIGHT[type_id]!=0&&DOWN[type_id]!=0){
			// We have 4 entries here and we need to move to next type
			if (finger_idx == ENTRY_NUM[type_id]-1) {//biggest
				// The biggest entry is overflowed, kick out the least entry and go right
				set_bucket_type_id((uint64_t*)new_bkt, RIGHT[type_id]);
				least_finger = get_bucket_fingerprint(b, 0);
				least_count = get_bucket_count(b, 0, type_id);
				uint32_t min_pos=0,min_count=least_count,min_finger=least_finger;
				for ( int i = 1; i < get_item_num_in_bucket_type(type_id); i++ ) {
					if(min_count>get_bucket_count(b, i, type_id)){
						min_pos=i;
						min_count=get_bucket_count(b, i, type_id);
						min_finger=get_bucket_fingerprint(b, i);
					}
				}
				if(min_pos!=0){
					set_bucket_fingerprint(b, min_pos, least_finger);
					set_bucket_count(b,min_pos,least_count, type_id);
					test->change(table_idx,slot_idx,0,table_idx,slot_idx,min_pos);
					min_pos=0;
					least_count=min_count;
					least_finger=min_finger;
				}
				// NOTE(review): old_long_fp is computed but never read afterwards.
				uint32_t old_long_fp;
				if(table_idx==0){
					old_long_fp=smallhash(least_finger,slot_idx);
				}else{
					old_long_fp=smallhash(least_finger,(least_finger^slot_idx)%bucket_num);
				}
				// The bucket is replaced by the new layout regardless of the kick
				// result; the return value only reports whether the victim was rehomed.
				bool res = kick_to(table_idx, slot_idx, least_finger, least_count,min_pos);
				copy_items_upflow((uint64_t*)new_bkt, b,table_idx,slot_idx);
				for(int i=0;i<BUCKET_LEN;i++){
					bit_bucket[i] = new_bkt[i];
				}
				if (res) {
					return true;
				} else {
					return false;
				}
			} else {
				// Go down
				uint32_t possible_next_type_id = DOWN[type_id];
				set_bucket_type_id((uint64_t*)new_bkt, possible_next_type_id);
				// Width of the widest (last) counter in the candidate layout.
				uint32_t MAX_LEN_FOR_NEXT_TYPE = COUNT_LEN[possible_next_type_id][ENTRY_NUM[possible_next_type_id]-1];
				assert(MAX_LEN_FOR_NEXT_TYPE >= 10);
				assert(MAX_LEN_FOR_NEXT_TYPE <= 16);
				uint32_t MAX_COUNT_VALUE_FOR_NEXT = ((1ULL << MAX_LEN_FOR_NEXT_TYPE) - 2);
				// Going down is only possible if the current largest counter still
				// fits into the widest counter of the candidate layout.
				if ( get_bucket_count(b, ENTRY_NUM[type_id]-1, type_id) <= MAX_COUNT_VALUE_FOR_NEXT ) {
					copy_items_one_by_one((uint64_t*)new_bkt, b);
					for(int i=0;i<BUCKET_LEN;i++){
						bit_bucket[i] = new_bkt[i];
					}
					return true;
				} else {
					// Cannot go down: try to move the saturated entry to the other
					// table, restoring it in place when that fails.
					uint32_t out_finger = get_bucket_fingerprint(b, finger_idx);
					uint32_t out_count = get_bucket_count(b, finger_idx, type_id);
					set_bucket_fingerprint(b, finger_idx, NULL);
					set_bucket_count(b, finger_idx, 0, type_id);
					bool res = kick_to(table_idx, slot_idx, out_finger, out_count,finger_idx);
					if (res) {
						return true;
					} else {
						set_bucket_fingerprint(b, finger_idx, out_finger);
						set_bucket_count(b, finger_idx,out_count,type_id);
						return false;
					}
				}
			}
		}
		// Case C: nothing to the right, but a layout exists below.
		else if(RIGHT[type_id]==0&&DOWN[type_id]!=0){
			// For type 4->10, and they are the rightest column types; we can only go down or keep
			// NOTE(review): go_down_enable is written but never read.
			bool go_down_enable = false;
			// NOTE(review): the candidate is type_id + 1 here, whereas case B uses DOWN[type_id].
			int possible_next_type_id = type_id + 1;
			int max_id=ENTRY_NUM[type_id]-1;
			uint32_t MAX_LEN_FOR_NEXT_TYPE = COUNT_LEN[possible_next_type_id][ENTRY_NUM[possible_next_type_id]-1];
			assert(MAX_LEN_FOR_NEXT_TYPE >= 13);
			assert(MAX_LEN_FOR_NEXT_TYPE <= 27);
			uint32_t MAX_COUNT_VALUE_FOR_NEXT = ((1ULL << MAX_LEN_FOR_NEXT_TYPE) - 2);
			if (finger_idx < max_id  && get_bucket_count(b, max_id, type_id) <= MAX_COUNT_VALUE_FOR_NEXT ) {
				// A slot other than the largest is overflowed, and the largest slot is able to go down
				go_down_enable = true;
				set_bucket_type_id((uint64_t*)new_bkt, possible_next_type_id);
				copy_items_one_by_one((uint64_t*)new_bkt, b);
				for(int i=0;i<BUCKET_LEN;i++){
					bit_bucket[i] = new_bkt[i];
				}
				return true;
			} else {
				// We can not go down, but the value is overflowed; so we try to kick it out; assert(finger_idx != 2);
				uint32_t out_finger = get_bucket_fingerprint(b, finger_idx);
				uint32_t out_count = get_bucket_count(b, finger_idx, type_id);
				set_bucket_fingerprint(b, finger_idx, NULL);
				set_bucket_count(b, finger_idx, 0, type_id);
				bool kick_res =  kick_to(table_idx, slot_idx, out_finger, out_count,finger_idx);
				if (kick_res) {
					return true;
				} else {
					// Kick fails, we keep the value (The kick keeps)
					set_bucket_fingerprint(b, finger_idx, out_finger);
					set_bucket_count(b, finger_idx,out_count,type_id);
					return false;
				}
			}
		// Case D: no layout to the right and none below - nothing can be done.
		}else{
			return false;
		}
		return false;
	}
	
	/**
	 * Increment the counter of the entry in slot `finger_idx`.
	 *
	 * @param bit_bucket  storage of the bucket holding the entry
	 * @param finger_idx  slot of the entry inside the bucket
	 * @param type_id     current layout of the bucket
	 * @param table_idx   table the bucket belongs to
	 * @param slot_idx    index of the bucket inside that table
	 * @param long_fp     value forwarded to the auxiliary sketch and to
	 *                    solve_overflow_locally()
	 * @return true when no overflow remains; false when the counter is saturated
	 *         and the overflow could not be resolved.
	 *
	 * If the counter is already at its maximum it is not incremented. Instead,
	 * unless it sits in the last slot, the next slot's counter is decremented
	 * with probability 1.08^(-log2(next count)); once that counter is no larger
	 * than this one, solve_overflow_locally() is tried.
	 */
	inline bool plus(bool* bit_bucket, const int finger_idx, const uint32_t type_id, const uint32_t table_idx, const uint32_t slot_idx,int long_fp) { //try to plus entry_j in the bucket, return true if no overflow happens
		uint64_t*b=(uint64_t*)bit_bucket;
		uint32_t original_val = get_bucket_count(b, finger_idx, type_id);
		if(debug){
			debugfile<<"num:"<<original_val<<endl;
		}
		// One past the largest value the counter can represent.
		const uint32_t max_cnt_val = (1UL << COUNT_LEN[type_id][finger_idx]);
		if ( 1 + original_val == max_cnt_val ) {
			if(finger_idx == ENTRY_NUM[type_id]-1){
				// Saturated in the last slot: nothing above it to compete with.
				return false;
			}
			const uint32_t next_count = get_bucket_count(b, finger_idx+1, type_id);
			const double ranf = 1.0 * rand() / RAND_MAX;
			// A zero counter must not be decremented: log2(0) made the test always
			// succeed and the unsigned decrement wrapped the counter to its maximum.
			if (next_count > 0 && ranf < pow(1.08, log2(next_count) * -1)) {
				set_bucket_count(b, finger_idx+1, next_count-1, type_id);
			}
			if(next_count>original_val){
				return false; // Overflow happens and we cannot solve it before
			}
		}else{
			original_val++;
			set_bucket_count(b, finger_idx, original_val, type_id);
			test->insert(table_idx,slot_idx,finger_idx,true,long_fp);
		}
		if ( 1 + original_val == max_cnt_val ) {
			// Overflow happens, we try to move the counter to other location
			return solve_overflow_locally(bit_bucket, finger_idx, type_id, table_idx, slot_idx,long_fp);
		}
		return true;
	}

	/**
	 * Record one occurrence of `key`.
	 *
	 * @param key      pointer to the key bytes
	 * @param key_len  ignored: GET_HASH_VALUE_SENTENCE always hashes KEY_LEN bytes
	 *
	 * Steps:
	 *   1. scan the candidate bucket of every table; when the fingerprint is
	 *      already stored, increment it through plus() and stop;
	 *   2. otherwise store the fingerprint with count 1 in the first empty slot
	 *      seen during the scan;
	 *   3. otherwise (both buckets full) either let the auxiliary sketch decide
	 *      whether the smallest entry is replaced (sketchnow != 0), or decay /
	 *      replace slot 0 of one candidate bucket (sketchnow == 0).
	 */
	void Insert(const char *key, const int16_t key_len = 0) {
		maxloop = 1;
		GET_HASH_VALUE_SENTENCE(key);
		uint32_t long_fp=smallhash(fp,hash[0]);
		// hash[] only describes two tables, so never index past it.
		const int levels = LEVEL_NUM < 2 ? LEVEL_NUM : 2;
		bool has_empty = false;
		int empty_jj = 0, empty_type_id = 0, empty_table = 0;
		uint64_t* empty_bucket = nullptr;
		int min_pos = 0, min_table = 0;
		uint32_t min_count = UINT32_MAX;
		for (int i = 0; i < levels; i++) {
			uint64_t* b = (uint64_t*)bucket[i][hash[i]];
			const uint32_t type_id = get_bucket_type_id(b);
			const uint8_t fingerprint_num = get_item_num_in_bucket_type(type_id);
			for (int j = 0; j < fingerprint_num; j++) {
				const uint32_t stored_fp = get_bucket_fingerprint(b, j);
				if ( stored_fp == (fp&FINGERPRINT_BITMASK) ) {
					// Get the correct bucket, plus it and deal with the kickout
					plus(bucket[i][hash[i]], j, type_id, i, hash[i],long_fp);
					return;
				}
				if ( !has_empty && stored_fp == 0 ) {
					// Remember the first empty slot in case the key is not found.
					empty_bucket = b;
					empty_jj = j;
					empty_type_id = type_id;
					empty_table = i;
					has_empty = true;
				}
				// Track the smallest counter as replacement candidate.
				const uint32_t stored_count = get_bucket_count(b,j,type_id);
				if (min_count > stored_count) {
					min_pos = j;
					min_table = i;
					min_count = stored_count;
				}
			}
		}
		if (has_empty) {
			// Insert the key into the empty bucket
			if(debug){
				cout<<"enpty"<<endl;
			}
			set_bucket_fingerprint(empty_bucket, empty_jj, fp);
			set_bucket_count(empty_bucket, empty_jj, 1, empty_type_id);
			test->insert(empty_table,hash[empty_table],empty_jj,false,long_fp);
			return;
		}

		static int error_num = 0;
		error_num++; // For web2.data, we find 258K/8M times here!
		if(sketchnow!=0){
			if(debug){
				cout<<"conflict"<<endl;
			}
			uint64_t* b = (uint64_t*)bucket[min_table][hash[min_table]];
			const uint32_t old_fp=get_bucket_fingerprint(b, min_pos);
			uint32_t old_long_fp;
			if(min_table==0){
				old_long_fp=smallhash(old_fp,hash[0]);
			}else{
				old_long_fp=smallhash(old_fp,(old_fp^hash[1])%bucket_num);
			}

			const uint32_t type_id = get_bucket_type_id(b);
			const uint32_t count = get_bucket_count(b, min_pos, type_id);
			// The auxiliary sketch arbitrates; a non-zero result is the count the
			// new key takes over the slot with.
			const int new_num=test->conflict(min_table,hash[min_table],min_pos,count,true,long_fp,old_long_fp);
			const uint32_t max_cnt_val = (1UL << COUNT_LEN[type_id][min_pos]);
			if(new_num!=0){
				set_bucket_fingerprint(b,min_pos,fp);
				set_bucket_count(b,min_pos, new_num, type_id);
				if ( 1 + new_num == max_cnt_val ) {
					// Overflow happens, we try to move the counter to other location
					solve_overflow_locally(bucket[min_table][hash[min_table]], min_pos, type_id, min_table, hash[min_table],long_fp);
					return;
				}
			}
		}else{
			// No auxiliary sketch: decay or replace slot 0 of one candidate bucket,
			// the table being picked by the lowest fingerprint bit.
			int i = fp & 0x1;
			if (i >= levels) {
				i = 0; // only one table exists
			}
			uint64_t* b = (uint64_t*)bucket[i][hash[i]];
			const uint32_t type_id = get_bucket_type_id(b);
			const uint32_t count = get_bucket_count(b, 0, type_id);
			const bool coin = (fp & 0x2) == ((error_num & 0x1) << 1);
			if ( count == 0 || (count == 1 && coin) ) {
				// Replace the resident. A resident whose count already decayed to 0
				// is always replaced: decrementing it again wrapped the unsigned
				// counter around to its maximum value.
				set_bucket_fingerprint(b, 0, fp);
				set_bucket_count(b, 0, 1, type_id);
			} else {
				set_bucket_count(b, 0, count - 1, type_id);
			}
		}
	}
	
	double Query(const char *key, const int16_t key_len = 0) {
		GET_HASH_VALUE_SENTENCE(key);
		uint32_t long_fp=smallhash(fp,hash[0]);
		bool flag=0;
		uint32_t min_value = UINT64_MAX;
		__builtin_prefetch(bucket[0][hash[0]], 0, 2);
		__builtin_prefetch(bucket[0][hash[1]], 0, 2);
		for (uint8_t i = 0; i < LEVEL_NUM; i++) {
			//test->query();
			uint64_t* b = (uint64_t*)bucket[i][hash[i]];
			const uint32_t type_id = get_bucket_type_id(b);
			const uint8_t fingerprint_num = get_item_num_in_bucket_type(type_id);
			for (uint8_t fpt_idx = 0; fpt_idx < fingerprint_num; fpt_idx++) {
				const uint32_t stored_fingerprint = get_bucket_fingerprint(b, fpt_idx);
				const uint32_t stored_count = get_bucket_count(b, fpt_idx, type_id);
				if ( stored_fingerprint == (fp&FINGERPRINT_BITMASK) ) {				
					return stored_count;

				}
				if (!flag && stored_fingerprint == NULL) {
					flag = 1;
				}
			}
		}
		if (flag) { return 0; } 
		else {				
			return 0;

		}
	}
	
	// memory access

	// the usage ratio of the buckets

	// Destructor. It currently releases nothing: the bucket arrays, the two
	// BOBHash objects, the auxiliary sketch and the global layout tables
	// allocated by the constructor all stay allocated.
	// NOTE(review): before adding deletes, confirm that UseSketch has a virtual
	// destructor and that BitMatcher objects are never copied (the class keeps
	// the implicit copy operations, so a copy would share the same pointers).
	~BitMatcher() {
		// for (int i = 0; i < 2; i++) {
		// }
		// for (int i = 0; i < 1; i++) {
		// 	delete bobhash[i];
		// }
	}
};
#endif//_BitMatcher_H