#include <math.h>
#include <assert.h>
#include <stdio.h>

#include "xutil.h"
#include "clark.h"
#include "naive.h"
#include "bitvector.h"
#include "popcount.h"
#include "select.h"

/**
 * Builds the select directories for the bitvector B and stores them in
 * B->table.
 *
 * Three parameters are derived from B->bits and kept in the clark_t:
 *   - logSize = log2(bits)
 *   - spc1    = logSize * log2(logSize): number of 1 bits per first-level
 *               block (used below through the uint64_t local spc1)
 *   - bits1   = floor(log2(bits) + 1): field width used for absolute
 *               positions
 * Five bit arrays of identical byte size (D1, D2, D3, L1, L2) are allocated
 * and filled, using succinct_popcount_select() as the reference select
 * implementation while building.
 *
 * NOTE(review): spc1 is used as a divisor below; for very small B->bits
 * (log2(log2(bits)) <= 0) it would be zero or not a number -- confirm that
 * callers never preprocess such vectors.
 * NOTE(review): all five arrays share one size estimate and the running
 * offsets index2/index3 are not checked against it -- confirm the estimate
 * is sufficient for every input.
 *
 * @param B bitvector to preprocess; on return B->table points to the newly
 *          allocated clark_t.
 */
void succinct_clark_preprocess(bitvector_t *restrict B) {
	clark_t *clark = xmalloc(sizeof(clark_t));
	clark->logSize = log2(B->bits);
	clark->spc1 = clark->logSize*log2(clark->logSize);
	clark->bits1 = floor(log2(B->bits)+1);

	double logSize = clark->logSize;
	uint64_t i, j, k;
	uint64_t r, rp;
	// tmp1/tmp2: positions bounding the current first-level block,
	// tmp3/tmp4: positions bounding the current second-level sub-range.
	uint64_t tmp, tmp1 = 0, tmp2 = 0, tmp3 = 0, tmp4 = 0;
	// Running write offsets (in bits) into D1, D2 and D3 respectively.
	uint64_t index1 = 0, index2 = 0, index3 = 0;
	uint64_t spc1 = clark->spc1, spc2; 
	uint64_t bits1 = clark->bits1, bits2;
	uint64_t size;
	struct succinct_t succ;

	succ.B = B;

	// Byte size shared by all five arrays:
	// n/log(n)log(log(n))*log(n+1) bits, rounded up to whole storage words.
	// (presumably WORD is the number of bits per `bitvector` word -- TODO
	// confirm against bitvector.h)
	size = ceil((double)((B->bits/spc1+bits1)*bits1)/WORD)*sizeof(bitvector);
	// D2: second-level directory.
	// (xmemset presumably clears the buffer -- TODO confirm in xutil.h)
	clark->D2 = xmalloc(size);
	xmemset(clark->D2, size);

	// L1: per first-level block, the bit offset into D2 where its entries
	// start.
	clark->L1 = xmalloc(size);
	xmemset(clark->L1, size);

	// D1: first-level directory.
	clark->D1 = xmalloc(size);
	xmemset(clark->D1, size);

	// D3: third-level directory.
	clark->D3 = xmalloc(size);
	xmemset(clark->D3, size);

	// L2: per second-level sub-range, the bit offset into D3 where its
	// entries start.
	clark->L2 = xmalloc(size);
	xmemset(clark->L2, size);

	// The popcount structure is only needed while building; it is released
	// again before B->table is pointed at the Clark structure.
	succinct_popcount_preprocess(B);

	// One iteration per group of spc1 queried ones. tmp1 holds the position
	// found in the previous iteration (0 for the first one).
	for (i = 1; i <= B->bits/spc1; i++) {
		succ.i = i*spc1;
		tmp2 = succinct_popcount_select(&succ);

		// Store 1st-level directory with every position of every 
		// log(n)log(log(n))'th 1 bit.
		bitvector_set_bits(
				clark->D1, 
				index1, 
				bits1,
				tmp2
		);

		// Range
		r = tmp2 - tmp1;

		// Remember where this block's entries begin inside D2.
		bitvector_set_bits(clark->L1, (i-1)*bits1, bits1, index2);

		if ( r > pow(spc1, 2) ) {
			// Store 2nd-level directory with all answers of SELECT within the
			// range of r
			for (j = 1; j < spc1; j++) {
				succ.i = j+(i-1)*spc1;
				bitvector_set_bits(
						clark->D2, 
						index2, 
						bits1, 
						succinct_popcount_select(&succ)
				);
				index2 += bits1;
			}
		} else {
			// spc2: number of ones per second-level sub-range,
			// bits2: field width for offsets relative to tmp1.
			spc2 = floor(log2(r))*log2(logSize);
			bits2 = floor(log2(r)+1);

			tmp3 = tmp1;
			// Re-subdivide the range r and record position, relative to the 
			// start of the range, of every (log(r)log(log(n)))'th 1 bit
			for (j = 1; r > 0 && spc2 > 0 && j <= spc1/spc2; j++) {
				succ.i = (i-1)*spc1+j*spc2;
				tmp4 = succinct_popcount_select(&succ);
				// Clamp to the end of the first-level block.
				tmp4 = tmp4 <= tmp2 ? tmp4 : tmp2;

				bitvector_set_bits(
						clark->D2,
						index2,
						bits2,
						tmp4-tmp1
				);

				// Width of the sub-range just closed.
				rp = tmp4 - tmp3;

				// Threshold above which the sub-range is treated as sparse;
				// the product is truncated to an integer on assignment.
				tmp = (rp != 0) ? log2(rp)*log2(r)*pow(log2(logSize), 2) : 0;

				if (rp > tmp && tmp > 0) {
					// Check if we exceed the L2 index offset
					assert(floor(spc1/spc2) <= floor(log2(logSize)));

					// Store 3rd-level directory with all answers of SELECT 
					// within the range of rp
					// L2 slot (i-1)*floor(log2(logSize)) + (j-1) receives the
					// bit offset into D3 where this sub-range's entries start.
					bitvector_set_bits(
							clark->L2, 
							((i-1)*floor(log2(logSize))+(j-1))*bits1, 
							bits1, 
							index3
					);
					for (k = 1; k < spc2; k++) {
						succ.i = (i-1)*spc1+(j-1)*spc2+k;
						bitvector_set_bits(
								clark->D3, 
								index3, 
								bits1, 
								succinct_popcount_select(&succ)
						);
						index3 += bits1;
					}
				}
				index2 += bits2;
				tmp3 = tmp4;
			}
		}

		index1 += bits1;
		tmp1 = tmp2;
	}

	succinct_popcount_postprocess(B);

	// Hand the finished structure to the bitvector.
	B->table = clark;
}

/**
 * Releases the Clark select directories attached to B.
 *
 * Frees the five directory arrays (D1, D2, D3, L1, L2) and the clark_t
 * itself, then clears B->table so the stale pointer can neither be
 * dereferenced nor freed a second time. Calling this on a bitvector whose
 * table is already NULL does nothing.
 *
 * @param B bitvector whose B->table was set by succinct_clark_preprocess().
 */
void succinct_clark_postprocess(bitvector_t *restrict B) {
	clark_t *clark = (clark_t*) B->table;

	if (NULL == clark) {
		return;
	}

	// free(NULL) is a no-op, so the members need no individual guards.
	free(clark->D1);
	free(clark->D2);
	free(clark->D3);
	free(clark->L1);
	free(clark->L2);
	free(clark);

	// Drop the dangling reference; a repeated call is now harmless.
	B->table = NULL;
}

/**
 * Returns the offset of the j'th occurrence of a 1 bit in the bitvector
 * succ->B, where j = succ->i, using the directories built by
 * succinct_clark_preprocess().
 *
 * Special cases visible in the code:
 *   - j == 0 returns 0;
 *   - j >= B->bits, or a computed position at or beyond B->bits, returns
 *     B->bits;
 *   - vectors of at most BYTE bits are answered directly from the `pop`
 *     lookup table.
 *
 * NOTE(review): pos1 and j2 are initialised with a division / modulo by
 * spc1 before any of the guards run -- confirm spc1 can never be 0 here.
 *
 * @param succ query descriptor; succ->B is the preprocessed bitvector
 *             (B->table is dereferenced as a clark_t unconditionally) and
 *             succ->i is j.
 * @return the selected offset, or B->bits when the answer falls outside the
 *         vector.
 */
uint64_t succinct_clark_select(struct succinct_t *restrict succ) {
	bitvector_t *restrict B = succ->B;
	clark_t *restrict clark = (clark_t *)B->table;
	uint64_t j = succ->i;
	double logSize = clark->logSize;
	uint64_t spc1 = clark->spc1, spc2; 
	uint64_t bits = B->bits, bits1 = clark->bits1, bits2;
	// pos1: index of the first-level block containing j,
	// j2: rank of j inside that block.
	uint64_t pos1 = floor((double)j/spc1), pos2;
	uint64_t j2 = (j%spc1);
	uint64_t r, rp;
	uint64_t tmp, tmp1, tmp2, tmp3, tmp4, tmp5;
	uint64_t res;
	uint64_t i;

	if ( unlikely(j == 0) ) {
		return 0;
	}

	if ( unlikely(j >= bits) ) {
		return bits;
	}

	// Tiny vectors: answer straight from the lookup table, which is indexed
	// with 9 entries per bit pattern. The result is shifted by the number of
	// unused bits (BYTE-bits); a table value of 0 is mapped to `bits`.
	if ( unlikely(bits <= BYTE) ) {
		res = pop[bitvector_get_bits(B->B, 0, bits)*9+j];
		return (res != 0) ? res-(BYTE-bits) : bits;
	}

	// tmp1/tmp2: left and right boundary of the first-level block, read from
	// D1; the left boundary of block 0 is 0 and the right boundary falls
	// back to `bits` when fewer than spc1 bits remain after tmp1.
	tmp1 = likely(pos1 > 0) ? 
			bitvector_get_bits(clark->D1, (pos1-1)*bits1, bits1) : 0;
	tmp2 = likely(bits-tmp1 >= spc1) ? 
			bitvector_get_bits(clark->D1, pos1*bits1, bits1) : bits;
	r = tmp2 - tmp1;
	// Field width of the relative offsets stored in D2 for this block.
	bits2 = floor(log2(r)+1);

	// If j is bigger than amount of ones in the bitvector
	if ( unlikely(tmp1+(j-pos1*spc1) >= bits) ) {
		return bits;
	}

	// If j is equal to leftmost value selected from the first auxiliary 
	// directory
	if ( unlikely(j2 == 0) ) {
		return tmp1;
	} 

	// If the range r is less than 2, we can assume that the rightmost value of
	// the range is the result
	if ( unlikely(r < 2) ) {
		return tmp2;
	}
	
	// If r is bigger than (log2(n)log2(log2(n)))^2, we have stored each
	// select(i) for i = tmp1+1 ... tmp2-1
	// L1 gives the bit offset of this block's entries inside D2.
	if ( unlikely(r > pow(spc1, 2)) ) {
		return bitvector_get_bits(
				clark->D2, 
				bitvector_get_bits(clark->L1, pos1*bits1, bits1)+(j2-1)*bits1, 
				bits1
		);
	}

	// spc2: ones per second-level sub-range, pos2: sub-range containing j2.
	spc2 = floor(log2(r))*log2(logSize);
	pos2 = likely(spc2 > 0) ? floor((double)j2/spc2) : 0;

	// We get the leftmost range tmp1 + offset in second auxiliary directory
	tmp3 = tmp1;
	if ( likely(pos2 > 0) ) {
		tmp3 += bitvector_get_bits(
				clark->D2, 
				bitvector_get_bits(clark->L1, pos1*bits1, bits1)+(pos2-1)*bits2, 
				bits2
		);
	}

	if ( unlikely(tmp3 >= bits) ) {
		return bits;
	}

	// We get the rightmost range tmp1 + offset in second auxiliary directory
	// NOTE(review): the offset is added to tmp3, not tmp1, although
	// succinct_clark_preprocess() stores D2 offsets relative to the block
	// start -- confirm this is intended when pos2 > 0.
	tmp4 = tmp3;

	// NOTE(review): spc1/spc2 is evaluated without the spc2 > 0 guard used
	// elsewhere in this function -- confirm spc2 cannot be 0 on this path.
	if ( unlikely(pos2+1 >= spc1/spc2 || tmp2-tmp1 < spc1) ) {
		tmp4 = tmp2;
	} else {
		tmp4 += bitvector_get_bits(
				clark->D2, 
				bitvector_get_bits(clark->L1, pos1*bits1, bits1)+pos2*bits2, 
				bits2
		);
	}

	// Calculate r' aka range between leftmost offset and rightmost offset
	rp = tmp4 - tmp3;

	// j2 falls exactly on a sub-range boundary: the left edge is the answer.
	if ( unlikely(spc2 > 0 && j2 % spc2 == 0) ) {
		return tmp3;
	} 

	// If the range r' is less than 2, we can assume that the rightmost value 
	// of the range is the result
	if ( unlikely(rp < 2) ) {
		return tmp4;
	}

	// If r' is bigger than log2(r')*spc2^2, the answer is read from the
	// third auxiliary directory D3, located through L2.
	// NOTE(review): succinct_clark_preprocess() uses a different threshold
	// (log2(rp)*log2(r)*log2(logSize)^2), writes L2 slot (j-1) where j-1
	// appears to correspond to pos2 rather than pos2-1, and stores D3
	// entries by rank within the sub-range while j2 here is still the rank
	// within the first-level block -- confirm these agree.
	if ( unlikely(rp > 0 && spc2 > 0 && rp > log2(rp)*pow(spc2, 2)) ) {
		return bitvector_get_bits(
				clark->D3, 
				bitvector_get_bits(clark->L2, (pos1*floor(log2(logSize))
						+(pos2-1))*bits1, bits1)+(j2-1)*bits1, 
				bits1
		);
	}

	// Reduce j2 to the rank inside the sub-range and scan from its left edge.
	j2 -= pos2*spc2;
	res = tmp3;

	// We use lookup table as last option
	// The sub-range is walked in chunks of BYTE bits; a trailing partial
	// chunk is shifted left so it lines up with a full chunk for the table.
	for (i = 0; i < ceil((double)rp/BYTE); i++) {
		if ( unlikely((i+1)*BYTE > rp) ) {
			tmp5 = bitvector_get_bits(B->B, res, rp%BYTE) << (BYTE-rp%BYTE);
		} else {
			tmp5 = bitvector_get_bits(B->B, res, BYTE);
		}

		// Either the wanted one lies in this chunk (finish via the table) or
		// the chunk's ones are skipped and the scan moves on.
		tmp = __builtin_popcountll(tmp5);
		if ( unlikely(tmp >= j2) ) {
			res += pop[tmp5*9+j2];
			break;
		} else {
			res += BYTE;
			j2 -= tmp;
		}
	}

	// if res is higher than amount of bits, we return the amount of bits
	if (res > bits) {
		return bits;
	}

	return res;
}
