#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <math.h>

#include "veb.h"
#include "xutil.h"

/*
 * Recursively fill the per-depth lookup table used to locate nodes in the
 * recursive top-tree / bottom-tree layout (presumably van Emde Boas, going by
 * the veb_ prefix).
 *
 * Each depth d owns three consecutive entries:
 *   table[d*3]   - size of the bottom tree whose root is at depth d
 *   table[d*3+1] - size of the corresponding top tree
 *   table[d*3+2] - depth of the root of the corresponding top tree
 *
 * Parameters:
 *   table          - output array; must hold three entries for every depth
 *                    reached by the recursion
 *   N              - node count of the (sub)tree being split; only its bit
 *                    length (the tree height) influences the result
 *   d              - depth of the root of this (sub)tree
 *   root_depth     - value stored in table[d*3+2] once the recursion reaches
 *                    a single node
 *   nodes_this_bot - value stored in table[d*3] at that point
 *   nodes_top_tree - value stored in table[d*3+1] at that point
 *
 * All sizes are computed with integer shifts instead of log2()/pow()/ceil(),
 * so the results are exact and do not depend on floating-point rounding.
 * N == 0 is a no-op; the floating-point version evaluated log2(0) there.
 */
void veb_table(uint32_t *table, uint32_t N, uint32_t d, uint32_t root_depth,
		uint32_t nodes_this_bot, uint32_t nodes_top_tree) {

	// An empty tree has no depth to describe
	if (N == 0) {
		return;
	}

	if (N == 1) {
		// Size of this bottom tree
		table[d*3] = nodes_this_bot;

		// Size of corresponding top tree
		table[d*3+1] = nodes_top_tree;

		// Depth of root of corresponding top tree
		table[d*3+2] = root_depth;

		return;
	}

	// Height of full tree (top + bottom): number of significant bits in N,
	// i.e. floor(log2(N)) + 1
	uint32_t hT = 0;
	for (uint32_t rest = N; rest != 0; rest >>= 1) {
		hT++;
	}

	// Height of top tree: ceil(hT / 2)
	uint32_t H0 = (hT + 1) / 2;

	// Amount of nodes in top tree: 2^H0 - 1 (H0 <= 16, so the shift is safe)
	uint32_t nodes_top = (UINT32_C(1) << H0) - 1;

	// Amount of nodes in each of the bottom trees: 2^(hT - H0) - 1, which is
	// what (2^hT - nodes_top) / 2^H0 truncates to
	uint32_t nodes_bot = (UINT32_C(1) << (hT - H0)) - 1;

	// Recurse into top tree
	veb_table(table, nodes_top, d, root_depth, nodes_this_bot, nodes_top_tree);

	// Recurse into bottom tree
	veb_table(table, nodes_bot, d + H0, d+1, nodes_bot, nodes_top);
}

/*
 * Copy array[start..end] into veb->storage as a binary search tree, placing
 * every node at the position dictated by veb->table.
 *
 * Parameters:
 *   array - source values (presumably sorted ascending; verify against caller)
 *   start - first index of the range handled by this call
 *   end   - last index of the range (inclusive); start > end means empty
 *   index - path identifier of the current node: 1 for the root, left child
 *           is (index << 1), right child is (index << 1) + 1
 *   d     - depth of the current node (root is depth 0)
 *   pos   - scratch array indexed by depth; pos[k] is the 1-based storage
 *           position of the node at depth k on the current root path.
 *           pos[0] must be initialised by the caller.
 *   veb   - tree whose table is read and whose storage is written
 *
 * The split point is the upper median, start + ceil((end - start) / 2), so the
 * left part holds n >> 1 elements and the right part (n - 1) >> 1 of the
 * n = end - start + 1 elements. It is computed in integer arithmetic: routing
 * the span through a float silently rounds it once it exceeds 2^24, yielding
 * a wrong midpoint for large ranges.
 */
void veb_recurse(uint32_t *array, int32_t start, int32_t end, uint32_t index,
		uint32_t d, uint32_t *pos, veb_t *veb) {
	int32_t span;
	int32_t mid;

	if (start > end) {
		return;
	}

	// ceil(span / 2) without floating point and without overflowing span + 1
	span = end - start;
	mid = start + span / 2 + (span & 1);

	// Derive this node's position from the position of the root of the
	// enclosing top tree (the root itself keeps the caller-provided pos[0])
	if (d > 0) {
		pos[d] = pos[veb->table[d*3+2]-1] + veb->table[d*3+1] +
			(index & veb->table[d*3+1]) * veb->table[d*3];
	}

	// pos[] is 1-based, storage is 0-based
	veb->storage[pos[d]-1] = array[mid];

	// Left recursion
	veb_recurse(array, start, mid-1, (index << 1), d+1, pos, veb);

	// Right recursion
	veb_recurse(array, mid+1, end, (index << 1)+1, d+1, pos, veb);
}

/*
 * Build a search tree over array[0..N-1].
 *
 * Parameters:
 *   array - N source values (presumably sorted ascending, since veb_pred
 *           performs a binary search over the result; verify against callers)
 *   N     - number of elements in array
 *
 * Returns a newly allocated veb_t whose storage has room for a complete tree
 * of height floor(log2(N)) + 1 (unused slots are zero), or NULL when array is
 * NULL or N is 0 (the previous code evaluated log2(0) and then recursed
 * without bound in that case).
 *
 * NOTE(review): veb_recurse takes int32_t bounds, so N - 1 must fit in
 * int32_t; larger N leaves the storage unpopulated. Confirm whether callers
 * can ever pass such sizes.
 */
veb_t *veb_create(uint32_t *array, uint32_t N) {
	uint32_t height = 0;
	uint32_t nodes;
	size_t size;
	veb_t *veb;

	if (array == NULL || N == 0) {
		return NULL;
	}

	// Height = number of significant bits in N = floor(log2(N)) + 1
	for (uint32_t rest = N; rest != 0; rest >>= 1) {
		height++;
	}

	// 2^height - 1 nodes; computed in 64 bits because height may be 32
	nodes = (uint32_t)((UINT64_C(1) << height) - 1);

	// Byte count kept in size_t so tall trees do not wrap a 32-bit product
	size = (size_t)nodes * sizeof(uint32_t);

	// Per-depth position scratch space; the root lives at 1-based position 1
	uint32_t pos[height+1];
	pos[0] = 1;

	veb = xmalloc(sizeof(veb_t));

	veb->height = height;
	veb->nodes = nodes;
	veb->table = xcalloc(3 * height, sizeof(uint32_t));
	veb->storage = xmalloc(size);
	veb->N = N;

	memset(veb->storage, '\0', size);

	// A single element needs no layout table
	if (N == 1) {
		veb->storage[0] = array[0];
		return veb;
	}

	veb_table(veb->table, N, 0, 1, 0, 0);

	veb_recurse(array, 0, N-1, 1, 0, pos, veb);

	return veb;
}

/*
 * Search the tree in str->veb for str->x.
 *
 * Walks from the root, going left when x is smaller than the visited element
 * and right otherwise, and remembers the last element that was not greater
 * than x. Returns x itself when it is found, otherwise the remembered element,
 * or UINT32_MAX when every visited element was greater than x.
 */
uint32_t veb_pred(struct veb_search *restrict str) {
	const uint32_t key = str->x;
	veb_t *restrict veb = str->veb;
	uint32_t pos[veb->height+1];		// 1-based storage position per depth
	uint32_t best = UINT32_MAX;			// Best candidate seen so far
	uint32_t remaining = veb->N;		// Elements left in the current subtree
	uint32_t path = 1;					// Path identifier of the next node
	uint32_t depth;
	uint32_t cur;

	pos[0] = 1;

	// The root is handled outside the loop so that the loop body never needs
	// a special case for depth 0
	cur = veb->storage[0];
	if (key >= cur) {
		best = cur;
		remaining = (remaining - 1) >> 1;
		path = (path << 1) + 1;
	} else {
		remaining >>= 1;
		path <<= 1;
	}

	for (depth = 1; key != cur && remaining != 0; depth++) {
		const uint32_t bottom_size = veb->table[depth*3];
		const uint32_t top_size = veb->table[depth*3+1];
		const uint32_t top_root = veb->table[depth*3+2];

		// Position relative to the root of the enclosing top tree
		pos[depth] = pos[top_root-1] + top_size +
			(path & top_size) * bottom_size;

		cur = veb->storage[pos[depth]-1];

		if (key >= cur) {
			best = cur;
			remaining = (remaining - 1) >> 1;
			path = (path << 1) + 1;
		} else {
			remaining >>= 1;
			path <<= 1;
		}
	}

	return (key == cur) ? cur : best;
}

/*
.LHOTB7:
	.p2align 4,,15
	.globl	veb_pred
	.hidden	veb_pred
	.type	veb_pred, @function
veb_pred:
.LFB30:
	.cfi_startproc
	pushq	%rbp           # Push base pointer
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%rsp, %rbp     # rbp = rsp
	.cfi_def_cfa_register 6
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	.cfi_offset 14, -24
	.cfi_offset 13, -32
	.cfi_offset 12, -40
	.cfi_offset 3, -48
	movl	20(%rsi), %eax  # eax = height
	movq	8(%rsi), %r12   # r12 = storage
	movl	24(%rsi), %edx  # edx = N
	addl	$1, %eax        # eax = height + 1
	leaq	18(,%rax,4), %rax # rax = rax*4 + 18
	shrq	$4, %rax        #
	salq	$4, %rax
	subq	%rax, %rsp      # reserve stack space for pos[] (byte count rounded to a multiple of 16)
	leaq	3(%rsp), %rax
	shrq	$2, %rax
	leaq	0(,%rax,4), %rbx
	movl	$1, 0(,%rax,4)
	movl	(%r12), %eax
	cmpl	%edi, %eax
	jbe	.L36
	shrl	%edx
	movl	$-1, %eax
	movl	$2, %r8d
.L37:
	movq	(%rsi), %r11    # r11 = table
	movl	$5, %r9d        # r9d = 5 (d*3+2)
	movl	$1, %r10d       # r10d = 1
	jmp	.L38
	.p2align 4,,10
	.p2align 3
.L47:
	testl	%edx, %edx      # subtree_size == 0? => .L41
	je	.L41
.L38:
	leal	-1(%r9), %ecx   # ecx  = r9 - 1
	leal	-2(%r9), %r14d  # r14d = r9 - 2
	movl	(%r11,%rcx,4), %esi # esi = table[d*3+1]
	movl	%r9d, %ecx      # ecx = d*3+2
	movl	(%r11,%rcx,4), %ecx # ecx = table[d*3+2]
	leal	-1(%rcx), %r13d # r13d = rcx - 1 || veb->table[d*3+2]-1
	movl	%esi, %ecx      # ecx = table[d*3 + 1]
	andl	%r8d, %ecx      # index & table[d*3+1]
	addl	%r8d, %r8d      # index << 1
	imull	(%r11,%r14,4), %ecx # ecx = (index & table[d*3+1]) * table[d*3]
	addl	(%rbx,%r13,4), %ecx # ecx += pos[table[d*3+2]-1]
	leal	1(%r8), %r13d   # r13d = (index << 1) + 1
	addl	%esi, %ecx      # ecx += table[d*3+1], giving pos[d]
	movl	%r10d, %esi     # esi  = d
	movl	%ecx, (%rbx,%rsi,4) # save pos[d]
	subl	$1, %ecx        # ecx = pos[d] - 1
	movl	%edx, %esi      # esi = subtree_size
	movl	(%r12,%rcx,4), %ecx # j = storage[pos[d]-1]
	subl	$1, %edx        # edx = subtree_size - 1
	shrl	%esi            # esi = subtree_size >> 1
	shrl	%edx            # edx = (subtree_size - 1) >> 1
	cmpl	%ecx, %edi      # compare x with j
	cmovnb	%r13d, %r8d     # if x >= j, index = (index << 1) + 1
	cmovnb	%ecx, %eax      # if x >= j, p = j
	cmovb	%esi, %edx      # if x < j, subtree_size = subtree_size >> 1
	addl	$1, %r10d       # d++
	addl	$3, %r9d        # advance d*3+2 to the next depth
	cmpl	%ecx, %edi      # loop again while x != j
	jne	.L47
.L41:
	cmpl	%ecx, %edi
	cmove	%edi, %eax
	leaq	-32(%rbp), %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%rbp
	.cfi_remember_state
	.cfi_def_cfa 7, 8
	ret
	.p2align 4,,10
	.p2align 3
.L36:
	.cfi_restore_state
	subl	$1, %edx
	movl	$3, %r8d
	shrl	%edx
	jmp	.L37
	.cfi_endproc

NATIVE:
veb_pred:
.LFB30:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    pushq   %r14
    pushq   %r13
    pushq   %r12
    pushq   %rbx
    .cfi_offset 14, -24
    .cfi_offset 13, -32
    .cfi_offset 12, -40
    .cfi_offset 3, -48
    movq    8(%rdi), %rcx
    movl    (%rdi), %r9d
    movl    20(%rcx), %eax
    movq    8(%rcx), %r12
    movl    24(%rcx), %edx
    addl    $1, %eax
    leaq    18(,%rax,4), %rax
    shrq    $4, %rax
    salq    $4, %rax
    subq    %rax, %rsp
    leaq    3(%rsp), %rax
    shrq    $2, %rax
    leaq    0(,%rax,4), %rbx
    movl    $1, 0(,%rax,4)
    movl    (%r12), %eax
    cmpl    %eax, %r9d
    jnb .L36
    shrl    %edx
    movl    $-1, %eax
    movl    $2, %esi
.L37:
    movq    (%rcx), %r11
    movl    $5, %r8d
    movl    $1, %r10d
    jmp .L38
    .p2align 4,,10
    .p2align 3
.L47:
    testl   %edx, %edx
    je  .L41
.L38:
    leal    -1(%r8), %ecx
    movl    (%r11,%rcx,4), %edi
    leal    -2(%r8), %r14d
    movl    %r8d, %ecx
    movl    (%r11,%rcx,4), %ecx
    leal    -1(%rcx), %r13d
    movl    %edi, %ecx
    andl    %esi, %ecx
    addl    %esi, %esi
    imull   (%r11,%r14,4), %ecx
    addl    (%rbx,%r13,4), %ecx
    leal    1(%rsi), %r13d
    addl    %edi, %ecx
    movl    %r10d, %edi
    movl    %ecx, (%rbx,%rdi,4)
    subl    $1, %ecx
    movl    %edx, %edi
    subl    $1, %edx
    movl    (%r12,%rcx,4), %ecx
    shrl    %edi
    shrl    %edx
    cmpl    %ecx, %r9d
    cmovnb  %r13d, %esi
    cmovnb  %ecx, %eax
    cmovb   %edi, %edx
    addl    $1, %r10d
    addl    $3, %r8d
    cmpl    %ecx, %r9d
    jne .L47
.L41:
    cmpl    %ecx, %r9d
    cmove   %r9d, %eax
    leaq    -32(%rbp), %rsp
    popq    %rbx
    popq    %r12
    popq    %r13
    popq    %r14
    popq    %rbp
    .cfi_remember_state
    .cfi_def_cfa 7, 8
    ret
*/
