#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <math.h>

#include "bfs.h"
#include "xutil.h"

/*
 * Recursively copy array[start..end] into bfs->storage in breadth-first
 * (heap-style) order.
 *
 * The root of the current range is taken ceil((end - start) * skew) positions
 * after `start` and written to the 1-based slot `index`. The elements before
 * it are laid out under slot 2*index, the elements after it under slot
 * 2*index + 1.
 *
 * array  input values (presumably sorted ascending, since the result is
 *        searched as a binary search tree -- confirm with callers)
 * start  first position of the range, inclusive
 * end    last position of the range, inclusive; start > end means empty
 * index  1-based slot in bfs->storage for this range's root
 * bfs    tree being filled; bfs->skew is read, bfs->storage is written
 *
 * NOTE(review): nothing in this function bounds `index`. The ceil-based split
 * does not keep the tree left-filled (5 elements with skew 0.5 already reach
 * slot 6), so bfs->storage must be large enough for the deepest slot reached
 * -- verify at every call site.
 */
void bfs_recurse(uint32_t *array, int32_t start, int32_t end, uint32_t index,
		bfs_t *bfs) {
	int32_t mid;

	if (start > end) {
		return;
	}

	// Keep the offset in double precision: narrowing it to float (24-bit
	// mantissa) rounds offsets above 2^24 and yields a wrong, possibly
	// out-of-range, midpoint for large inputs.
	mid = start + (int32_t)ceil((double)(end - start) / (1.0 / bfs->skew));

	// Store parent
	bfs->storage[index-1] = array[mid];

	// Left recursion
	bfs_recurse(array, start, mid-1, (index << 1), bfs);

	// Right recursion
	bfs_recurse(array, mid+1, end, (index << 1)+1, bfs);
}

/*
 * Recursively copy array[start..end] into bfs->storage in breadth-first
 * (heap-style) order, choosing the root so that a range whose size is not
 * 2^h - 1 becomes a complete tree (last level filled from the left).
 *
 * For a range of exactly 2^h - 1 elements the root is taken
 * ceil((end - start) * skew) positions after `start`. Otherwise the root
 * position is derived from how many last-level nodes fall into the left
 * subtree.
 *
 * array  input values (presumably sorted ascending -- confirm with callers)
 * start  first position of the range, inclusive
 * end    last position of the range, inclusive; start > end means empty
 * index  1-based slot in bfs->storage for this range's root
 * bfs    tree being filled; bfs->skew is read, bfs->storage is written
 *
 * NOTE(review): `index` is never bounds-checked. With skew == 0.5 the layout
 * is left-filled; other skew values split perfect ranges off-centre and may
 * reach slots beyond the element count -- verify storage sizing at the caller.
 */
void bfs_recurse_blazing(uint32_t *array, int32_t start, int32_t end, uint32_t index,
		bfs_t *bfs) {
	int32_t mid;

	if (start > end) {
		return;
	}

	// Element count of this range; at least 1 once the guard above passed
	uint32_t N = (uint32_t)(end - start) + 1;

	// Height of a complete tree with N nodes == number of significant bits
	// in N (same value as floor(log2(N)) + 1, without floating point)
	uint32_t height = 0;
	for (uint32_t rest = N; rest != 0; rest >>= 1) {
		height++;
	}

	// Node count of a perfect tree of that height (64-bit so the shift is
	// defined even for height == 32)
	uint64_t nodes = ((uint64_t)1 << height) - 1;

	if (N == nodes) {
		// Perfect range: split by skew. Stay in double precision; a float
		// (24-bit mantissa) mis-rounds offsets above 2^24.
		mid = start + (int32_t)ceil((double)(end - start) / (1.0 / bfs->skew));
	} else {
		// N is not of the form 2^h - 1 here, hence N >= 2 and height >= 2

		// Nodes on all levels above the last one
		uint32_t upper_nodes = ((uint32_t)1 << (height - 1)) - 1;

		// Nodes actually present on the last level
		uint32_t leaf_count = N - upper_nodes;

		// The left subtree can hold at most half of a full last level
		uint32_t left_leaves = (uint32_t)1 << (height - 2);

		if (leaf_count < left_leaves) {
			left_leaves = leaf_count;
		}

		// Left subtree size = its share of the upper levels + its leaves
		mid = start + (int32_t)((upper_nodes - 1)/2 + left_leaves);
	}

	// Store parent
	bfs->storage[index-1] = array[mid];

	// Left recursion
	bfs_recurse_blazing(array, start, mid-1, (index << 1), bfs);

	// Right recursion
	bfs_recurse_blazing(array, mid+1, end, (index << 1)+1, bfs);
}

/*
 * Build a breadth-first-ordered tree from `array`.
 *
 * Allocates a bfs_t and a storage buffer of N uint32_t values with xmalloc,
 * records height, perfect-tree node count, skew and N, zero-fills the
 * storage and populates it through bfs_recurse_blazing().
 *
 * array  N input values (presumably sorted ascending -- confirm with callers)
 * N      number of elements; 0 yields an empty tree (height 0, nodes 0).
 *        The recursion takes int32_t positions, so N must not exceed
 *        INT32_MAX.
 * skew   split fraction stored in bfs->skew and used by the recursion
 *
 * Returns the new tree. Both the struct and bfs->storage come from xmalloc;
 * no matching release function is visible in this file.
 */
bfs_t *bfs_create(uint32_t *array, uint32_t N, double skew) {
	// size_t, not uint32_t: N * 4 does not fit in 32 bits once N >= 2^30
	size_t size = (size_t)N * sizeof(uint32_t);

	bfs_t *bfs = xmalloc(sizeof(bfs_t));

	if (N > 0) {
		bfs->height = floor(log2(N))+1;
		bfs->nodes = pow(2, bfs->height) - 1;
	} else {
		// log2(0) is -infinity; converting that to an integer is undefined
		bfs->height = 0;
		bfs->nodes = 0;
	}
	bfs->storage = xmalloc(size);
	bfs->skew = skew;
	bfs->N = N;

	if (N > 0) {
		memset(bfs->storage, 0, size);

		bfs_recurse_blazing(array, 0, (int32_t)(N - 1), 1, bfs);
	}

	return bfs;
}

/*
 * Predecessor search in a breadth-first-ordered tree.
 *
 * Walks from slot 1 downwards: goes to slot 2*index when x is smaller than
 * the current value, otherwise remembers the current value and goes to slot
 * 2*index + 1. Stops on an exact match or when the slot exceeds bfs->N.
 *
 * str  search request; str->x is the key, str->bfs the tree to search
 *
 * Returns the last visited value that was <= x (x itself on an exact match),
 * or UINT32_MAX when no visited value was <= x or the tree is empty. A stored
 * UINT32_MAX is therefore indistinguishable from "not found".
 */
uint32_t bfs_pred(struct bfs_search *restrict str) {
	uint32_t x = str->x;
	bfs_t *restrict bfs = str->bfs;
	uint32_t index = 1;				// Current index
	uint32_t p = UINT32_MAX; 		// Current predecessor
	uint32_t j;     				// Current element
	uint32_t N = bfs->N;

	// The loop below reads slot 1 before testing any bound; an empty tree
	// has no slot 1
	if (N == 0) {
		return p;
	}

	do {
		j = bfs->storage[index-1];

		if (likely(x < j)) {
			index = index << 1;
		} else {
			p = j;
			index = (index << 1) + 1;
		}

	} while(x != j && index <= N);

	// On an exact match the else-branch above already stored j in p, so p is
	// the answer in every case
	return p;
}


/*
bfs_pred:
.LFB29:
    .cfi_startproc
    movl    12(%rsi), %edx  # edx = N
    movq    (%rsi), %r10    # r10 = *storage
    movl    $-1, %eax       # eax = -1
    movl    $1, %esi        # esi|index = 1
    jmp .L53
    .p2align 4,,10
    .p2align 3
.L61:
    testl   %edx, %edx      # subtree_size == 0
    je  .L56                # ----
.L53:
    leal    -1(%rsi), %ecx  # ecx = index - 1
    addl    %esi, %esi      # index = 2 * index <=> index << 1
    movl    %edx, %r8d      # r8d = subtree_size
    leal    1(%rsi), %r9d   # r9d = (index << 1) + 1
    subl    $1, %edx        # subtree_size = subtree_size - 1
    shrl    %r8d            # r8d = subtree_size >> 1
    movl    (%r10,%rcx,4), %ecx # j = storage[index - 1]
    shrl    %edx            # edx = (subtree_size -1) >> 1
    cmpl    %edi, %ecx      # test(j == x)

    cmovbe  %r9d, %esi      # index = (index << 1) + 1
    cmovbe  %ecx, %eax      # p = j

    cmova   %r8d, %edx      # edx = subtree_size = subtree_size >> 1
    cmpl    %ecx, %edi      # x == j
    jne .L61
.L56:
    cmpl    %ecx, %edi
    cmove   %edi, %eax
    ret



OPTIMIZED:
bfs_pred:
.LFB30:
	.cfi_startproc
	movl	12(%rsi), %r9d # r9d = N
	movq	(%rsi), %r8    # r8 = *storage
	movl	$-1, %eax      # eax = -1
	movl	$1, %edx       # edx = index = 1
	jmp	.L68
	.p2align 4,,10
	.p2align 3
.L76:
	cmpl	%r9d, %edx # index > N => L71
	ja	.L71
.L68:
	leal	-1(%rdx), %ecx  # ecx = index - 1
	addl	%edx, %edx      # edx = index << 1
	leal	1(%rdx), %esi   # esi = (index << 1) + 1
	movl	(%r8,%rcx,4), %ecx # j = storage[index - 1]
	cmpl	%edi, %ecx      # x == j
	cmovbe	%esi, %edx      # edx = (index << 1) + 1
	cmovbe	%ecx, %eax      # p = j
	cmpl	%ecx, %edi      # x != j => L76
	jne	.L76
.L71:
	cmpl	%ecx, %edi
	cmove	%edi, %eax
	ret


NATIVE:
bfs_pred:
.LFB30:
    .cfi_startproc
    movq    8(%rdi), %rax
    movl    $1, %edx
    movl    (%rdi), %esi
    movl    12(%rax), %r9d
    movq    (%rax), %r8
    movl    $-1, %eax
    jmp .L68
    .p2align 4,,10
    .p2align 3
.L76:
    cmpl    %r9d, %edx
    ja  .L71
.L68:
    leal    -1(%rdx), %ecx
    addl    %edx, %edx
    movl    (%r8,%rcx,4), %ecx
    leal    1(%rdx), %edi
    cmpl    %ecx, %esi
    cmovnb  %edi, %edx
    cmovnb  %ecx, %eax
    cmpl    %ecx, %esi
    jne .L76
.L71:
    cmpl    %ecx, %esi
    cmove   %esi, %eax
    ret
*/
