/*
 * %CopyrightBegin%
 *
 * Copyright Ericsson AB 2005-2020. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * %CopyrightEnd%
 */

#ifdef __arm__

#include <stddef.h>	/* offsetof() */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "global.h"
#include "erl_binary.h"

#include "hipe_arch.h"
#include "hipe_native_bif.h"	/* nbif_callemu() */
#include "hipe_bif0.h"

#ifndef __has_builtin
# define __has_builtin(x) 0
#endif

/* Flush dcache and invalidate icache for a range of addresses. */
void hipe_flush_icache_range(void *address, unsigned int nbytes)
{
    void* end = (char*)address + nbytes;

#if ERTS_AT_LEAST_GCC_VSN__(4, 3, 0) || __has_builtin(__builtin___clear_cache)
    __builtin___clear_cache(address, end);
#elif defined(__clang__)
    void __clear_cache(void *start, void *end);
    __clear_cache(address, end);
#elif defined(__linux__)
# if defined(__ARM_EABI__)
    register unsigned long beg __asm__("r0") = (unsigned long)address;
    register unsigned long end __asm__("r1") = (unsigned long)end;
    register unsigned long flg __asm__("r2") = 0;
    register unsigned long scno __asm__("r7") = 0xf0002;
    __asm__ __volatile__("swi 0"	/* sys_cacheflush() */
			 : "=r"(beg)
			 : "0"(beg), "r"(end), "r"(flg), "r"(scno));
# else
    register unsigned long beg __asm__("r0") = (unsigned long)address;
    register unsigned long end __asm__("r1") = (unsigned long)end;
    register unsigned long flg __asm__("r2") = 0;
    __asm__ __volatile__("swi 0x9f0002"	/* sys_cacheflush() */
			 : "=r"(beg)
			 : "0"(beg), "r"(end), "r"(flg));
# endif
#else
# error "Don't know how to flush instruction cache"
#endif
}

void hipe_flush_icache_word(void *address)
{
    hipe_flush_icache_range(address, 4);
}


static int check_callees(Eterm callees)
{
    Eterm *tuple;
    Uint arity;
    Uint i;

    if (is_not_tuple(callees))
	return -1;
    tuple = tuple_val(callees);
    arity = arityval(tuple[0]);
    for (i = 1; i <= arity; ++i) {
	Eterm mfa = tuple[i];
	if (is_atom(mfa))
	    continue;
	if (is_not_tuple(mfa) ||
	    tuple_val(mfa)[0] != make_arityval(3) ||
	    is_not_atom(tuple_val(mfa)[1]) ||
	    is_not_atom(tuple_val(mfa)[2]) ||
	    is_not_small(tuple_val(mfa)[3]) ||
	    unsigned_val(tuple_val(mfa)[3]) > 255)
	    return -1;
    }
    return arity;
}

#define TRAMPOLINE_WORDS 2

static void generate_trampolines(Uint32* address,
                                 int nrcallees, Eterm callees,
                                 Uint32** trampvec)
{
    Uint32* trampoline = address;
    int i;

    for (i = 0; i < nrcallees; ++i) {
        trampoline[0] = 0xE51FF004;     /* ldr pc, [pc,#-4] */
        trampoline[1] = 0;		/* callee's address */
	trampvec[i] = trampoline;
        trampoline += TRAMPOLINE_WORDS;
    }
    hipe_flush_icache_range(address, nrcallees*2*sizeof(Uint32));
}

void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p)
{
    Uint code_words;
    int nrcallees;
    Eterm trampvecbin;
    Uint32 **trampvec;
    Uint32 *address;

    if (nrbytes & 0x3)
	return NULL;
    code_words = nrbytes / sizeof(Uint32);

    nrcallees = check_callees(callees);
    if (nrcallees < 0)
	return NULL;
    trampvecbin = new_binary(p, NULL, nrcallees*sizeof(Uint32*));
    trampvec = (Uint32**)binary_bytes(trampvecbin);

    address = erts_alloc(ERTS_ALC_T_HIPE_EXEC,
                         (code_words + nrcallees*TRAMPOLINE_WORDS)*sizeof(Uint32));

    generate_trampolines(address + code_words, nrcallees, callees, trampvec);
    *trampolines = trampvecbin;
    return address;
}

void  hipe_free_code(void* code, unsigned int bytes)
{
    erts_free(ERTS_ALC_T_HIPE_EXEC, code);
}

/*
 * ARMv5's support for 32-bit immediates is effectively non-existent.
 * Hence, every 32-bit immediate is stored in memory and loaded via
 * a PC-relative addressing mode. Relocation entries refer to those
 * data words, NOT the load instructions, so patching is trivial.
 */
static void patch_imm32(Uint32 *address, unsigned int imm32)
{
    *address = imm32;
    hipe_flush_icache_word(address);
}

void hipe_patch_load_fe(Uint32 *address, Uint value)
{
    patch_imm32(address, value);
}

int hipe_patch_insn(void *address, Uint32 value, Eterm type)
{
    switch (type) {
      case am_closure:
      case am_constant:
      case am_atom:
      case am_c_const:
	break;
      default:
	return -1;
    }
    patch_imm32((Uint32*)address, value);
    return 0;
}

/* Make stub for native code calling exported beam function
*/
void *hipe_make_native_stub(void *callee_exp, unsigned int beamArity)
{
    unsigned int *code;
    int callemu_offset;
    int is_short_jmp;

    /*
     * Native code calls BEAM via a stub looking as follows:
     *
     * mov r0, #beamArity
     * ldr r8, [pc,#0] // callee_exp
     * b nbif_callemu
     * .long callee_exp
     *
     * or if nbif_callemu is too far away:
     *
     * mov r0, #beamArity
     * ldr r8, [pc,#0] // callee_exp
     * ldr pc, [pc,#0] // nbif_callemu
     * .long callee_exp
     * .long nbif_callemu
     *
     * I'm using r0 and r8 since they aren't used for
     * parameter passing in native code.
     */

    code = erts_alloc(ERTS_ALC_T_HIPE_EXEC, 5*sizeof(Uint32));
    if (!code)
	return NULL;
    callemu_offset = ((int)&nbif_callemu - ((int)&code[2] + 8)) >> 2;
    is_short_jmp = (callemu_offset >= -0x00800000 &&
                    callemu_offset <= 0x007FFFFF);
#ifdef DEBUG
    if (is_short_jmp && (callemu_offset % 3)==0) {
        is_short_jmp = 0;
    }
#endif

    /* mov r0, #beamArity */
    code[0] = 0xE3A00000 | (beamArity & 0xFF);
    /* ldr r8, [pc,#0] // callee_exp */
    code[1] = 0xE59F8000;
    if (is_short_jmp) {
        /* b nbif_callemu */
        code[2] = 0xEA000000 | (callemu_offset & 0x00FFFFFF);
    }
    else {
        /* ldr pc, [pc,#0] // nbif_callemu */
        code[2] = 0xE59FF000;
        /* .long nbif_callemu */
        code[4] = (unsigned int)&nbif_callemu;
    }
    /* .long callee_exp */
    code[3] = (unsigned int)callee_exp;

    hipe_flush_icache_range(code, 5*sizeof(Uint32));

    return code;
}

void hipe_free_native_stub(void* stub)
{
    erts_free(ERTS_ALC_T_HIPE_EXEC, stub);
}

static void patch_b(Uint32 *address, Sint32 offset, Uint32 AA)
{
    Uint32 oldI = *address;
    Uint32 newI = (oldI & 0xFF000000) | (offset & 0x00FFFFFF);
    *address = newI;
    hipe_flush_icache_word(address);
}

int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline)
{
    Sint32 destOffset = ((Sint32)destAddress - ((Sint32)callAddress+8)) >> 2;
    if (destOffset >= -0x800000 && destOffset <= 0x7FFFFF) {
	/* The destination is within a [-32MB,+32MB[ range from us.
	   We can reach it with a b/bl instruction.
	   This is typical for nearby Erlang code. */
	patch_b((Uint32*)callAddress, destOffset, 0);
    } else {
	/* The destination is too distant for b/bl.
	   Must do a b/bl to the trampoline. */
	Sint32 trampOffset = ((Sint32)trampoline - ((Sint32)callAddress+8)) >> 2;
	if (trampOffset >= -0x800000 && trampOffset <= 0x7FFFFF) {
	    /* Update the trampoline's address computation.
	       (May be redundant, but we can't tell.) */
	    patch_imm32((Uint32*)trampoline+1, (Uint32)destAddress);
	    /* Update this call site. */
	    patch_b((Uint32*)callAddress, trampOffset, 0);
	} else
	    return -1;
    }
    return 0;
}

void hipe_arch_print_pcb(struct hipe_process_state *p)
{
#define U(n,x) \
    printf(" % 4d | %s | 0x%0*lx | %*s |\r\n", (int)offsetof(struct hipe_process_state,x), n, 2*(int)sizeof(long), (unsigned long)p->x, 2+2*(int)sizeof(long), "")
    U("nra        ", nra);
    U("narity     ", narity);
#undef U
}

#endif /*__arm__*/
