#include "emu.h"

/* --------------------------------------------------------------------- */

/*
 * Human-readable names for the KVM paravirt feature bits, indexed by
 * bit number (the KVM_FEATURE_* constants).  Slots without a designated
 * initializer stay NULL.  pv_init() passes this table to print_bits()
 * when it logs the feature word.
 */
const char *feature_bits[32] = {
    [ KVM_FEATURE_CLOCKSOURCE  ] = "clocksource",
    [ KVM_FEATURE_NOP_IO_DELAY ] = "nop-iodelay",
    [ KVM_FEATURE_MMU_OP       ] = "mmu-op",
};

/* Set to 1 by pv_init() when the clocksource feature bit is reported. */
int pv_have_clock;
/* Only set inside a pv_init() branch that is currently disabled (WIP). */
int pv_have_cr3_cache;
/* Set to 1 by pv_init() when the mmu-op feature bit is reported. */
int pv_have_mmu_op;

/* --------------------------------------------------------------------- */

/*
 * Ask userspace to refresh the system time when no paravirtual clock
 * is available.
 *
 * wakeup != 0: called after halt(); always request an update and count
 *              the wakeup.
 * wakeup == 0: called from the timer irq; request an update only when
 *              no wakeup was counted since the previous timer irq, then
 *              reset the counter.
 *
 * Does nothing at all when pv_have_clock is set.
 */
void pv_clock_update(int wakeup)
{
    static int wakeups;   /* wakeups counted since the last timer irq */
    int need_update;

    if (pv_have_clock)
        return;

    if (!wakeup) {
        /* timer irq -- update only if needed */
        need_update = (wakeups == 0);
        wakeups = 0;
    } else {
        /* after halt() -- update clock unconditionally */
        need_update = 1;
        wakeups++;
    }

    if (!need_update)
        return;

    /* vmexit to userspace so xenner has a chance to update systime */
    emudev_cmd(EMUDEV_CMD_NOP, 0);
}

/*
 * Register the wall clock fields of shared_info with the hypervisor.
 *
 * Writes the physical address of shared_info.wc_version to
 * MSR_KVM_WALL_CLOCK and logs the resulting version / seconds /
 * nanoseconds.  Panics if the MSR write fails.  No-op unless
 * pv_have_clock is set.
 */
static void pv_clock_wall(void)
{
    uint64_t wc_pa;

    if (!pv_have_clock)
        return;

    wc_pa = EMU_PA(&shared_info.wc_version);
    printk(1, "%s: register wall clock at 0x%" PRIx64 "\n",
           __FUNCTION__, wc_pa);

    if (wrmsrl_safe(MSR_KVM_WALL_CLOCK, wc_pa) != 0)
        panic("MSR_KVM_WALL_CLOCK wrmsr failed", NULL);

    printk(1, "%s: v%d %d.%09d\n", __FUNCTION__,
           shared_info.wc_version,
           shared_info.wc_sec,
           shared_info.wc_nsec);
}

/*
 * Register the per-vcpu time info area of `cpu` with the hypervisor.
 *
 * The address written to MSR_KVM_SYSTEM_TIME is the vcpu_info physical
 * address plus the offset of its `time` member, with bit 0 set.  The
 * time fields are logged afterwards.  Panics if the MSR write fails.
 * No-op unless pv_have_clock is set.
 */
void pv_clock_sys(struct xen_cpu *cpu)
{
    uint64_t time_pa;

    if (!pv_have_clock)
        return;

    time_pa = cpu->v.vcpu_info_pa + offsetof(struct vcpu_info, time);
    printk(1, "%s: register vcpu %d clock at 0x%" PRIx64 "\n",
           __FUNCTION__, cpu->id, time_pa);

    /* NOTE(review): bit 0 looks like the MSR's enable flag -- confirm
     * against the KVM paravirt MSR documentation. */
    if (wrmsrl_safe(MSR_KVM_SYSTEM_TIME, time_pa | 1) != 0)
        panic("MSR_KVM_SYSTEM_TIME wrmsr failed", NULL);

    printk(1, "%s: v%d sys %" PRIu64 " tsc %" PRIu64 " mul %u shift %d\n",
           __FUNCTION__,
           cpu->v.vcpu_info->time.version,
           cpu->v.vcpu_info->time.system_time,
           cpu->v.vcpu_info->time.tsc_timestamp,
           cpu->v.vcpu_info->time.tsc_to_system_mul,
           cpu->v.vcpu_info->time.tsc_shift);
}

/* --------------------------------------------------------------------- */

/*
 * Switch `cpu` to the page table root given by frame number cr3_mfn.
 *
 * The frame number is recorded in the cpu state first: on longmode
 * builds in user_cr3_mfn or kernel_cr3_mfn depending on cpu->user_mode,
 * otherwise in cr3_mfn.  Then the XEN_FAULT_OTHER_CR3_LOAD counter is
 * bumped and the address derived via frame_to_addr() is handed to
 * write_cr3().
 *
 * Both the mmu-queue flush and the cr3-cache lookup are compiled out
 * (#if 0); the commented-out `idx` local belongs to the latter.
 */
void pv_write_cr3(struct xen_cpu *cpu, ureg_t cr3_mfn)
{
    ureg_t cr3 = frame_to_addr(cr3_mfn);
//    int idx;

    /* remember which root is active for the current mode */
#if longmode
    if (cpu->user_mode)
        cpu->user_cr3_mfn = cr3_mfn;
    else
        cpu->kernel_cr3_mfn = cr3_mfn;
#else
    cpu->cr3_mfn = cr3_mfn;
#endif

    /* disabled: push out queued mmu ops before switching page tables */
#if 0
    if (pv_have_mmu_op && cpu->mmu_queue_len)
        pv_mmu_queue_flush(cpu);
#endif

    /* disabled: cr3 cache lookup (work in progress) */
#if 0
    if (cpu->cr3_cache && cpu->cr3_cache->max_idx) {
	for (idx = 0; idx < cpu->cr3_cache->max_idx; idx++) {
	    if (cpu->cr3_cache->entry[idx].guest_cr3 == cr3) {
		/*
		 * Cache-hit: we load the cached host-CR3 value.
		 * This never causes any VM exit. (if it does then the
		 * hypervisor could do nothing with this instruction
		 * and the guest OS would be aborted)
		 */
		vminfo.faults[XEN_FAULT_OTHER_CR3_CACHE_HIT]++;
		write_cr3(cpu->cr3_cache->entry[idx].host_cr3);
		return;
	    }
	}
    }
#endif

    /* plain load: count it and write the new root */
    vminfo.faults[XEN_FAULT_OTHER_CR3_LOAD]++;
    write_cr3(cr3);
    return;
}

/*
 * Allocate the cr3 cache for `cpu` (work in progress).
 *
 * Stores the result of get_pages(1, "cr3 cache") in cpu->cr3_cache and
 * logs its virtual address.  The MSR write that would register the
 * cache with the hypervisor is compiled out (#if 0).
 *
 * NOTE(review): the disabled code returns -1 / 0 although this function
 * is declared void, so it needs adjusting before it can be enabled.
 */
static void pv_init_cr3(struct xen_cpu *cpu)
{
    uint64_t cache;

    cpu->cr3_cache = get_pages(1, "cr3 cache");
//  cache = EMU_PA(cpu->cr3_cache);    /* physical */
    cache = (uintptr_t)cpu->cr3_cache; /* virtual  */
    printk(1, "%s: register cr3 cache at 0x%" PRIx64 " ...\n",
	   __FUNCTION__, cache);
#if 0
    if (0 != wrmsrl_safe(KVM_MSR_SET_CR3_CACHE, cache)) {
	printk(1, "%s: ... FAILED\n", __FUNCTION__);
	return -1;
    } else {
	printk(1, "%s: ... OK, %d entries\n",
	       __FUNCTION__, cpu->cr3_cache->max_idx);
	return 0;
    }
#endif
}

/* --------------------------------------------------------------------- */

/*
 * Submit all queued mmu ops of `cpu` to the hypervisor and empty the
 * queue.
 *
 * The queue is handed over with KVM_HC_MMU_OP; each call's return value
 * is used as the amount of queue consumed, and the call is repeated for
 * the remainder until everything has been processed.
 *
 * A return value <= 0 means no forward progress.  Looping on would spin
 * forever (0) or move the queue position backwards (negative), so this
 * is treated as fatal.  panic() is assumed not to return.
 */
void pv_mmu_queue_flush(struct xen_cpu *cpu)
{
    int rc, pos;

    if (!cpu->mmu_queue_len)
        return;

    for (pos = 0; pos < cpu->mmu_queue_len; pos += rc) {
        rc = kvm_hypercall3(KVM_HC_MMU_OP, cpu->mmu_queue_len - pos,
                            EMU_PA(cpu->mmu_queue+pos), 0);
        if (rc <= 0)
            panic("KVM_HC_MMU_OP hypercall failed", NULL);
    }
    cpu->mmu_queue_len = 0;
}

/*
 * Append one mmu op of `len` bytes from `buffer` to the queue of `cpu`.
 *
 * When the op would not fit behind what is already queued, the queue is
 * flushed first.  Callers must pass ops no larger than the queue
 * itself; this is not checked here.
 */
static void pv_mmu_queue_op(struct xen_cpu *cpu, void *buffer, int len)
{
    /* not enough room left: push out what is queued so far */
    if (sizeof(cpu->mmu_queue) < cpu->mmu_queue_len + len) {
        pv_mmu_queue_flush(cpu);
    }

    /* mmu_queue_len is re-read here on purpose -- the flush resets it */
    memcpy(cpu->mmu_queue + cpu->mmu_queue_len, buffer, len);
    cpu->mmu_queue_len = cpu->mmu_queue_len + len;
}

void pv_mmu_write(struct xen_cpu *cpu, uint64_t pa, uint64_t val)
{
    struct kvm_mmu_op_write_pte wpte;

    wpte.header.op = KVM_MMU_OP_WRITE_PTE;
    wpte.pte_phys  = pa;
    wpte.pte_val   = val;
    pv_mmu_queue_op(cpu, &wpte, sizeof(wpte));
}

void pv_mmu_flush_tlb(struct xen_cpu *cpu)
{
    struct kvm_mmu_op_flush_tlb ftlb;

    ftlb.header.op = KVM_MMU_OP_FLUSH_TLB;
    pv_mmu_queue_op(cpu, &ftlb, sizeof ftlb);
}

void pv_mmu_unpin(struct xen_cpu *cpu, uint64_t pa)
{
    struct kvm_mmu_op_release_pt rpt;

    rpt.header.op = KVM_MMU_OP_RELEASE_PT;
    rpt.pt_phys   = pa;
    pv_mmu_queue_op(cpu, &rpt, sizeof rpt);
}

/* --------------------------------------------------------------------- */

void pv_init(struct xen_cpu *cpu)
{
    char buf[128];
    struct kvm_cpuid_entry entry;
    uint32_t sig[3];
    uint32_t features;

    entry.function = KVM_CPUID_SIGNATURE;
    real_cpuid(&entry);
    sig[0] = entry.ebx;
    sig[1] = entry.ecx;
    sig[2] = entry.edx;
    if (0 != memcmp((char*)sig, "KVMKVMKVM", 10)) {
        printk(1, "%s: no kvm signature: \"%.12s\"\n",
               __FUNCTION__, (char*)sig);
        return;
    }

    entry.function = KVM_CPUID_FEATURES;
    real_cpuid(&entry);
    features = entry.eax;

    snprintf(buf, sizeof(buf), "%s: cpu %d, signature \"%.12s\", features 0x%08x",
	     __FUNCTION__, cpu->id, (char*)sig, features);
    print_bits(1, buf, features, features, feature_bits);

    /* pv clocksource */
    if (features & (1 << KVM_FEATURE_CLOCKSOURCE)) {
	pv_have_clock = 1;
	pv_clock_sys(cpu);
	if (0 == cpu->id)
	    pv_clock_wall();
    }

    /* kvm mmu ops */
    if (features & (1 << KVM_FEATURE_MMU_OP)) {
        printk(1, "%s: kvm mmu op hypercall enabled\n", __FUNCTION__);
	pv_have_mmu_op = 1;
    }

    /* cr3 cache -- WIP */
    if (0 /* features & (1 << KVM_FEATURE_CR3_CACHE) */) {
	pv_have_cr3_cache = 1;
	pv_init_cr3(cpu);
    }
}

/* --------------------------------------------------------------------- */

