On Sun, Feb 10, 2019 at 10:22 PM Waldemar Kozaczuk <jwkozac...@gmail.com>
wrote:

> - clean up modern/legacy virtio support (configurability)
> - add virtio-mmio support including parsing virtio-mmio devices from
> command line
> - enhance boot.S and OSv boot in general to support vmlinux format
> (decompressed ELF 64-bit Linux kernel)
>    - pass e820 and command line parameters through zero-page
> - make APCI optional and/or disable-able which includes:
>    - make pvpanic probe only if ACPI on
>    - make poweroff work when ACPI off
>    - parse SMP information from MP table instead of MADT if ACPI off
> - add option to turn PCI off (make OSv not even try to enumerate PCI)
> - mark boot time end
>

In the final version of this patch, please split it into a few (don't need
20) smaller patches, so it will be easier to understand why each change is
related to.


>
> Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com>
> ---
>  Makefile               |   1 +
>  arch/x64/arch-setup.cc | 180 ++++++++++++++++++++++++++++++++++++-----
>  arch/x64/boot.S        |  37 ++++++++-
>  arch/x64/loader.ld     |   2 +-
>  arch/x64/power.cc      |  14 +---
>  arch/x64/smp.cc        |  16 +++-
>  drivers/acpi.cc        |  10 ++-
>  drivers/virtio-blk.cc  |   8 +-
>  drivers/virtio-mmio.cc | 131 ++++++++++++++++++++++++++++++
>  drivers/virtio-mmio.hh | 148 +++++++++++++++++++++++++++++++++
>  drivers/virtio-net.cc  |   7 ++
>  drivers/virtio.cc      |   7 +-
>  drivers/virtio.hh      |   1 +
>  loader.cc              |   5 +-
>  14 files changed, 527 insertions(+), 40 deletions(-)
>  create mode 100644 drivers/virtio-mmio.cc
>  create mode 100644 drivers/virtio-mmio.hh
>
> diff --git a/Makefile b/Makefile
> index ff9f2ed3..b905063c 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -821,6 +821,7 @@ drivers += arch/$(arch)/pvclock-abi.o
>  drivers += drivers/virtio.o
>  drivers += drivers/virtio-pci-device.o
>  drivers += drivers/virtio-vring.o
> +drivers += drivers/virtio-mmio.o
>  drivers += drivers/virtio-net.o
>  drivers += drivers/vmxnet3.o
>  drivers += drivers/vmxnet3-queues.o
> diff --git a/arch/x64/arch-setup.cc b/arch/x64/arch-setup.cc
> index 50500662..cf5adb15 100644
> --- a/arch/x64/arch-setup.cc
> +++ b/arch/x64/arch-setup.cc
> @@ -22,6 +22,19 @@
>  #include <osv/commands.hh>
>  #include "dmi.hh"
>
> +// Not sure if Linux zero page is always located at this place
> +// in memory or its address is passed in one of the registers
> +// -> double check
> +#define ZERO_PAGE_START      0x7000
> +#define SETUP_HEADER_OFFSET  0x1f1   // look at bootparam.h in linux
> +#define BOOT_FLAG_OFFSET     sizeof(u8) + 4 * sizeof(u16) + sizeof(u32)
> +
> +#define E820_ENTRIES_OFFSET  0x1e8   // look at bootparam.h in linux
> +#define E820_TABLE_OFFSET    0x2d0   // look at bootparam.h in linux
> +
> +#define CMD_LINE_PTR_OFFSET  sizeof(u8) * 5 + sizeof(u16) * 11 +
> sizeof(u32) * 7
> +#define CMD_LINE_SIZE_OFFSET CMD_LINE_PTR_OFFSET + sizeof(u8) * 2 +
> sizeof(u16) + sizeof(u32) * 3
> +
>  struct multiboot_info_type {
>      u32 flags;
>      u32 mem_lower;
> @@ -61,12 +74,81 @@ struct e820ent {
>      u32 type;
>  } __attribute__((packed));
>
> +struct _e820ent {
> +    u64 addr;
> +    u64 size;
> +    u32 type;
> +} __attribute__((packed));
> +
>  osv_multiboot_info_type* osv_multiboot_info;
>
> -void parse_cmdline(multiboot_info_type& mb)
> +struct mmio_device_info {
> +    u64 address;
> +    u64 size;
> +    unsigned int irq;
> +};
>
+
> +//TODO: For now we are limiting number of mmio devices to two
> +// Ideally we should be using somewhat more dynamic structure
>

Why not just use a std::vector<> that can grow to any length? The nice
thing about C++ is that it makes this very easy to do :-)

+struct mmio_device_info mmio_device_info_entries[2];
> +int mmio_device_info_count = 0;
> +
> +#define VIRTIO_MMIO_DEVICE_CMDLINE_PREFIX "virtio_mmio.device="
> +char* parse_mmio_device_info(char *cmdline, mmio_device_info *info) {
>
+    // [virtio_mmio.]device=<size>@<baseaddr>:<irq>[:<id>]
>

Can you please preceed this function with an expanded version of this
comment,
explaining what kind of standard it tries to adhere to and why/where was it
specified
and who sticks it into the command line?
Also you wrote above "[virtio_mmio.]" as if it is optional, but you search
for the
specific string VIRTIO_MMIO_DEVICE_CMDLINE_PREFIX, where this is not
optional?

Alternatively, maybe things will be simpler if you just inline this parsing
code into
parse_cmdline below, so you don't need the structure to pass the results
back,
you don't need to explain the *purpose* of this function (if it's just
inside parse_cmdline), etc.


> +    char *prefix_pos = strstr(cmdline,VIRTIO_MMIO_DEVICE_CMDLINE_PREFIX);
> +    if (!prefix_pos)
> +        return nullptr;
> +
> +    char *size_pos = prefix_pos +
> strlen(VIRTIO_MMIO_DEVICE_CMDLINE_PREFIX);
> +    if (sscanf(size_pos,"%ld", &info->size) != 1)
> +        return nullptr;
> +
> +    char *at_pos = strstr(size_pos,"@");
> +    if (!at_pos)
> +        return nullptr;
> +
> +    switch(*(at_pos - 1)) {
> +        case 'k':
> +        case 'K':
> +            info->size = info->size * 1024;
> +            break;
> +        case 'm':
> +        case 'M':
> +            info->size = info->size * 1024 * 1024;
> +            break;
> +        default:
> +            break;
> +    }
> +
> +    if (sscanf(at_pos, "@%lli:%u", &info->address, &info->irq) == 2)
> +        return prefix_pos;
> +    else
> +        return nullptr;
> +}
> +
> +//void parse_cmdline(multiboot_info_type& mb)
> +void parse_cmdline(char *cmdline)
>  {
> -    auto p = reinterpret_cast<char*>(mb.cmdline);
> -    osv::parse_cmdline(p);
> +    //auto p = reinterpret_cast<char*>(mb.cmdline);
> +    // We are assuming the mmio devices information is appended to the
> +    // command line (at least it is the case with the firecracker) so
> +    // once we parse those we strip it away so only plain OSv command line
> +    // is left
> +    //TODO: There may be a smarter, better way to parse this information
> +    char *virtio_device_info_pos =
> parse_mmio_device_info(cmdline,mmio_device_info_entries);
> +    if (virtio_device_info_pos) {
> +        mmio_device_info_count++;
> +        *virtio_device_info_pos = 0;
> +
> +        virtio_device_info_pos =
> +            parse_mmio_device_info(virtio_device_info_pos +
> 1,mmio_device_info_entries + 1);
> +        if (virtio_device_info_pos) {
> +            mmio_device_info_count++;
> +        }
> +    }
> +
> +    osv::parse_cmdline(cmdline);
>  }
>
>  void setup_temporary_phys_map()
> @@ -121,28 +203,64 @@ void arch_setup_free_memory()
>  {
>      static ulong edata;
>      asm ("movl $.edata, %0" : "=rm"(edata));
> -    // copy to stack so we don't free it now
> -    auto omb = *osv_multiboot_info;
> -    auto mb = omb.mb;
> -    auto e820_buffer = alloca(mb.mmap_length);
> -    auto e820_size = mb.mmap_length;
> -    memcpy(e820_buffer, reinterpret_cast<void*>(mb.mmap_addr), e820_size);
> +
> +    void *zero_page = reinterpret_cast<void*>(ZERO_PAGE_START);
> +    void *setup_header = zero_page + SETUP_HEADER_OFFSET;
> +
> +    // Grab command line from zero page
> +    u32 cmdline_ptr = *static_cast<u32*>(setup_header +
> CMD_LINE_PTR_OFFSET);
> +    u32 cmdline_size = *static_cast<u32*>(setup_header +
> CMD_LINE_SIZE_OFFSET);
> +
> +    // Copy cmdline from zero page
> +    void* cmdline = reinterpret_cast<void*>((u64)cmdline_ptr);
> +    void *cmdline_copy = alloca(cmdline_size + 1);
> +    memcpy(cmdline_copy,cmdline,cmdline_size);
> +    ((char*)cmdline_copy)[cmdline_size] = 0;
> +
> +    debug_early("Cmdline: ");
> +    debug_early((char*)cmdline_copy);
> +    debug_early("\n");
> +
> +    // Copy e820 information from zero page
> +    struct _e820ent *e820_table = static_cast<struct _e820ent
> *>(zero_page + E820_TABLE_OFFSET);
> +
> +    //TODO: We are assuming two entries but in reality
> +    //there could be more so this logic below needs to be a little smarter
> +    auto e820_size = 48;
> +    auto e820_buffer = alloca(e820_size);
> +    {
> +       struct e820ent *lower = reinterpret_cast<struct
> e820ent*>(e820_buffer);
> +       lower->ent_size = 20;
> +       lower->type = 1;
> +       lower->addr = e820_table[0].addr;
> +       lower->size = e820_table[0].size;
> +
> +       struct e820ent *upper = lower + 1;
> +       upper->ent_size = 20;
> +       upper->type = 1;
> +       upper->addr = e820_table[1].addr;
> +       upper->size = e820_table[1].size;
> +    }
> +
>      for_each_e820_entry(e820_buffer, e820_size, [] (e820ent ent) {
>          memory::phys_mem_size += ent.size;
>      });
>      constexpr u64 initial_map = 1 << 30; // 1GB mapped by startup code
>
> -    u64 time;
> -    time = omb.tsc_init_hi;
> -    time = (time << 32) | omb.tsc_init;
> +    //TODO: We are assuming that bootchart-wise we start here but
> +    // in reality it all starts at boot.S:start64_. However what happens
> +    // before this points should take negligible amount of time, no?
> +    u64 time = 0;
> +    //time = omb.tsc_init_hi;
> +    //time = (time << 32) | omb.tsc_init;
>      boot_time.event(0, "", time );
>
> -    time = omb.tsc_disk_done_hi;
> -    time = (time << 32) | omb.tsc_disk_done;
> +    //time = omb.tsc_disk_done_hi;
> +    //time = (time << 32) | omb.tsc_disk_done;
>      boot_time.event(1, "disk read (real mode)", time );
>
> -    time = omb.tsc_uncompress_done_hi;
> -    time = (time << 32) | omb.tsc_uncompress_done;
> +    //time = omb.tsc_uncompress_done_hi;
> +    //time = (time << 32) | omb.tsc_uncompress_done;
>      boot_time.event(2, "uncompress lzloader.elf", time );
>
>      auto c = processor::cpuid(0x80000000);
> @@ -185,7 +303,10 @@ void arch_setup_free_memory()
>      elf_size = edata - elf_phys;
>      mmu::linear_map(elf_start, elf_phys, elf_size, OSV_KERNEL_BASE);
>      // get rid of the command line, before low memory is unmapped
> -    parse_cmdline(mb);
> +    //parse_cmdline(mb);
> +
> +    parse_cmdline((char*)cmdline_copy);
> +
>      // now that we have some free memory, we can start mapping the rest
>      mmu::switch_to_runtime_page_tables();
>      for_each_e820_entry(e820_buffer, e820_size, [] (e820ent ent) {
> @@ -259,6 +380,7 @@ void arch_init_premain()
>  #include "drivers/virtio-scsi.hh"
>  #include "drivers/virtio-net.hh"
>  #include "drivers/virtio-rng.hh"
> +#include "drivers/virtio-mmio.hh"
>  #include "drivers/xenplatform-pci.hh"
>  #include "drivers/ahci.hh"
>  #include "drivers/vmw-pvscsi.hh"
> @@ -268,13 +390,33 @@ void arch_init_premain()
>  void arch_init_drivers()
>  {
>      // initialize panic drivers
> -    panic::pvpanic::probe_and_setup();
> +    // pvpanic depends on ACPI which firecracker
> +    // does not suppot so we disable probing it altogether
> +    //TODO: Is there a way to detect if ACPI is available and
> +    //only then probe pvpanic?
> +    //panic::pvpanic::probe_and_setup();
>

Would be nice to call pvpanic::probe_and_setup() unconditionally, but it
will just do nothing if ACPI is not supported.


>      boot_time.event("pvpanic done");
>
>      // Enumerate PCI devices
> -    pci::pci_device_enumeration();
> +    // PCI is not supported by firecracker
> +    //TODO: Is there a way to detect if PCI is present and only enumerate
> +    //PCI devices then? Somehow even firecracker presents a bus with
> +    //some dummy devices.
> +    //pci::pci_device_enumeration();
>      boot_time.event("pci enumerated");
>
> +    // Register any parsed virtio-mmio devices
> +    for (int d = 0; d < mmio_device_info_count; d++) {
> +        auto info = mmio_device_info_entries[d];
> +        auto mmio_device = new virtio::mmio_device(info.address,
> info.size, info.irq);
> +        if (mmio_device->parse_config()) {
> +            device_manager::instance()->register_device(mmio_device);
> +        }
> +        else {
> +            delete mmio_device;
> +        }
> +    }
> +
>      // Initialize all drivers
>      hw::driver_manager* drvman = hw::driver_manager::instance();
>      drvman->register_driver(virtio::blk::probe);
> diff --git a/arch/x64/boot.S b/arch/x64/boot.S
> index c4b97e2d..3bdc57ea 100644
> --- a/arch/x64/boot.S
> +++ b/arch/x64/boot.S
> @@ -97,9 +97,10 @@ start64:
>      sub %rdi, %rcx
>      xor %eax, %eax
>      rep stosb
> +    mov $0x200000, %rbp
>      mov %rbp, elf_header
>      # %ebx is set by boot16.S before running the loader
> -    mov %rbx, osv_multiboot_info
> +    //mov %rbx, osv_multiboot_info
>      lea init_stack_top, %rsp
>      call premain
>      mov __loader_argc, %edi
> @@ -107,8 +108,42 @@ start64:
>      call main
>      .cfi_endproc
>
> +.code64
> +.global _start64
> +_start64:
> +//TODO: Is there a way to switch to protected mode and then jump to
> start32
> +//which would be even better than what we do below?
> +    //For whatever reason at this point (long mode?) we cannot
> +    //set gdt the way it is done in start32 but linux expects
> +    //similar one so whatever firecracker sets seems to be OK.
> +    //Maybe because OSv gdt has 32-bit entry
> +    //lgdt gdt_desc
> +    //mov $0x10, %eax
> +    //mov %eax, %ds
> +    //mov %eax, %es
> +    //mov %eax, %fs
> +    //mov %eax, %gs
> +    //mov %eax, %ss
> +    // Disable paging and enable PAE
> +    mov $BOOT_CR4, %eax
> +    mov %eax, %cr4
> +    // Setup page tables
> +    lea ident_pt_l4, %eax
> +    mov %eax, %cr3
> +    // Write contents of EDX:EAX to Model Specific Register specified by
> ECX register
> +    mov $0xc0000080, %ecx
> +    mov $0x00000900, %eax
> +    xor %edx, %edx
> +    wrmsr
> +    // Enable paging
> +    mov $BOOT_CR0, %eax
> +    mov %eax, %cr0
> +    jmp start64
> +
>  # The smp trampoline must be in the lower 1MB, so we manually relocate
>  # it to address 0 by subtracting smpboot from any offset
> +//TODO: I am pretty sure we have some logic missing for SMP
> +//given firecracker jumps us into long mode. Not sure how to handle it.
>  .data
>  .global smpboot
>  smpboot:
> diff --git a/arch/x64/loader.ld b/arch/x64/loader.ld
> index efe78d52..e08b310c 100644
> --- a/arch/x64/loader.ld
> +++ b/arch/x64/loader.ld
> @@ -108,4 +108,4 @@ PHDRS {
>         eh_frame PT_GNU_EH_FRAME;
>         note PT_NOTE;
>  }
> -ENTRY(start32);
> +ENTRY(_start64);
> diff --git a/arch/x64/power.cc b/arch/x64/power.cc
> index 81849335..534ffdf0 100644
> --- a/arch/x64/power.cc
> +++ b/arch/x64/power.cc
> @@ -27,17 +27,9 @@ void halt(void)
>
>  void poweroff(void)
>  {
> -    ACPI_STATUS status = AcpiEnterSleepStatePrep(ACPI_STATE_S5);
> -    if (ACPI_FAILURE(status)) {
> -        debug("AcpiEnterSleepStatePrep failed: %s\n",
> AcpiFormatException(status));
> -        halt();
> -    }
> -    status = AcpiEnterSleepState(ACPI_STATE_S5);
> -    if (ACPI_FAILURE(status)) {
> -        debug("AcpiEnterSleepState failed: %s\n",
> AcpiFormatException(status));
> -        halt();
> -    }
> -
> +    // Firecracker only supports this as away to shutdown the VM
> +    // Reset using the 8042 PS/2 Controller ("keyboard controller")
>

Again, would be nice to detect once whether ACPI is available, and then
if() the above code with a test whether ACPI available.
By the way, what happens if you don't check the above code, and call
AcpiEnterSleepState() on the firecracker hypervisor? Does it fail without
recognizing its failure? Does it crash? Or what?

+    processor::outb(0xfe, 0x64);
>      // We shouldn't get here on x86.
>      halt();
>  }
> diff --git a/arch/x64/smp.cc b/arch/x64/smp.cc
> index 073ef206..4bdae0c6 100644
> --- a/arch/x64/smp.cc
> +++ b/arch/x64/smp.cc
> @@ -74,7 +74,21 @@ void parse_madt()
>
>  void smp_init()
>  {
> -    parse_madt();
> +    //Firecracker does not support ACPI so
> +    //there is no MADT table
> +    //parse_madt();
> +
> +    //TODO: This a nasty hack as we support
> +    // single vCPU.


Do you want to start supporting firecracker for just a single vCPU?
Maybe we should commit the support only once SMP is supported as well,
since SMP more-or-less universal these days?


> Eventually we should parse out equivalent information
> +    // about all vCPUs from MP table (seems like more ancient way).
> +    // See
> https://github.com/firecracker-microvm/firecracker/blob/7f29bca9ca197283275eab62fddc1c10ab580794/x86_64/src/mptable.rs
> +    auto c = new sched::cpu(0);
> +    c->arch.apic_id = 0;//lapic->Id;
> +    c->arch.acpi_id = 0;//lapic->ProcessorId;
> +    c->arch.initstack.next = smp_stack_free;
> +    smp_stack_free = &c->arch.initstack;
> +    sched::cpus.push_back(c);
> +
>      sched::current_cpu = sched::cpus[0];
>      for (auto c : sched::cpus) {
>          c->incoming_wakeups =
> aligned_array_new<sched::cpu::incoming_wakeup_queue>(sched::cpus.size());
> diff --git a/drivers/acpi.cc b/drivers/acpi.cc
> index 506ca68f..006231f1 100644
> --- a/drivers/acpi.cc
> +++ b/drivers/acpi.cc
> @@ -605,7 +605,9 @@ void init()
>
>  }
>
> -void __attribute__((constructor(init_prio::acpi))) acpi_init_early()
> -{
> -     XENPV_ALTERNATIVE({ acpi::early_init(); }, {});
> -}
> +//TODO: Is there a way to detect if ACPI is available and do not even
> +//try to load relevant information? Right now OSv depends on ACPI
> available
> +//void __attribute__((constructor(init_prio::acpi))) acpi_init_early()
> +//{
> +//     XENPV_ALTERNATIVE({ acpi::early_init(); }, {});
> +//}
> diff --git a/drivers/virtio-blk.cc b/drivers/virtio-blk.cc
> index 0bc6fb0f..fa4cba25 100644
> --- a/drivers/virtio-blk.cc
> +++ b/drivers/virtio-blk.cc
> @@ -115,7 +115,6 @@ bool blk::ack_irq()
>  blk::blk(virtio_device& virtio_dev)
>      : virtio_driver(virtio_dev), _ro(false)
>  {
> -
>      _driver_name = "virtio-blk";
>      _id = _instance++;
>      virtio_i("VIRTIO BLK INSTANCE %d", _id);
> @@ -144,6 +143,13 @@ blk::blk(virtio_device& virtio_dev)
>              [=] { return this->ack_irq(); },
>              [=] { t->wake(); });
>      };
> +
> +    int_factory.create_gsi_edge_interrupt = [this,t]() {
> +        return new gsi_edge_interrupt(
> +                _dev.get_irq(),
> +                [=] { if (this->ack_irq()) t->wake(); });
> +    };
> +
>      _dev.register_interrupt(int_factory);
>
>      // Enable indirect descriptor
> diff --git a/drivers/virtio-mmio.cc b/drivers/virtio-mmio.cc
> new file mode 100644
> index 00000000..27d71848
> --- /dev/null
> +++ b/drivers/virtio-mmio.cc
> @@ -0,0 +1,131 @@
> +//
> +// Created by wkozaczuk on 12/25/18.
> +//
>

Please replace this by a more traditional copyright comment.

+
> +#include <osv/debug.hh>
> +#include "virtio-mmio.hh"
> +
> +namespace virtio {
> +
> +// This implements virtio-io mmio device (transport layer, modeled after
> PSI).
>

What's PSI? Maybe PCI? Something else?


> +// Read here -
> https://www.kraxel.org/virtio/virtio-v1.0-cs03-virtio-gpu.html#x1-1080002
> +hw_device_id mmio_device::get_id()
> +{
> +    return hw_device_id(0x1af4, _device_id);
> +}
> +
> +u8 mmio_device::get_status() {
> +    return mmio_getl(_addr_mmio + VIRTIO_MMIO_STATUS) & 0xff;
> +}
> +
> +void mmio_device::set_status(u8 status) {
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_STATUS, status);
> +}
> +
> +u64 mmio_device::get_available_features() {
> +    u64 features;
> +
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES_SEL, 1);
> +    features = mmio_getl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES);
> +    features <<= 32;
> +
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES_SEL, 0);
> +    features |= mmio_getl(_addr_mmio + VIRTIO_MMIO_DEVICE_FEATURES);
> +
> +    return features;
> +}
> +
> +void mmio_device::set_enabled_features(u64 features) {
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES_SEL, 1);
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES, (u32)(features >>
> 32));
> +
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES_SEL, 0);
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_DRIVER_FEATURES, (u32)features);
> +}
> +
> +void mmio_device::kick_queue(int queue_num) {
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_NOTIFY, queue_num);
> +}
> +
> +void mmio_device::select_queue(int queue_num) {
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_SEL, queue_num);
> +    assert(!mmio_getl(_addr_mmio + VIRTIO_MMIO_QUEUE_READY));
> +}
> +
> +u16 mmio_device::get_queue_size() {
> +    return mmio_getl(_addr_mmio + VIRTIO_MMIO_QUEUE_NUM_MAX) & 0xffff;
> +}
> +
> +void mmio_device::setup_queue(vring* queue) {
> +    // Set size
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_NUM, queue->size());
> +    //
> +    // Pass addresses
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_DESC_LOW,
> (u32)queue->get_desc_addr());
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_DESC_HIGH,
> (u32)(queue->get_desc_addr() >> 32));
> +
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_AVAIL_LOW,
> (u32)queue->get_avail_addr());
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_AVAIL_HIGH,
> (u32)(queue->get_avail_addr() >> 32));
> +
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_USED_LOW,
> (u32)queue->get_used_addr());
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_USED_HIGH,
> (u32)(queue->get_used_addr() >> 32));
> +}
> +
> +void mmio_device::activate_queue(int queue)
> +{
> +    //
> +    // Make it ready
> +    select_queue(queue);
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_QUEUE_READY, 1 );
> +}
> +
> +u8 mmio_device::read_and_ack_isr() {
> +    //TODO: we might need to guard this read and write with a mutex
> +    //to prevent two concurrent interrupts raised against same device step
> +    //on each other. Is it possible? Spec does not seem to say anything
> about it.
> +    unsigned long status = mmio_getl(_addr_mmio +
> VIRTIO_MMIO_INTERRUPT_STATUS);
> +    //Sometimes this assert would be false (maybe of what I am talking
> above).
> +    //assert(status & VIRTIO_MMIO_INT_VRING);
> +    mmio_setl(_addr_mmio + VIRTIO_MMIO_INTERRUPT_ACK, status);
> +    //return true;
> +    return (status & VIRTIO_MMIO_INT_VRING);
> +}
> +
> +u8 mmio_device::read_config(u32 offset) {
> +    return mmio_getb(_addr_mmio + VIRTIO_MMIO_CONFIG + offset);
> +}
> +
> +void mmio_device::register_interrupt(interrupt_factory irq_factory)
> +{
> +    _irq.reset(irq_factory.create_gsi_edge_interrupt());
> +}
> +
> +bool mmio_device::parse_config() {
> +    _addr_mmio = mmio_map(_address, _size);
> +
> +    u32 magic = mmio_getl(_addr_mmio + VIRTIO_MMIO_MAGIC_VALUE);
> +    if (magic != ('v' | 'i' << 8 | 'r' << 16 | 't' << 24)) {
> +        return false;
> +    }
> +
> +    // Check device version
> +    u32 version = mmio_getl(_addr_mmio + VIRTIO_MMIO_VERSION);
> +    if (version < 1 || version > 2) {
> +        debugf( "Version %ld not supported!\n", version);
> +        return false;
> +    }
> +
> +    _device_id = mmio_getl(_addr_mmio + VIRTIO_MMIO_DEVICE_ID);
> +    if (_device_id == 0) {
> +        //
> +        // virtio-mmio device with an ID 0 is a (dummy) placeholder
> +        // with no function. End probing now with no error reported.
> +        debug( "Dummy virtio-mmio device detected!\n");
> +        return false;
> +    }
> +    _vendor_id = mmio_getl(_addr_mmio + VIRTIO_MMIO_VENDOR_ID);
> +
> +    debugf("Detected virtio-mmio device: (%ld,%ld)\n", _device_id,
> _vendor_id);
> +    return true;
> +}
> +}
> diff --git a/drivers/virtio-mmio.hh b/drivers/virtio-mmio.hh
> new file mode 100644
> index 00000000..25c88f6a
> --- /dev/null
> +++ b/drivers/virtio-mmio.hh
> @@ -0,0 +1,148 @@
> +//
> +// Created by wkozaczuk on 12/25/18.
> +//
> +
> +#ifndef VIRTIO_MMIO_DEVICE_HH
> +#define VIRTIO_MMIO_DEVICE_HH
> +
> +#include <osv/types.h>
> +#include <osv/mmio.hh>
> +#include "virtio-device.hh"
> +#include "virtio-vring.hh"
> +
> +using namespace hw;
> +
> +/* Magic value ("virt" string) - Read Only */
> +#define VIRTIO_MMIO_MAGIC_VALUE                0x000
> +
> +/* Virtio device version - Read Only */
> +#define VIRTIO_MMIO_VERSION            0x004
> +
> +/* Virtio device ID - Read Only */
> +#define VIRTIO_MMIO_DEVICE_ID          0x008
> +
> +/* Virtio vendor ID - Read Only */
> +#define VIRTIO_MMIO_VENDOR_ID          0x00c
> +
> +/* Bitmask of the features supported by the device (host)
> + * (32 bits per set) - Read Only */
> +#define VIRTIO_MMIO_DEVICE_FEATURES    0x010
> +
> +/* Device (host) features set selector - Write Only */
> +#define VIRTIO_MMIO_DEVICE_FEATURES_SEL        0x014
> +
> +/* Bitmask of features activated by the driver (guest)
> + * (32 bits per set) - Write Only */
> +#define VIRTIO_MMIO_DRIVER_FEATURES    0x020
> +
> +/* Activated features set selector - Write Only */
> +#define VIRTIO_MMIO_DRIVER_FEATURES_SEL        0x024
> +
> +/* Queue selector - Write Only */
> +#define VIRTIO_MMIO_QUEUE_SEL          0x030
> +
> +/* Maximum size of the currently selected queue - Read Only */
> +#define VIRTIO_MMIO_QUEUE_NUM_MAX      0x034
> +
> +/* Queue size for the currently selected queue - Write Only */
> +#define VIRTIO_MMIO_QUEUE_NUM          0x038
> +
> +/* Ready bit for the currently selected queue - Read Write */
> +#define VIRTIO_MMIO_QUEUE_READY                0x044
> +
> +/* Queue notifier - Write Only */
> +#define VIRTIO_MMIO_QUEUE_NOTIFY       0x050
> +
> +/* Interrupt status - Read Only */
> +#define VIRTIO_MMIO_INTERRUPT_STATUS   0x060
> +
> +/* Interrupt acknowledge - Write Only */
> +#define VIRTIO_MMIO_INTERRUPT_ACK      0x064
> +
> +/* Device status register - Read Write */
> +#define VIRTIO_MMIO_STATUS             0x070
> +
> +/* Selected queue's Descriptor Table address, 64 bits in two halves */
> +#define VIRTIO_MMIO_QUEUE_DESC_LOW     0x080
> +#define VIRTIO_MMIO_QUEUE_DESC_HIGH    0x084
> +
> +/* Selected queue's Available Ring address, 64 bits in two halves */
> +#define VIRTIO_MMIO_QUEUE_AVAIL_LOW    0x090
> +#define VIRTIO_MMIO_QUEUE_AVAIL_HIGH   0x094
> +
> +/* Selected queue's Used Ring address, 64 bits in two halves */
> +#define VIRTIO_MMIO_QUEUE_USED_LOW     0x0a0
> +#define VIRTIO_MMIO_QUEUE_USED_HIGH    0x0a4
> +
> +/* Configuration atomicity value */
> +#define VIRTIO_MMIO_CONFIG_GENERATION  0x0fc
> +
> +/* The config space is defined by each driver as
> + * the per-driver configuration space - Read Write */
> +#define VIRTIO_MMIO_CONFIG             0x100
> +
> +#define VIRTIO_MMIO_INT_VRING          (1 << 0)
> +#define VIRTIO_MMIO_INT_CONFIG         (1 << 1)
> +
> +namespace virtio {
> +
> +class mmio_device : public virtio_device {
> +public:
> +    mmio_device(u64 address, u64 size, unsigned int irq) :
> +        _address(address), _size(size), _irq_no(irq),
> +        _vendor_id(0), _device_id(0), _addr_mmio(0) {}
> +
> +    virtual ~mmio_device() {}
> +
> +    virtual hw_device_id get_id();
> +
> +    virtual void init() {}
> +    virtual void print() {}
> +    virtual void reset() {}
> +
> +    virtual unsigned get_irq() { return _irq_no; }
> +    virtual u8 read_and_ack_isr();
> +    virtual void register_interrupt(interrupt_factory irq_factory);
> +
> +    virtual void select_queue(int queue);
> +    virtual u16 get_queue_size();
> +    virtual void setup_queue(vring *queue);
> +    virtual void activate_queue(int queue);
> +    virtual void kick_queue(int queue);
> +
> +    virtual u64 get_available_features();
> +    virtual bool get_available_feature_bit(int bit) { return false; }
> //TODO: Should be implemented ?
> +
> +    virtual void set_enabled_features(u64 features);
> +    virtual u64 get_enabled_features() { return 0; } //TODO: Should be
> implemented - function calling it in virtio.cc is not used?
> +    virtual bool get_enabled_feature_bit(int bit) { return false; }
> //TODO: Should be implemented - function calling it in virtio.cc is not
> used?
> +
> +    virtual u8 get_status();
> +    virtual void set_status(u8 status);
> +
> +    virtual u8 read_config(u32 offset);
> +    virtual void write_config(u32 offset, u8 byte) {} //TODO: Implement -
> function
> +    // calling it (virtio_driver::virtio_conf_write) is not used by
> anything?
> +
> +    virtual void dump_config() {} //TODO: Implement it
> +
> +    virtual bool is_modern() { return true; };
> +    virtual size_t get_vring_alignment() { return 4096;
> }//VIRTIO_PCI_VRING_ALIGN; };
> +
> +    bool parse_config();
> +
> +private:
> +    u64 _address;
> +    u64 _size;
> +    unsigned int _irq_no;
> +    //u64 _id;
> +    u16 _vendor_id;
> +    u16 _device_id;
> +
> +    mmioaddr_t _addr_mmio;
> +    std::unique_ptr<gsi_edge_interrupt> _irq;
> +};
> +
> +}
> +
> +#endif //VIRTIO_MMIO_DEVICE_HH
> diff --git a/drivers/virtio-net.cc b/drivers/virtio-net.cc
> index ef2dff66..3522a80d 100644
> --- a/drivers/virtio-net.cc
> +++ b/drivers/virtio-net.cc
> @@ -316,6 +316,13 @@ net::net(virtio_device& dev)
>              [=] { return this->ack_irq(); },
>              [=] { poll_task->wake(); });
>      };
> +
> +    int_factory.create_gsi_edge_interrupt = [this,poll_task]() {
> +        return new gsi_edge_interrupt(
> +            _dev.get_irq(),
> +            [=] { if (this->ack_irq()) poll_task->wake(); });
> +    };
> +
>      _dev.register_interrupt(int_factory);
>
>      fill_rx_ring();
> diff --git a/drivers/virtio.cc b/drivers/virtio.cc
> index 5ef0f94d..9b778522 100644
> --- a/drivers/virtio.cc
> +++ b/drivers/virtio.cc
> @@ -180,11 +180,16 @@ bool virtio_driver::get_device_feature_bit(int bit)
>  void virtio_driver::set_guest_features(u64 features)
>  {
>      _dev.set_enabled_features(features);
> +    _enabled_features = features;
>  }
>
>  bool virtio_driver::get_guest_feature_bit(int bit)
>  {
> -    return _dev.get_enabled_feature_bit(bit);
> +    if (_dev.is_modern()) //TODO: SO far only mmio - will work with
> modern pci?
>

You have now run.py with modern, so can't you check this?


> +        return (_enabled_features & (1 << bit)) != 0;
> +    else
> +        return _dev.get_enabled_feature_bit(bit);
> +    //return _dev.get_enabled_feature_bit(bit);
>  }
>
>  u8 virtio_driver::get_dev_status()
> diff --git a/drivers/virtio.hh b/drivers/virtio.hh
> index ef915e1c..4b59a91a 100644
> --- a/drivers/virtio.hh
> +++ b/drivers/virtio.hh
> @@ -114,6 +114,7 @@ protected:
>      bool _cap_indirect_buf;
>      bool _cap_event_idx = false;
>      static int _disk_idx;
> +    u64 _enabled_features;
>  };
>
>  template <typename T, u16 ID>
> diff --git a/loader.cc b/loader.cc
> index c29bf4c4..530e3816 100644
> --- a/loader.cc
> +++ b/loader.cc
> @@ -55,6 +55,8 @@
>  #include "drivers/null.hh"
>
>  #include "libc/network/__dns.hh"
> +#include "early-console.hh"
> +#include <processor.hh>
>
>  using namespace osv;
>  using namespace osv::clock::literals;
> @@ -426,6 +428,7 @@ void* do_main_thread(void *_main_args)
>      }
>
>      boot_time.event("Total time");
> +    processor::outb(123, 0x3f0);
>

What's this?


>      if (opt_bootchart) {
>          boot_time.print_chart();
> @@ -545,7 +548,7 @@ void main_cont(int loader_argc, char** loader_argv)
>      memory::enable_debug_allocator();
>
>  #ifndef AARCH64_PORT_STUB
> -    acpi::init();
> +    //acpi::init();
>  #endif /* !AARCH64_PORT_STUB */
>
>      if (sched::cpus.size() > sched::max_cpus) {
> --
> 2.19.1
>
> --
> You received this message because you are subscribed to the Google Groups
> "OSv Development" group.
> To unsubscribe from this group and stop receiving emails from it, send an
> email to osv-dev+unsubscr...@googlegroups.com.
> For more options, visit https://groups.google.com/d/optout.
>

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

Reply via email to