Re: [PATCH v3 4/5] efi: call get_event_log before ExitBootServices

2018-03-05 Thread Marc-André Lureau
teen 
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +
> +/*
> + * Reserve the memory associated with the TPM Event Log configuration table.
> + */
> +int __init efi_tpm_eventlog_init(void)
> +{
> +   struct linux_efi_tpm_eventlog *log_tbl;
> +   unsigned int tbl_size;
> +
> +   if (efi.tpm_log == EFI_INVALID_TABLE_ADDR)
> +   return 0;
> +
> +   log_tbl = early_memremap(efi.tpm_log, sizeof(*log_tbl));
> +   if (!log_tbl) {
> +   pr_err("Failed to map TPM Event Log table @ 0x%lx\n",
> +   efi.tpm_log);
> +   efi.tpm_log = EFI_INVALID_TABLE_ADDR;
> +   return -ENOMEM;
> +   }
> +
> +   tbl_size = sizeof(*log_tbl) + log_tbl->size;
> +   memblock_reserve(efi.tpm_log, tbl_size);
> +   early_memunmap(log_tbl, sizeof(*log_tbl));
> +   return 0;
> +}
> +
> diff --git a/include/linux/efi.h b/include/linux/efi.h
> index 8dc3d94a3e3c..c5805eb601b1 100644
> --- a/include/linux/efi.h
> +++ b/include/linux/efi.h
> @@ -472,6 +472,39 @@ typedef struct {
> u64 get_all;
>  } apple_properties_protocol_64_t;
>
> +typedef struct {
> +   u32 get_capability;
> +   u32 get_event_log;
> +   u32 hash_log_extend_event;
> +   u32 submit_command;
> +   u32 get_active_pcr_banks;
> +   u32 set_active_pcr_banks;
> +   u32 get_result_of_set_active_pcr_banks;
> +} efi_tcg2_protocol_32_t;
> +
> +typedef struct {
> +   u64 get_capability;
> +   u64 get_event_log;
> +   u64 hash_log_extend_event;
> +   u64 submit_command;
> +   u64 get_active_pcr_banks;
> +   u64 set_active_pcr_banks;
> +   u64 get_result_of_set_active_pcr_banks;
> +} efi_tcg2_protocol_64_t;
> +
> +typedef u32 efi_tcg2_event_log_format;
> +
> +typedef struct {
> +   void *get_capability;
> +   efi_status_t (*get_event_log)(efi_handle_t, efi_tcg2_event_log_format,
> +   efi_physical_addr_t *, efi_physical_addr_t *, efi_bool_t *);
> +   void *hash_log_extend_event;
> +   void *submit_command;
> +   void *get_active_pcr_banks;
> +   void *set_active_pcr_banks;
> +   void *get_result_of_set_active_pcr_banks;
> +} efi_tcg2_protocol_t;
> +
>  /*
>   * Types and defines for EFI ResetSystem
>   */
> @@ -622,6 +655,7 @@ void efi_native_runtime_setup(void);
>  #define EFI_MEMORY_ATTRIBUTES_TABLE_GUID   EFI_GUID(0xdcfa911d, 0x26eb, 
> 0x469f,  0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20)
>  #define EFI_CONSOLE_OUT_DEVICE_GUIDEFI_GUID(0xd3b36f2c, 0xd551, 
> 0x11d4,  0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
>  #define APPLE_PROPERTIES_PROTOCOL_GUID EFI_GUID(0x91bd12fe, 0xf6c3, 
> 0x44fb,  0xa5, 0xb7, 0x51, 0x22, 0xab, 0x30, 0x3a, 0xe0)
> +#define EFI_TCG2_PROTOCOL_GUID EFI_GUID(0x607f766c, 0x7455, 
> 0x42be,  0x93, 0x0b, 0xe4, 0xd7, 0x6d, 0xb2, 0x72, 0x0f)
>
>  #define EFI_IMAGE_SECURITY_DATABASE_GUID   EFI_GUID(0xd719b2cb, 0x3d3a, 
> 0x4596,  0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f)
>  #define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 
> 0x4300,  0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23)
> @@ -634,6 +668,7 @@ void efi_native_runtime_setup(void);
>  #define LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID   EFI_GUID(0xe03fc20a, 0x85dc, 
> 0x406e,  0xb9, 0x0e, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95)
>  #define LINUX_EFI_LOADER_ENTRY_GUIDEFI_GUID(0x4a67b082, 0x0a4c, 
> 0x41cf,  0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f)
>  #define LINUX_EFI_RANDOM_SEED_TABLE_GUID   EFI_GUID(0x1ce1e5bc, 0x7ceb, 
> 0x42f2,  0x81, 0xe5, 0x8a, 0xad, 0xf1, 0x80, 0xf5, 0x7b)
> +#define LINUX_EFI_TPM_EVENT_LOG_GUID   EFI_GUID(0xb7799cb0, 0xeca2, 
> 0x4943,  0x96, 0x67, 0x1f, 0xae, 0x07, 0xb7, 0x47, 0xfa)
>
>  typedef struct {
> efi_guid_t guid;
> @@ -908,6 +943,7 @@ extern struct efi {
> unsigned long properties_table; /* properties table */
> unsigned long mem_attr_table;   /* memory attributes table */
> unsigned long rng_seed; /* UEFI firmware random seed */
> +   unsigned long tpm_log;  /* TPM2 Event Log table */
> efi_get_time_t *get_time;
> efi_set_time_t *set_time;
> efi_get_wakeup_time_t *get_wakeup_time;
> @@ -1504,6 +1540,8 @@ static inline void
>  efi_enable_reset_attack_mitigation(efi_system_table_t *sys_table_arg) { }
>  #endif
>
> +void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table);
> +
>  /*
>   * Arch code can implement the following three template macros, avoiding
>   * reptition for the void/non-void return cases of {__,}efi_call_virt():
> @@ -1571,4 +1609,12 @@ struct linux_efi_random_seed {
> u8  bits[];
>  };
>
> +struct linux_efi_tpm_eventlog {
> +   u32 size;
> +   u8  version;
> +   u8  log[];
> +};
> +
> +extern int efi_tpm_eventlog_init(void);
> +
>  #endif /* _LINUX_EFI_H */
> --
> 2.14.1.821.g8fa685d3b7-goog
>



-- 
Marc-André Lureau


Re: [PATCH v16 09/11] crash: export paddr_vmcoreinfo_note()

2018-03-02 Thread Marc-André Lureau
Hi

On Fri, Mar 2, 2018 at 7:03 PM, Michael S. Tsirkin  wrote:
> On Fri, Mar 02, 2018 at 10:32:04AM +0800, Dave Young wrote:
>> Hi
>>
>> On 02/28/18 at 04:06pm, Marc-André Lureau wrote:
>> > The following patch is going to use the symbol from the fw_cfg module,
>> > to call the function and write the note location details in the
>> > vmcoreinfo entry, so qemu can produce dumps with the vmcoreinfo note.
>> >
>> > CC: Andrew Morton 
>> > CC: Baoquan He 
>> > CC: Dave Young 
>> > CC: Dave Young 
>>
>> Above duplicated a CC, btw, I remember both Baoquan and me acked it
>> before, you can find the old email and carry the acked-by.
>
> I fixed it but all this is submitter's job really.

Thanks, my bad, I missed it from v6.

>
>> > CC: Hari Bathini 
>> > CC: Tony Luck 
>> > CC: Vivek Goyal 
>> > Signed-off-by: Marc-André Lureau 
>> > Acked-by: Gabriel Somlo 
>> > ---
>> >  kernel/crash_core.c | 1 +
>> >  1 file changed, 1 insertion(+)
>> >
>> > diff --git a/kernel/crash_core.c b/kernel/crash_core.c
>> > index 4f63597c824d..a93590cdd9e1 100644
>> > --- a/kernel/crash_core.c
>> > +++ b/kernel/crash_core.c
>> > @@ -376,6 +376,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
>> >  {
>> > return __pa(vmcoreinfo_note);
>> >  }
>> > +EXPORT_SYMBOL(paddr_vmcoreinfo_note);
>> >
>> >  static int __init crash_save_vmcoreinfo_init(void)
>> >  {
>> > --
>> > 2.16.1.73.g5832b7e9f2
>> >
>>
>> Thanks
>> Dave


Re: rmi4 defunct on T460p [was: [git pull] Input updates for v4.15-rc8]

2018-03-01 Thread Marc-André Lureau
Hi

On Tue, Feb 13, 2018 at 9:22 AM, Jiri Slaby  wrote:
> On 02/09/2018, 02:39 AM, Dmitry Torokhov wrote:
>> On Thu, Feb 08, 2018 at 11:04:22AM +0100, Jiri Slaby wrote:
>>> On 01/19/2018, 06:42 PM, Dmitry Torokhov wrote:
>>>> Please pull from:
>>>>
>>>> git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input.git for-linus
>>>>
>>>> to receive updates for the input subsystem. You will get:
>>>>
>>> ...
>>>> - touchpad on Lenovo T460p is switched over to SMBus/RMI
>>> ...
>>>
>>>> 王振杰 (1):
>>>>   Input: synaptics - Lenovo Thinkpad T460p devices should use RMI
>>>
>>> Hi,
>>>
>>> one of openSUSE users with T460p reports that the touchpad is defunct
>>> after resume in 4.15. psmouse.synaptics_intertouch=0 works around the
>>> problem.
>>>
>>> The problem is:
>>>  kernel: rmi4_smbus 8-002c: failed to get SMBus version number!
>>>  kernel: rmi4_physical rmi4-00: rmi_driver_reset_handler: Failed to read
>>> current IRQ mask.
>>>  kernel: rmi4_f01 rmi4-00.fn01: Failed to restore normal operation: -16.
>>>  kernel: rmi4_f01 rmi4-00.fn01: Resume failed with code -16.
>>>  kernel: rmi4_physical rmi4-00: Failed to suspend functions: -16
>>>  kernel: rmi4_smbus 8-002c: Failed to resume device: -16
>>>  kernel: rmi4_f03 rmi4-00.fn03: rmi_f03_pt_write: Failed to write to F03
>>> TX register (-16).
>>>
>>> The downstream bug is at:
>>> https://bugzilla.suse.com/show_bug.cgi?id=1079862
>>>
>>> Any ideas?
>>
>> Hmm, maybe the sequence between psmouse and rmi/smbus is not quite right
>> on resume on that box. Can you ask the reporter to:
>>
>> echo 1 > /sys/power/pm_print_times
>> suspend/resume
>
> He uploaded it here:
> https://bugzilla.suse.com/attachment.cgi?id=759933
>

Same problem (with f27 and upstream kernel), is there anything else we
can do to help? Should we open a bug on http://bugzilla.kernel.org ?

thanks

-- 
Marc-André Lureau


Re: [PATCH v16 06/11] fw_cfg: handle fw_cfg_read_blob() error

2018-03-01 Thread Marc-André Lureau
Hi

On Wed, Feb 28, 2018 at 6:32 PM, Michael S. Tsirkin  wrote:
>> @@ -557,7 +566,10 @@ static int fw_cfg_sysfs_probe(struct platform_device 
>> *pdev)
>>   goto err_probe;
>>
>>   /* get revision number, add matching top-level attribute */
>> - fw_cfg_read_blob(FW_CFG_ID, &rev, 0, sizeof(rev));
>> + err = fw_cfg_read_blob(FW_CFG_ID, &rev, 0, sizeof(rev));
>> + if (err < 0)
>> + goto err_probe;
>> +
>>   fw_cfg_rev = le32_to_cpu(rev);
>>   err = sysfs_create_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
>>   if (err)
>
> So on this one, maybe we should just drop this attribute.
> Does anyone use it?
> Removing it will speed up boot slightly.

We can't skip reading FW_CFG_ID (rev) as we need it to check DMA
support. I don't mind if we remove the sysfs entry, but I doubt it
makes a difference in boot time.

>> --
>> 2.16.1.73.g5832b7e9f2


Re: [PATCH v15 11/11] RFC: fw_cfg: do DMA read operation

2018-02-28 Thread Marc-André Lureau
Hi

On Wed, Feb 28, 2018 at 6:17 PM, Michael S. Tsirkin  wrote:
> On Wed, Feb 28, 2018 at 04:41:51PM +0100, Marc-André Lureau wrote:
>> I don't know if it's always safe to enable DMA in read_raw(); how
>> could we know? Is there a check we could use to choose one or the
>> other (and thus avoid the explicit dma/readfn argument)?
>
> IMHO the way to go is not to try to do zero copy.
> Allocate a buffer and DMA there, then copy.

Sounds fine to me, I'll resend this patch separately if the rest from
v16 is applied.

thanks


Re: [PATCH v15 11/11] RFC: fw_cfg: do DMA read operation

2018-02-28 Thread Marc-André Lureau
Hi

On Wed, Feb 28, 2018 at 4:48 PM, Michael S. Tsirkin  wrote:
> On Wed, Feb 28, 2018 at 04:41:51PM +0100, Marc-André Lureau wrote:
>> I don't know if it's always safe to enable DMA in read_raw(); how
>> could we know? Is there a check we could use to choose one or the
>> other (and thus avoid the explicit dma/readfn argument)?
>
> I'm not sure - but does it really matter? Is anyone reading large files
> like this in production where speed matters?
> Why even bother with DMA?

The difference is quite significant for not-so-small files, as shown above.

And if they access the fw_cfg entries at boot time, or when starting
things etc, this may speed things up.


Re: [PATCH v15 11/11] RFC: fw_cfg: do DMA read operation

2018-02-28 Thread Marc-André Lureau
On Wed, Feb 28, 2018 at 4:35 PM, Michael S. Tsirkin  wrote:
> On Wed, Feb 28, 2018 at 01:27:02PM +0100, Marc-André Lureau wrote:
>> Hi
>>
>> On Tue, Feb 27, 2018 at 1:04 AM, Michael S. Tsirkin  wrote:
>> > On Thu, Feb 15, 2018 at 10:33:12PM +0100, Marc-André Lureau wrote:
>> >> Modify fw_cfg_read_blob() to use DMA if the device supports it.
>> >> Return errors, because the operation may fail.
>> >>
>> >> So far, only one call in fw_cfg_register_dir_entries() is using
>> >> kmalloc'ed buf and is thus clearly eligible to DMA read.
>> >>
>> >> Initially, I didn't implement DMA read to speed up boot time, but as a
>> >> first step before introducing DMA write (since read operations were
>> >> already present). Even more, I didn't realize fw-cfg entries were
>> >> being read by the kernel during boot by default. But actually fw-cfg
>> >> entries are being populated during module probe. I knew DMA improved
>> >> BIOS boot time a lot (the main reason the DMA interface was added,
>> >> afaik). Let's see the time it would take to read the whole ACPI
>> >> tables (128kb allocated):
>> >>
>> >>  # time cat /sys/firmware/qemu_fw_cfg/by_name/etc/acpi/tables/raw
>> >>   - with DMA: sys 0m0.003s
>> >>   - without DMA (-global fw_cfg.dma_enabled=off): sys 0m7.674s
>> >>
>> >> FW_CFG_FILE_DIR (0x19) is the only "file" that is read during kernel
>> >> boot to populate the sysfs qemu_fw_cfg directory, and it is quite
>> >> small (1-2kb). Since it does not expose itself, in order to measure
>> >> the time it takes to read such a small file, I took a comparably sized
>> >> file of 2048 bytes and exposed it (-fw_cfg test,file=file with a
>> >> modified read_raw enabling DMA)
>> >>
>> >>  # perf stat -r 100 cat /sys/firmware/qemu_fw_cfg/by_name/test/raw 
>> >> >/dev/null
>> >>   - with DMA:
>> >>   0.636037  task-clock (msec) #0.141 CPUs 
>> >> utilized    ( +-  1.19% )
>> >>   - without DMA:
>> >>   6.430128  task-clock (msec) #0.622 CPUs 
>> >> utilized( +-  0.22% )
>> >>
>> >> That's a few msec saved during boot by enabling DMA read (the gain
>> >> would be more substantial if other & bigger fw-cfg entries are read by
>> >> others from sysfs, unfortunately, it's not clear if we can always
>> >> enable DMA there)
>> >>
>> >> Signed-off-by: Marc-André Lureau 
>> >> ---
>> >>  drivers/firmware/qemu_fw_cfg.c | 61 
>> >> ++
>> >>  1 file changed, 50 insertions(+), 11 deletions(-)
>> >>
>> >> diff --git a/drivers/firmware/qemu_fw_cfg.c 
>> >> b/drivers/firmware/qemu_fw_cfg.c
>> >> index 3015e77aebca..94df57e9be66 100644
>> >> --- a/drivers/firmware/qemu_fw_cfg.c
>> >> +++ b/drivers/firmware/qemu_fw_cfg.c
>> >> @@ -124,12 +124,47 @@ static ssize_t fw_cfg_dma_transfer(void *address, 
>> >> u32 length, u32 control)
>> >>   return ret;
>> >>  }
>> >>
>> >> +/* with acpi & dev locks taken */
>> >> +static ssize_t fw_cfg_read_blob_dma(u16 key,
>> >> + void *buf, loff_t pos, size_t count)
>> >> +{
>> >> + ssize_t ret;
>> >> +
>> >> + if (pos == 0) {
>> >> + ret = fw_cfg_dma_transfer(buf, count, key << 16
>> >> + | FW_CFG_DMA_CTL_SELECT
>> >> + | FW_CFG_DMA_CTL_READ);
>> >> + } else {
>> >> + fw_cfg_sel_endianness(key);
>> >> + ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
>> >> + if (ret < 0)
>> >> + return ret;
>> >> + ret = fw_cfg_dma_transfer(buf, count,
>> >> + FW_CFG_DMA_CTL_READ);
>> >> + }
>> >> +
>> >> + return ret;
>> >> +}
>> >> +
>> >> +/* with acpi & dev locks taken */
>> >> +static ssize_t fw_cfg_read_blob_io(u16 key,
>> >> + void *buf, loff_t pos, size_t count)
>> >> +{
>> >> + fw_cf

[PATCH v16 01/11] fw_cfg: fix sparse warnings in fw_cfg_sel_endianness()

2018-02-28 Thread Marc-André Lureau
Dispatch to the appropriate iowrite() instead of casting restricted
type to u16.

- if fw_cfg_is_mmio:
  before: iowrite16(cpu_to_be16(key))
  after: iowrite16be(key)
- if !fw_cfg_is_mmio:
  before: iowrite16(cpu_to_le16(key))
  after: iowrite16(key)
  which is equivalent on little-endian systems, where fw_cfg IO is supported.

Fixes:
$ make C=1 CF=-D__CHECK_ENDIAN__ drivers/firmware/qemu_fw_cfg.o

drivers/firmware/qemu_fw_cfg.c:55:33: warning: restricted __be16 degrades to 
integer
drivers/firmware/qemu_fw_cfg.c:55:52: warning: restricted __le16 degrades to 
integer

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index a41b572eeeb1..e7ea2b3b1d11 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -68,9 +68,12 @@ static void __iomem *fw_cfg_reg_data;
 static DEFINE_MUTEX(fw_cfg_dev_lock);
 
 /* pick appropriate endianness for selector key */
-static inline u16 fw_cfg_sel_endianness(u16 key)
+static void fw_cfg_sel_endianness(u16 key)
 {
-   return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
+   if (fw_cfg_is_mmio)
+   iowrite16be(key, fw_cfg_reg_ctrl);
+   else
+   iowrite16(key, fw_cfg_reg_ctrl);
 }
 
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
@@ -92,7 +95,7 @@ static inline void fw_cfg_read_blob(u16 key,
}
 
mutex_lock(&fw_cfg_dev_lock);
-   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+   fw_cfg_sel_endianness(key);
while (pos-- > 0)
ioread8(fw_cfg_reg_data);
ioread8_rep(fw_cfg_reg_data, buf, count);
-- 
2.16.1.73.g5832b7e9f2



[PATCH v16 04/11] fw_cfg: fix sparse warnings around FW_CFG_FILE_DIR read

2018-02-28 Thread Marc-André Lureau
Use struct fw_cfg_files to read the directory size, fixing the sparse
warnings:

drivers/firmware/qemu_fw_cfg.c:485:17: warning: cast to restricted __be32

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 0eb155fdfb35..00ad9b862414 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -496,19 +496,20 @@ static int fw_cfg_register_file(const struct fw_cfg_file 
*f)
 static int fw_cfg_register_dir_entries(void)
 {
int ret = 0;
+   __be32 files_count;
u32 count, i;
struct fw_cfg_file *dir;
size_t dir_size;
 
-   fw_cfg_read_blob(FW_CFG_FILE_DIR, &count, 0, sizeof(count));
-   count = be32_to_cpu(count);
+   fw_cfg_read_blob(FW_CFG_FILE_DIR, &files_count, 0, sizeof(files_count));
+   count = be32_to_cpu(files_count);
dir_size = count * sizeof(struct fw_cfg_file);
 
dir = kmalloc(dir_size, GFP_KERNEL);
if (!dir)
return -ENOMEM;
 
-   fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(count), dir_size);
+   fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(files_count), dir_size);
 
for (i = 0; i < count; i++) {
ret = fw_cfg_register_file(&dir[i]);
-- 
2.16.1.73.g5832b7e9f2



[PATCH v16 06/11] fw_cfg: handle fw_cfg_read_blob() error

2018-02-28 Thread Marc-André Lureau
fw_cfg_read_blob() may fail, but does not return an error. This may lead
to surprising behaviours, like populating zero file entries (in
register_file() or during read). Return an error if ACPI locking
failed. Also, the following DMA read/write extension will add more
error paths that should be handled appropriately.

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 32 ++--
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 0cc71d028ae3..45bfc389b226 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -77,8 +77,8 @@ static void fw_cfg_sel_endianness(u16 key)
 }
 
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
-static void fw_cfg_read_blob(u16 key,
-   void *buf, loff_t pos, size_t count)
+static ssize_t fw_cfg_read_blob(u16 key,
+   void *buf, loff_t pos, size_t count)
 {
u32 glk = -1U;
acpi_status status;
@@ -91,7 +91,7 @@ static void fw_cfg_read_blob(u16 key,
/* Should never get here */
WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
memset(buf, 0, count);
-   return;
+   return -EINVAL;
}
 
mutex_lock(&fw_cfg_dev_lock);
@@ -102,6 +102,7 @@ static void fw_cfg_read_blob(u16 key,
mutex_unlock(&fw_cfg_dev_lock);
 
acpi_release_global_lock(glk);
+   return count;
 }
 
 /* clean up fw_cfg device i/o */
@@ -183,8 +184,9 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
}
 
/* verify fw_cfg device signature */
-   fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
-   if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
+   if (fw_cfg_read_blob(FW_CFG_SIGNATURE, sig,
+   0, FW_CFG_SIG_SIZE) < 0 ||
+   memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
fw_cfg_io_cleanup();
return -ENODEV;
}
@@ -344,8 +346,7 @@ static ssize_t fw_cfg_sysfs_read_raw(struct file *filp, 
struct kobject *kobj,
if (count > entry->size - pos)
count = entry->size - pos;
 
-   fw_cfg_read_blob(entry->select, buf, pos, count);
-   return count;
+   return fw_cfg_read_blob(entry->select, buf, pos, count);
 }
 
 static struct bin_attribute fw_cfg_sysfs_attr_raw = {
@@ -501,7 +502,11 @@ static int fw_cfg_register_dir_entries(void)
struct fw_cfg_file *dir;
size_t dir_size;
 
-   fw_cfg_read_blob(FW_CFG_FILE_DIR, &files_count, 0, sizeof(files_count));
+   ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, &files_count,
+   0, sizeof(files_count));
+   if (ret < 0)
+   return ret;
+
count = be32_to_cpu(files_count);
dir_size = count * sizeof(struct fw_cfg_file);
 
@@ -509,7 +514,10 @@ static int fw_cfg_register_dir_entries(void)
if (!dir)
return -ENOMEM;
 
-   fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(files_count), dir_size);
+   ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, dir,
+   sizeof(files_count), dir_size);
+   if (ret < 0)
+   goto end;
 
for (i = 0; i < count; i++) {
ret = fw_cfg_register_file(&dir[i]);
@@ -517,6 +525,7 @@ static int fw_cfg_register_dir_entries(void)
break;
}
 
+end:
kfree(dir);
return ret;
 }
@@ -557,7 +566,10 @@ static int fw_cfg_sysfs_probe(struct platform_device *pdev)
goto err_probe;
 
/* get revision number, add matching top-level attribute */
-   fw_cfg_read_blob(FW_CFG_ID, &rev, 0, sizeof(rev));
+   err = fw_cfg_read_blob(FW_CFG_ID, &rev, 0, sizeof(rev));
+   if (err < 0)
+   goto err_probe;
+
fw_cfg_rev = le32_to_cpu(rev);
err = sysfs_create_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
if (err)
-- 
2.16.1.73.g5832b7e9f2



[PATCH v16 05/11] fw_cfg: remove inline from fw_cfg_read_blob()

2018-02-28 Thread Marc-André Lureau
The function is not small and getting bigger.

Let the compiler decide instead. No profiling done, hopefully
unnecessary.

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 00ad9b862414..0cc71d028ae3 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -77,8 +77,8 @@ static void fw_cfg_sel_endianness(u16 key)
 }
 
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
-static inline void fw_cfg_read_blob(u16 key,
-   void *buf, loff_t pos, size_t count)
+static void fw_cfg_read_blob(u16 key,
+   void *buf, loff_t pos, size_t count)
 {
u32 glk = -1U;
acpi_status status;
-- 
2.16.1.73.g5832b7e9f2



[PATCH v16 09/11] crash: export paddr_vmcoreinfo_note()

2018-02-28 Thread Marc-André Lureau
The following patch is going to use the symbol from the fw_cfg module,
to call the function and write the note location details in the
vmcoreinfo entry, so qemu can produce dumps with the vmcoreinfo note.

CC: Andrew Morton 
CC: Baoquan He 
CC: Dave Young 
CC: Dave Young 
CC: Hari Bathini 
CC: Tony Luck 
CC: Vivek Goyal 
Signed-off-by: Marc-André Lureau 
Acked-by: Gabriel Somlo 
---
 kernel/crash_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 4f63597c824d..a93590cdd9e1 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -376,6 +376,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
 {
return __pa(vmcoreinfo_note);
 }
+EXPORT_SYMBOL(paddr_vmcoreinfo_note);
 
 static int __init crash_save_vmcoreinfo_init(void)
 {
-- 
2.16.1.73.g5832b7e9f2



[PATCH v16 10/11] fw_cfg: write vmcoreinfo details

2018-02-28 Thread Marc-André Lureau
If the "etc/vmcoreinfo" fw_cfg file is present and we are not running
the kdump kernel, write the addr/size of the vmcoreinfo ELF note.

The DMA operation is expected to run synchronously with today's qemu,
but the specification states that it may become async, so we run the
"control" field check in a loop to cope with possible future changes.

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c   | 145 ++-
 include/uapi/linux/qemu_fw_cfg.h |  31 +
 2 files changed, 173 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index df028faa2d00..14fedbeca724 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -34,11 +34,17 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
 MODULE_LICENSE("GPL");
 
+/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
+static u32 fw_cfg_rev;
+
 /* fw_cfg device i/o register addresses */
 static bool fw_cfg_is_mmio;
 static phys_addr_t fw_cfg_p_base;
@@ -60,6 +66,66 @@ static void fw_cfg_sel_endianness(u16 key)
iowrite16(key, fw_cfg_reg_ctrl);
 }
 
+#ifdef CONFIG_CRASH_CORE
+static inline bool fw_cfg_dma_enabled(void)
+{
+   return (fw_cfg_rev & FW_CFG_VERSION_DMA) && fw_cfg_reg_dma;
+}
+
+/* qemu fw_cfg device is sync today, but spec says it may become async */
+static void fw_cfg_wait_for_control(struct fw_cfg_dma_access *d)
+{
+   for (;;) {
+   u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
+
+   /* do not reorder the read to d->control */
+   rmb();
+   if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
+   return;
+
+   cpu_relax();
+   }
+}
+
+static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
+{
+   phys_addr_t dma;
+   struct fw_cfg_dma_access *d = NULL;
+   ssize_t ret = length;
+
+   d = kmalloc(sizeof(*d), GFP_KERNEL);
+   if (!d) {
+   ret = -ENOMEM;
+   goto end;
+   }
+
+   /* fw_cfg device does not need IOMMU protection, so use physical 
addresses */
+   *d = (struct fw_cfg_dma_access) {
+   .address = cpu_to_be64(address ? virt_to_phys(address) : 0),
+   .length = cpu_to_be32(length),
+   .control = cpu_to_be32(control)
+   };
+
+   dma = virt_to_phys(d);
+
+   iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
+   /* force memory to sync before notifying device via MMIO */
+   wmb();
+   iowrite32be(dma, fw_cfg_reg_dma + 4);
+
+   fw_cfg_wait_for_control(d);
+
+   if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
+   ret = -EIO;
+   }
+
+end:
+   kfree(d);
+
+   return ret;
+}
+#endif
+
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
 static ssize_t fw_cfg_read_blob(u16 key,
void *buf, loff_t pos, size_t count)
@@ -89,6 +155,47 @@ static ssize_t fw_cfg_read_blob(u16 key,
return count;
 }
 
+#ifdef CONFIG_CRASH_CORE
+/* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
+static ssize_t fw_cfg_write_blob(u16 key,
+void *buf, loff_t pos, size_t count)
+{
+   u32 glk = -1U;
+   acpi_status status;
+   ssize_t ret = count;
+
+   /* If we have ACPI, ensure mutual exclusion against any potential
+* device access by the firmware, e.g. via AML methods:
+*/
+   status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
+   if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
+   /* Should never get here */
+   WARN(1, "%s: Failed to lock ACPI!\n", __func__);
+   return -EINVAL;
+   }
+
+   mutex_lock(&fw_cfg_dev_lock);
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(buf, count, key << 16
+ | FW_CFG_DMA_CTL_SELECT
+ | FW_CFG_DMA_CTL_WRITE);
+   } else {
+   fw_cfg_sel_endianness(key);
+   ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
+   if (ret < 0)
+   goto end;
+   ret = fw_cfg_dma_transfer(buf, count, FW_CFG_DMA_CTL_WRITE);
+   }
+
+end:
+   mutex_unlock(&fw_cfg_dev_lock);
+
+   acpi_release_global_lock(glk);
+
+   return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
 /* clean up fw_cfg device i/o */
 static void fw_cfg_io_cleanup(void)
 {
@@ -188,9 +295,6 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
return 0;
 }
 
-/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg t

[PATCH v16 11/11] RFC: fw_cfg: do DMA read operation

2018-02-28 Thread Marc-André Lureau
Modify fw_cfg_read_blob() to use DMA if the device supports it.
Return errors, because the operation may fail.

So far, only one call in fw_cfg_register_dir_entries() is using
kmalloc'ed buf and is thus clearly eligible to DMA read.

Initially, I didn't implement DMA read to speed up boot time, but as a
first step before introducing DMA write (since read operations were
already present). Even more, I didn't realize fw-cfg entries were
being read by the kernel during boot by default. But actually fw-cfg
entries are being populated during module probe. I knew DMA improved
BIOS boot time a lot (the main reason the DMA interface was added,
afaik). Let's see the time it would take to read the whole ACPI
tables (128kb allocated):

 # time cat /sys/firmware/qemu_fw_cfg/by_name/etc/acpi/tables/raw
  - with DMA: sys 0m0.003s
  - without DMA (-global fw_cfg.dma_enabled=off): sys 0m7.674s

FW_CFG_FILE_DIR (0x19) is the only "file" that is read during kernel
boot to populate the sysfs qemu_fw_cfg directory, and it is quite
small (1-2kb). Since it does not expose itself, in order to measure
the time it takes to read such a small file, I took a comparably sized
file of 2048 bytes and exposed it (-fw_cfg test,file=file with a
modified read_raw enabling DMA)

 # perf stat -r 100 cat /sys/firmware/qemu_fw_cfg/by_name/test/raw >/dev/null
  - with DMA:
  0.636037  task-clock (msec) #0.141 CPUs utilized  
  ( +-  1.19% )
  - without DMA:
  6.430128  task-clock (msec) #0.622 CPUs utilized  
  ( +-  0.22% )

That's a few msec saved during boot by enabling DMA read (the gain
would be more substantial if other & bigger fw-cfg entries are read by
others from sysfs, unfortunately, it's not clear if we can always
enable DMA there)

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 68 ++
 1 file changed, 55 insertions(+), 13 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 14fedbeca724..db1cba4f99bd 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -66,7 +66,6 @@ static void fw_cfg_sel_endianness(u16 key)
iowrite16(key, fw_cfg_reg_ctrl);
 }
 
-#ifdef CONFIG_CRASH_CORE
 static inline bool fw_cfg_dma_enabled(void)
 {
return (fw_cfg_rev & FW_CFG_VERSION_DMA) && fw_cfg_reg_dma;
@@ -124,14 +123,49 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 
length, u32 control)
 
return ret;
 }
-#endif
+
+/* with acpi & dev locks taken */
+static ssize_t fw_cfg_read_blob_dma(u16 key,
+   void *buf, loff_t pos, size_t count)
+{
+   ssize_t ret;
+
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(buf, count, key << 16
+   | FW_CFG_DMA_CTL_SELECT
+   | FW_CFG_DMA_CTL_READ);
+   } else {
+   fw_cfg_sel_endianness(key);
+   ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
+   if (ret < 0)
+   return ret;
+   ret = fw_cfg_dma_transfer(buf, count,
+   FW_CFG_DMA_CTL_READ);
+   }
+
+   return ret;
+}
+
+/* with acpi & dev locks taken */
+static ssize_t fw_cfg_read_blob_io(u16 key,
+   void *buf, loff_t pos, size_t count)
+{
+   fw_cfg_sel_endianness(key);
+   while (pos-- > 0)
+   ioread8(fw_cfg_reg_data);
+   ioread8_rep(fw_cfg_reg_data, buf, count);
+   return count;
+}
 
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
 static ssize_t fw_cfg_read_blob(u16 key,
-   void *buf, loff_t pos, size_t count)
+   void *buf, loff_t pos, size_t count,
+   ssize_t (*readfn)(u16 key, void *buf,
+   loff_t pos, size_t count))
 {
u32 glk = -1U;
acpi_status status;
+   ssize_t ret;
 
/* If we have ACPI, ensure mutual exclusion against any potential
 * device access by the firmware, e.g. via AML methods:
@@ -145,14 +179,19 @@ static ssize_t fw_cfg_read_blob(u16 key,
}
 
mutex_lock(&fw_cfg_dev_lock);
-   fw_cfg_sel_endianness(key);
-   while (pos-- > 0)
-   ioread8(fw_cfg_reg_data);
-   ioread8_rep(fw_cfg_reg_data, buf, count);
+
+   /* fallback to IO if DMA is not available */
+   if (readfn == fw_cfg_read_blob_dma && !fw_cfg_dma_enabled()) {
+   readfn = fw_cfg_read_blob_io;
+   }
+
+   ret = readfn(key, buf, pos, count);
+
mutex_unlock(&fw_cfg_dev_lock);
 
acpi_release_global_lock(glk);
-   return count;
+
+   return ret;
 }
 
 #ifdef CONFIG_CRASH_CORE
@@ -286,7 +32

[PATCH v16 08/11] fw_cfg: add DMA register

2018-02-28 Thread Marc-André Lureau
Add an optional <dma_off> kernel module (or command line) parameter
using the following syntax:

  [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
 or
  [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]

and initializes the register address using given or default offset.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 53 --
 1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 5de6bb406fb6..df028faa2d00 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -10,20 +10,21 @@
  * and select subsets of aarch64), a Device Tree node (on arm), or using
  * a kernel module (or command line) parameter with the following syntax:
  *
- *  [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>]
+ *  [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
  * or
- *  [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>]
+ *  [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
  *
  * where:
  *  <size> := size of ioport or mmio range
  *  <base> := physical base address of ioport or mmio range
  *  <ctrl_off> := (optional) offset of control register
  *  <data_off> := (optional) offset of data register
+ *  <dma_off> := (optional) offset of dma register
  *
  * e.g.:
- *  qemu_fw_cfg.ioport=2@0x510:0:1 (the default on x86)
+ *  qemu_fw_cfg.ioport=12@0x510:0:1:4  (the default on x86)
  * or
- *  qemu_fw_cfg.mmio=0xA@0x9020000:8:0 (the default on arm)
+ *  qemu_fw_cfg.mmio=16@0x9020000:8:0:16   (the default on arm)
  */
 
 #include 
@@ -45,6 +46,7 @@ static resource_size_t fw_cfg_p_size;
 static void __iomem *fw_cfg_dev_base;
 static void __iomem *fw_cfg_reg_ctrl;
 static void __iomem *fw_cfg_reg_data;
+static void __iomem *fw_cfg_reg_dma;
 
 /* atomic access to fw_cfg device (potentially slow i/o, so using mutex) */
 static DEFINE_MUTEX(fw_cfg_dev_lock);
@@ -104,12 +106,14 @@ static void fw_cfg_io_cleanup(void)
 # if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
 #  define FW_CFG_CTRL_OFF 0x08
 #  define FW_CFG_DATA_OFF 0x00
+#  define FW_CFG_DMA_OFF 0x10
 # elif (defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC32)) /* ppc/mac,sun4m 
*/
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x02
 # elif (defined(CONFIG_X86) || defined(CONFIG_SPARC64)) /* x86, sun4u */
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x01
+#  define FW_CFG_DMA_OFF 0x04
 # else
 #  error "QEMU FW_CFG not available on this architecture!"
 # endif
@@ -119,7 +123,7 @@ static void fw_cfg_io_cleanup(void)
 static int fw_cfg_do_platform_probe(struct platform_device *pdev)
 {
char sig[FW_CFG_SIG_SIZE];
-   struct resource *range, *ctrl, *data;
+   struct resource *range, *ctrl, *data, *dma;
 
/* acquire i/o range details */
fw_cfg_is_mmio = false;
@@ -156,6 +160,7 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
/* were custom register offsets provided (e.g. on the command line)? */
ctrl = platform_get_resource_byname(pdev, IORESOURCE_REG, "ctrl");
data = platform_get_resource_byname(pdev, IORESOURCE_REG, "data");
+   dma = platform_get_resource_byname(pdev, IORESOURCE_REG, "dma");
if (ctrl && data) {
fw_cfg_reg_ctrl = fw_cfg_dev_base + ctrl->start;
fw_cfg_reg_data = fw_cfg_dev_base + data->start;
@@ -165,6 +170,13 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
fw_cfg_reg_data = fw_cfg_dev_base + FW_CFG_DATA_OFF;
}
 
+   if (dma)
+   fw_cfg_reg_dma = fw_cfg_dev_base + dma->start;
+#ifdef FW_CFG_DMA_OFF
+   else
+   fw_cfg_reg_dma = fw_cfg_dev_base + FW_CFG_DMA_OFF;
+#endif
+
/* verify fw_cfg device signature */
if (fw_cfg_read_blob(FW_CFG_SIGNATURE, sig,
0, FW_CFG_SIG_SIZE) < 0 ||
@@ -630,6 +642,7 @@ static struct platform_device *fw_cfg_cmdline_dev;
 /* use special scanf/printf modifier for phys_addr_t, resource_size_t */
 #define PH_ADDR_SCAN_FMT "@%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i" \
+":%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i%n"
 
 #define PH_ADDR_PR_1_FMT "0x%" __PHYS_ADDR_PREFIX "x@" \
@@ -639,12 +652,15 @@ static struct platform_device *fw_cfg_cmdline_dev;
 ":%" __PHYS_ADDR_PREFIX "u" \
 ":%" __PHYS_ADDR_PREFIX "u"
 
+#define PH_ADDR_PR_4_FMT PH_ADDR_PR_3_FMT \
+":%" __PHYS_ADDR_PREFIX "u"
+
 static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
 {
-   struct resource res[3] = {};
+   struct resource res[4] = {};
char *str;
phys_addr_t base;
-

[PATCH v16 07/11] fw_cfg: add a public uapi header

2018-02-28 Thread Marc-André Lureau
Create a common header file for well-known values and structures to be
shared by the Linux kernel with qemu or other projects.

It is based on qemu/docs/specs/fw_cfg.txt which references
qemu/include/hw/nvram/fw_cfg_keys.h "for the most up-to-date and
authoritative list" & vmcoreinfo.txt. Those files don't have an
explicit license, but qemu/hw/nvram/fw_cfg.c is BSD-license, so
Michael S. Tsirkin suggested to use the same license.

The patch intentionally left out DMA & vmcoreinfo structures &
defines, which are added in the commits making usage of it.

Suggested-by: Michael S. Tsirkin 
Signed-off-by: Marc-André Lureau 

---

The related qemu patch making use of it, to be submitted:
https://github.com/elmarco/qemu/commit/4884fc9e9c4c4467a371e5a40f3181239e1b70f5
---
 MAINTAINERS  |  1 +
 drivers/firmware/qemu_fw_cfg.c   | 22 ++
 include/uapi/linux/qemu_fw_cfg.h | 66 
 3 files changed, 69 insertions(+), 20 deletions(-)
 create mode 100644 include/uapi/linux/qemu_fw_cfg.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 93a12af4f180..0ce1b3e536fd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11353,6 +11353,7 @@ M:  "Michael S. Tsirkin" 
 L: qemu-de...@nongnu.org
 S: Maintained
 F: drivers/firmware/qemu_fw_cfg.c
+F: include/uapi/linux/qemu_fw_cfg.h
 
 QIB DRIVER
 M: Dennis Dalessandro 
diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 45bfc389b226..5de6bb406fb6 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -32,30 +32,12 @@
 #include 
 #include 
 #include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
 MODULE_LICENSE("GPL");
 
-/* selector key values for "well-known" fw_cfg entries */
-#define FW_CFG_SIGNATURE  0x00
-#define FW_CFG_ID 0x01
-#define FW_CFG_FILE_DIR   0x19
-
-/* size in bytes of fw_cfg signature */
-#define FW_CFG_SIG_SIZE 4
-
-/* fw_cfg "file name" is up to 56 characters (including terminating nul) */
-#define FW_CFG_MAX_FILE_PATH 56
-
-/* fw_cfg file directory entry type */
-struct fw_cfg_file {
-   u32 size;
-   u16 select;
-   u16 reserved;
-   char name[FW_CFG_MAX_FILE_PATH];
-};
-
 /* fw_cfg device i/o register addresses */
 static bool fw_cfg_is_mmio;
 static phys_addr_t fw_cfg_p_base;
@@ -616,7 +598,7 @@ MODULE_DEVICE_TABLE(of, fw_cfg_sysfs_mmio_match);
 
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id fw_cfg_sysfs_acpi_match[] = {
-   { "QEMU0002", },
+   { FW_CFG_ACPI_DEVICE_ID, },
{},
 };
 MODULE_DEVICE_TABLE(acpi, fw_cfg_sysfs_acpi_match);
diff --git a/include/uapi/linux/qemu_fw_cfg.h b/include/uapi/linux/qemu_fw_cfg.h
new file mode 100644
index ..c698ac3812f6
--- /dev/null
+++ b/include/uapi/linux/qemu_fw_cfg.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+#ifndef _LINUX_FW_CFG_H
+#define _LINUX_FW_CFG_H
+
+#include 
+
+#define FW_CFG_ACPI_DEVICE_ID  "QEMU0002"
+
+/* selector key values for "well-known" fw_cfg entries */
+#define FW_CFG_SIGNATURE   0x00
+#define FW_CFG_ID  0x01
+#define FW_CFG_UUID0x02
+#define FW_CFG_RAM_SIZE0x03
+#define FW_CFG_NOGRAPHIC   0x04
+#define FW_CFG_NB_CPUS 0x05
+#define FW_CFG_MACHINE_ID  0x06
+#define FW_CFG_KERNEL_ADDR 0x07
+#define FW_CFG_KERNEL_SIZE 0x08
+#define FW_CFG_KERNEL_CMDLINE  0x09
+#define FW_CFG_INITRD_ADDR 0x0a
+#define FW_CFG_INITRD_SIZE 0x0b
+#define FW_CFG_BOOT_DEVICE 0x0c
+#define FW_CFG_NUMA0x0d
+#define FW_CFG_BOOT_MENU   0x0e
+#define FW_CFG_MAX_CPUS0x0f
+#define FW_CFG_KERNEL_ENTRY0x10
+#define FW_CFG_KERNEL_DATA 0x11
+#define FW_CFG_INITRD_DATA 0x12
+#define FW_CFG_CMDLINE_ADDR0x13
+#define FW_CFG_CMDLINE_SIZE0x14
+#define FW_CFG_CMDLINE_DATA0x15
+#define FW_CFG_SETUP_ADDR  0x16
+#define FW_CFG_SETUP_SIZE  0x17
+#define FW_CFG_SETUP_DATA  0x18
+#define FW_CFG_FILE_DIR0x19
+
+#define FW_CFG_FILE_FIRST  0x20
+#define FW_CFG_FILE_SLOTS_MIN  0x10
+
+#define FW_CFG_WRITE_CHANNEL   0x4000
+#define FW_CFG_ARCH_LOCAL  0x8000
+#define FW_CFG_ENTRY_MASK  (~(FW_CFG_WRITE_CHANNEL | FW_CFG_ARCH_LOCAL))
+
+#define FW_CFG_INVALID 0xffff
+
+/* width in bytes of fw_cfg control register */
+#define FW_CFG_CTL_SIZE0x02
+
+/* fw_cfg "file name" is up to 56 characters (including terminating nul) */
+#define FW_CFG_MAX_FILE_PATH   56
+
+/* size in bytes of fw_cfg signature */
+#define FW_CFG_SIG_SIZE 4
+
+/* FW_CFG_ID bits */
+#define FW_CFG_VERSION 0x01
+
+/* fw_cfg file directory entry type */
+struct fw_cfg_file {
+   __be32 size;
+   __be16 select;
+   __u16 reserved;
+   char name[FW_CFG_MAX_FILE_PATH];
+};
+
+#endif
-- 
2.16.1.73.g5832b7e9f2



[PATCH v16 03/11] fw_cfg: fix sparse warning reading FW_CFG_ID

2018-02-28 Thread Marc-André Lureau
Use a restricted type for reading FW_CFG_ID, fixing sparse warning:

drivers/firmware/qemu_fw_cfg.c:540:22: warning: cast to restricted __le32

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 7978844f6b37..0eb155fdfb35 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -530,6 +530,7 @@ static inline void fw_cfg_kobj_cleanup(struct kobject *kobj)
 static int fw_cfg_sysfs_probe(struct platform_device *pdev)
 {
int err;
+   __le32 rev;
 
/* NOTE: If we supported multiple fw_cfg devices, we'd first create
 * a subdirectory named after e.g. pdev->id, then hang per-device
@@ -555,8 +556,8 @@ static int fw_cfg_sysfs_probe(struct platform_device *pdev)
goto err_probe;
 
/* get revision number, add matching top-level attribute */
-   fw_cfg_read_blob(FW_CFG_ID, &fw_cfg_rev, 0, sizeof(fw_cfg_rev));
-   fw_cfg_rev = le32_to_cpu(fw_cfg_rev);
+   fw_cfg_read_blob(FW_CFG_ID, &rev, 0, sizeof(rev));
+   fw_cfg_rev = le32_to_cpu(rev);
err = sysfs_create_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
if (err)
goto err_rev;
-- 
2.16.1.73.g5832b7e9f2



[PATCH v16 02/11] fw_cfg: fix sparse warnings with fw_cfg_file

2018-02-28 Thread Marc-André Lureau
Modify fw_cfg_sysfs_entry to store entry values, instead of reusing
the restricted types.

Fixes warnings such as:

$ make C=1 CF=-D__CHECK_ENDIAN__ drivers/firmware/qemu_fw_cfg.o

drivers/firmware/qemu_fw_cfg.c:491:29: warning: incorrect type in assignment 
(different base types)
drivers/firmware/qemu_fw_cfg.c:491:29:expected restricted __be32 [usertype] 
size
drivers/firmware/qemu_fw_cfg.c:491:29:got unsigned int
drivers/firmware/qemu_fw_cfg.c:492:31: warning: incorrect type in assignment 
(different base types)
drivers/firmware/qemu_fw_cfg.c:492:31:expected restricted __be16 [usertype] 
select
drivers/firmware/qemu_fw_cfg.c:492:31:got int

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 28 +++-
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index e7ea2b3b1d11..7978844f6b37 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -211,7 +211,9 @@ static const struct {
 /* fw_cfg_sysfs_entry type */
 struct fw_cfg_sysfs_entry {
struct kobject kobj;
-   struct fw_cfg_file f;
+   u32 size;
+   u16 select;
+   char name[FW_CFG_MAX_FILE_PATH];
struct list_head list;
 };
 
@@ -275,17 +277,17 @@ struct fw_cfg_sysfs_attribute fw_cfg_sysfs_attr_##_attr = 
{ \
 
 static ssize_t fw_cfg_sysfs_show_size(struct fw_cfg_sysfs_entry *e, char *buf)
 {
-   return sprintf(buf, "%u\n", e->f.size);
+   return sprintf(buf, "%u\n", e->size);
 }
 
 static ssize_t fw_cfg_sysfs_show_key(struct fw_cfg_sysfs_entry *e, char *buf)
 {
-   return sprintf(buf, "%u\n", e->f.select);
+   return sprintf(buf, "%u\n", e->select);
 }
 
 static ssize_t fw_cfg_sysfs_show_name(struct fw_cfg_sysfs_entry *e, char *buf)
 {
-   return sprintf(buf, "%s\n", e->f.name);
+   return sprintf(buf, "%s\n", e->name);
 }
 
 static FW_CFG_SYSFS_ATTR(size);
@@ -336,13 +338,13 @@ static ssize_t fw_cfg_sysfs_read_raw(struct file *filp, 
struct kobject *kobj,
 {
struct fw_cfg_sysfs_entry *entry = to_entry(kobj);
 
-   if (pos > entry->f.size)
+   if (pos > entry->size)
return -EINVAL;
 
-   if (count > entry->f.size - pos)
-   count = entry->f.size - pos;
+   if (count > entry->size - pos)
+   count = entry->size - pos;
 
-   fw_cfg_read_blob(entry->f.select, buf, pos, count);
+   fw_cfg_read_blob(entry->select, buf, pos, count);
return count;
 }
 
@@ -461,11 +463,13 @@ static int fw_cfg_register_file(const struct fw_cfg_file 
*f)
return -ENOMEM;
 
/* set file entry information */
-   memcpy(&entry->f, f, sizeof(struct fw_cfg_file));
+   entry->size = be32_to_cpu(f->size);
+   entry->select = be16_to_cpu(f->select);
+   memcpy(entry->name, f->name, FW_CFG_MAX_FILE_PATH);
 
/* register entry under "/sys/firmware/qemu_fw_cfg/by_key/" */
err = kobject_init_and_add(&entry->kobj, &fw_cfg_sysfs_entry_ktype,
-  fw_cfg_sel_ko, "%d", entry->f.select);
+  fw_cfg_sel_ko, "%d", entry->select);
if (err)
goto err_register;
 
@@ -475,7 +479,7 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f)
goto err_add_raw;
 
/* try adding "/sys/firmware/qemu_fw_cfg/by_name/" symlink */
-   fw_cfg_build_symlink(fw_cfg_fname_kset, &entry->kobj, entry->f.name);
+   fw_cfg_build_symlink(fw_cfg_fname_kset, &entry->kobj, entry->name);
 
/* success, add entry to global cache */
fw_cfg_sysfs_cache_enlist(entry);
@@ -507,8 +511,6 @@ static int fw_cfg_register_dir_entries(void)
fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(count), dir_size);
 
for (i = 0; i < count; i++) {
-   dir[i].size = be32_to_cpu(dir[i].size);
-   dir[i].select = be16_to_cpu(dir[i].select);
ret = fw_cfg_register_file(&dir[i]);
if (ret)
break;
-- 
2.16.1.73.g5832b7e9f2



[PATCH v16 00/11] fw_cfg: add DMA operations & etc/vmcoreinfo support

2018-02-28 Thread Marc-André Lureau
Hi,

This series adds DMA operations support to the qemu fw_cfg kernel
module and populates "etc/vmcoreinfo" with vmcoreinfo location
details (entry added since qemu 2.11 with -device vmcoreinfo).

v16:
- patch reordering
- commit message updates
- rename fw_cfg.h -> qemu_fw_cfg.h
- fix warning when CONFIG_CRASH_CORE=n without RFC DMA read patch
  applied
- use a function pointer rather than a dma hint to read_blob()

v15:
- fix fw_cfg.h uapi header #include
- use BSD license for fw_cfg.h uapi header
- move the uapi defines/structs for DMA & vmcoreinfo in the
  corresponding patch
- use cpu_relax() instead of usleep_range(50, 100);
- replace do { } while(true) by for (;;)
- fix the rmb() call location
- add a preliminary patch to handle error from fw_cfg_write_blob()
- rewrite fw_cfg_sel_endianness() to wrap iowrite() calls

v14:
- add "fw_cfg: add a public uapi header"
- fix sparse warnings & don't introduce new warnings
- add memory barriers to force IO ordering
- split fw_cfg_read_blob() in fw_cfg_read_blob_io() and
  fw_cfg_read_blob_dma()
- add error handling to fw_cfg_read_blob() callers
- minor stylistic changes

v13:
- reorder patch series, introduce DMA write before DMA read
- do some measurements of DMA read speed-ups

v12:
- fix virt_to_phys(NULL) panic with CONFIG_DEBUG_VIRTUAL=y
- do not use DMA read, except for kmalloc() memory we allocated
  ourself (only fw_cfg_register_dir_entries() so far)

v11:
- add #include  in last patch,
  fixing kbuild .config test

Marc-André Lureau (11):
  fw_cfg: fix sparse warnings in fw_cfg_sel_endianness()
  fw_cfg: fix sparse warnings with fw_cfg_file
  fw_cfg: fix sparse warning reading FW_CFG_ID
  fw_cfg: fix sparse warnings around FW_CFG_FILE_DIR read
  fw_cfg: remove inline from fw_cfg_read_blob()
  fw_cfg: handle fw_cfg_read_blob() error
  fw_cfg: add a public uapi header
  fw_cfg: add DMA register
  crash: export paddr_vmcoreinfo_note()
  fw_cfg: write vmcoreinfo details
  RFC: fw_cfg: do DMA read operation

 MAINTAINERS  |   1 +
 drivers/firmware/qemu_fw_cfg.c   | 339 +++
 include/uapi/linux/qemu_fw_cfg.h |  97 +++
 kernel/crash_core.c  |   1 +
 4 files changed, 374 insertions(+), 64 deletions(-)
 create mode 100644 include/uapi/linux/qemu_fw_cfg.h

-- 
2.16.1.73.g5832b7e9f2



Re: [PATCH] fw_cfg: avoid unused function warning

2018-02-28 Thread Marc-André Lureau
Hi

On Wed, Feb 28, 2018 at 2:33 PM, Arnd Bergmann  wrote:
> The newly introduced fw_cfg_dma_transfer() function is unused when
> CONFIG_CRASH_CORE is disabled:
>
> drivers/firmware/qemu_fw_cfg.c:89:16: error: 'fw_cfg_dma_transfer' defined 
> but not used [-Werror=unused-function]
>  static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
>
> This moves it into the #ifdef section that hides its caller to avoid the
> warning.
>
> Fixes: 47e78bfb5426 ("fw_cfg: write vmcoreinfo details")
> Signed-off-by: Arnd Bergmann 

I don't know which tree you found this in; I suppose it was
mst/linux-next (btw, I don't know which version of the series got
partially applied there).

This is a regression in v13, because DMA read is the last patch now, as
an RFC. Fixed in the upcoming v16.

thanks

> ---
>  drivers/firmware/qemu_fw_cfg.c | 60 
> +-
>  1 file changed, 30 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
> index 3015e77aebca..f002bb40519b 100644
> --- a/drivers/firmware/qemu_fw_cfg.c
> +++ b/drivers/firmware/qemu_fw_cfg.c
> @@ -66,6 +66,36 @@ static void fw_cfg_sel_endianness(u16 key)
> iowrite16(key, fw_cfg_reg_ctrl);
>  }
>
> +/* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
> +static ssize_t fw_cfg_read_blob(u16 key,
> +   void *buf, loff_t pos, size_t count)
> +{
> +   u32 glk = -1U;
> +   acpi_status status;
> +
> +   /* If we have ACPI, ensure mutual exclusion against any potential
> +* device access by the firmware, e.g. via AML methods:
> +*/
> +   status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
> +   if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
> +   /* Should never get here */
> +   WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
> +   memset(buf, 0, count);
> +   return -EINVAL;
> +   }
> +
> +   mutex_lock(&fw_cfg_dev_lock);
> +   fw_cfg_sel_endianness(key);
> +   while (pos-- > 0)
> +   ioread8(fw_cfg_reg_data);
> +   ioread8_rep(fw_cfg_reg_data, buf, count);
> +   mutex_unlock(&fw_cfg_dev_lock);
> +
> +   acpi_release_global_lock(glk);
> +   return count;
> +}
> +
> +#ifdef CONFIG_CRASH_CORE
>  static inline bool fw_cfg_dma_enabled(void)
>  {
> return (fw_cfg_rev & FW_CFG_VERSION_DMA) && fw_cfg_reg_dma;
> @@ -124,36 +154,6 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 
> length, u32 control)
> return ret;
>  }
>
> -/* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
> -static ssize_t fw_cfg_read_blob(u16 key,
> -   void *buf, loff_t pos, size_t count)
> -{
> -   u32 glk = -1U;
> -   acpi_status status;
> -
> -   /* If we have ACPI, ensure mutual exclusion against any potential
> -* device access by the firmware, e.g. via AML methods:
> -*/
> -   status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
> -   if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
> -   /* Should never get here */
> -   WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
> -   memset(buf, 0, count);
> -   return -EINVAL;
> -   }
> -
> -   mutex_lock(&fw_cfg_dev_lock);
> -   fw_cfg_sel_endianness(key);
> -   while (pos-- > 0)
> -   ioread8(fw_cfg_reg_data);
> -   ioread8_rep(fw_cfg_reg_data, buf, count);
> -   mutex_unlock(&fw_cfg_dev_lock);
> -
> -   acpi_release_global_lock(glk);
> -   return count;
> -}
> -
> -#ifdef CONFIG_CRASH_CORE
>  /* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
>  static ssize_t fw_cfg_write_blob(u16 key,
>  void *buf, loff_t pos, size_t count)
> --
> 2.9.0
>


Re: [PATCH v15 11/11] RFC: fw_cfg: do DMA read operation

2018-02-28 Thread Marc-André Lureau
Hi

On Tue, Feb 27, 2018 at 1:04 AM, Michael S. Tsirkin  wrote:
> On Thu, Feb 15, 2018 at 10:33:12PM +0100, Marc-André Lureau wrote:
>> Modify fw_cfg_read_blob() to use DMA if the device supports it.
>> Return errors, because the operation may fail.
>>
>> So far, only one call in fw_cfg_register_dir_entries() is using
>> kmalloc'ed buf and is thus clearly eligible to DMA read.
>>
>> Initially, I didn't implement DMA read to speed up boot time, but as a
>> first step before introducing DMA write (since read operations were
>> already presents). Even more, I didn't realize fw-cfg entries were
>> being read by the kernel during boot by default. But actally fw-cfg
>> entries are being populated during module probe. I knew DMA improved a
>> lot bios boot time (the main reason the DMA interface was added
>> afaik). Let see the time it would take to read the whole ACPI
>> tables (128kb allocated)
>>
>>  # time cat /sys/firmware/qemu_fw_cfg/by_name/etc/acpi/tables/raw
>>   - with DMA: sys 0m0.003s
>>   - without DMA (-global fw_cfg.dma_enabled=off): sys 0m7.674s
>>
>> FW_CFG_FILE_DIR (0x19) is the only "file" that is read during kernel
>> boot to populate sysfs qemu_fw_cfg directory, and it is quite
>> small (1-2kb). Since it does not expose itself, in order to measure
>> the time it takes to read such small file, I took a comparable sized
>> file of 2048 bytes and exposed it (-fw_cfg test,file=file with a
>> modified read_raw enabling DMA)
>>
>>  # perf stat -r 100 cat /sys/firmware/qemu_fw_cfg/by_name/test/raw >/dev/null
>>   - with DMA:
>>   0.636037  task-clock (msec) #0.141 CPUs utilized   
>>  ( +-  1.19% )
>>   - without DMA:
>>   6.430128  task-clock (msec) #0.622 CPUs utilized   
>>  ( +-  0.22% )
>>
>> That's a few msec saved during boot by enabling DMA read (the gain
>> would be more substantial if other & bigger fw-cfg entries are read by
>> others from sysfs, unfortunately, it's not clear if we can always
>> enable DMA there)
>>
>> Signed-off-by: Marc-André Lureau 
>> ---
>>  drivers/firmware/qemu_fw_cfg.c | 61 
>> ++
>>  1 file changed, 50 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index 3015e77aebca..94df57e9be66 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -124,12 +124,47 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 
>> length, u32 control)
>>   return ret;
>>  }
>>
>> +/* with acpi & dev locks taken */
>> +static ssize_t fw_cfg_read_blob_dma(u16 key,
>> + void *buf, loff_t pos, size_t count)
>> +{
>> + ssize_t ret;
>> +
>> + if (pos == 0) {
>> + ret = fw_cfg_dma_transfer(buf, count, key << 16
>> + | FW_CFG_DMA_CTL_SELECT
>> + | FW_CFG_DMA_CTL_READ);
>> + } else {
>> + fw_cfg_sel_endianness(key);
>> + ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
>> + if (ret < 0)
>> + return ret;
>> + ret = fw_cfg_dma_transfer(buf, count,
>> + FW_CFG_DMA_CTL_READ);
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +/* with acpi & dev locks taken */
>> +static ssize_t fw_cfg_read_blob_io(u16 key,
>> + void *buf, loff_t pos, size_t count)
>> +{
>> + fw_cfg_sel_endianness(key);
>> + while (pos-- > 0)
>> + ioread8(fw_cfg_reg_data);
>> + ioread8_rep(fw_cfg_reg_data, buf, count);
>> + return count;
>> +}
>> +
>>  /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
>>  static ssize_t fw_cfg_read_blob(u16 key,
>> - void *buf, loff_t pos, size_t count)
>> + void *buf, loff_t pos, size_t count,
>> + bool dma)
>>  {
>>   u32 glk = -1U;
>>   acpi_status status;
>> + ssize_t ret;
>>
>>   /* If we have ACPI, ensure mutual exclusion against any potential
>>* device access by the firmware, e.g. via AML methods:
>
> so this adds a dma flag to fw_cfg_read_blob.
>
>
>
>> @@ -143,14 +178,17 @@ static ssize

Re: [PATCH v15 10/11] fw_cfg: write vmcoreinfo details

2018-02-28 Thread Marc-André Lureau
Hi

On Tue, Feb 27, 2018 at 1:28 AM, Michael S. Tsirkin  wrote:
> On Thu, Feb 15, 2018 at 10:33:11PM +0100, Marc-André Lureau wrote:
>> If the "etc/vmcoreinfo" fw_cfg file is present and we are not running
>> the kdump kernel, write the addr/size of the vmcoreinfo ELF note.
>>
>> The DMA operation is expected to run synchronously with today's qemu,
>> but the specification states that it may become async, so we run
>> "control" field check in a loop for eventual changes.
>>
>> Signed-off-by: Marc-André Lureau 
>> ---
>>  drivers/firmware/qemu_fw_cfg.c | 143 
>> -
>>  include/uapi/linux/fw_cfg.h|  31 +
>>  2 files changed, 171 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index c28bec4b5663..3015e77aebca 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -34,11 +34,17 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>> +#include 
>> +#include 
>>
>>  MODULE_AUTHOR("Gabriel L. Somlo ");
>>  MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
>>  MODULE_LICENSE("GPL");
>>
>> +/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
>> +static u32 fw_cfg_rev;
>> +
>>  /* fw_cfg device i/o register addresses */
>>  static bool fw_cfg_is_mmio;
>>  static phys_addr_t fw_cfg_p_base;
>> @@ -60,6 +66,64 @@ static void fw_cfg_sel_endianness(u16 key)
>>   iowrite16(key, fw_cfg_reg_ctrl);
>>  }
>>
>> +static inline bool fw_cfg_dma_enabled(void)
>> +{
>> + return (fw_cfg_rev & FW_CFG_VERSION_DMA) && fw_cfg_reg_dma;
>> +}
>> +
>> +/* qemu fw_cfg device is sync today, but spec says it may become async */
>> +static void fw_cfg_wait_for_control(struct fw_cfg_dma_access *d)
>> +{
>> + for (;;) {
>> + u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
>> +
>> + /* do not reorder the read to d->control */
>> + rmb();
>> + if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
>> + return;
>> +
>> + cpu_relax();
>> + }
>> +}
>> +
>> +static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
>> +{
>> + phys_addr_t dma;
>> + struct fw_cfg_dma_access *d = NULL;
>> + ssize_t ret = length;
>> +
>> + d = kmalloc(sizeof(*d), GFP_KERNEL);
>> + if (!d) {
>> + ret = -ENOMEM;
>> + goto end;
>> + }
>> +
>> + /* fw_cfg device does not need IOMMU protection, so use physical 
>> addresses */
>> + *d = (struct fw_cfg_dma_access) {
>> + .address = cpu_to_be64(address ? virt_to_phys(address) : 0),
>> + .length = cpu_to_be32(length),
>> + .control = cpu_to_be32(control)
>> + };
>> +
>> + dma = virt_to_phys(d);
>> +
>> + iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
>> + /* force memory to sync before notifying device via MMIO */
>> + wmb();
>> + iowrite32be(dma, fw_cfg_reg_dma + 4);
>> +
>> + fw_cfg_wait_for_control(d);
>> +
>> + if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
>> + ret = -EIO;
>> + }
>> +
>> +end:
>> + kfree(d);
>> +
>> + return ret;
>> +}
>> +
>>  /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
>>  static ssize_t fw_cfg_read_blob(u16 key,
>>   void *buf, loff_t pos, size_t count)
>> @@ -89,6 +153,47 @@ static ssize_t fw_cfg_read_blob(u16 key,
>>   return count;
>>  }
>>
>> +#ifdef CONFIG_CRASH_CORE
>> +/* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
>> +static ssize_t fw_cfg_write_blob(u16 key,
>> +  void *buf, loff_t pos, size_t count)
>> +{
>> + u32 glk = -1U;
>> + acpi_status status;
>> + ssize_t ret = count;
>> +
>> + /* If we have ACPI, ensure mutual exclusion against any potential
>> +  * device access by the firmware, e.g. via AML methods:
>> +  */
>> + status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
>> + if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
>> + /* Should never 

Re: [PATCH v15 02/11] fw_cfg: add a public uapi header

2018-02-28 Thread Marc-André Lureau
On Tue, Feb 27, 2018 at 1:06 AM, Michael S. Tsirkin  wrote:
> On Thu, Feb 15, 2018 at 10:33:03PM +0100, Marc-André Lureau wrote:
>> Create a common header file for well-known values and structures to be
>> shared by the Linux kernel with qemu or other projects.
>>
>> It is based from qemu/docs/specs/fw_cfg.txt which references
>> qemu/include/hw/nvram/fw_cfg_keys.h "for the most up-to-date and
>> authoritative list" & vmcoreinfo.txt. Those files don't have an
>> explicit license, but qemu/hw/nvram/fw_cfg.c is BSD-license, so
>> Michael S. Tsirkin suggested to use the same license.
>>
>> The patch intentionally left out DMA & vmcoreinfo structures &
>> defines, which are added in the commits making usage of it.
>>
>> Suggested-by: Michael S. Tsirkin 
>> Signed-off-by: Marc-André Lureau 
>>
>> ---
>>
>> The related qemu patch making use of it, to be submitted:
>> https://github.com/elmarco/qemu/commit/4884fc9e9c4c4467a371e5a40f3181239e1b70f5
>> ---
>>  MAINTAINERS|  1 +
>>  drivers/firmware/qemu_fw_cfg.c | 22 ++
>>  include/uapi/linux/fw_cfg.h| 66 
>> ++
>>  3 files changed, 69 insertions(+), 20 deletions(-)
>>  create mode 100644 include/uapi/linux/fw_cfg.h
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index 3bdc260e36b7..a66b65f62811 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -11352,6 +11352,7 @@ M:"Michael S. Tsirkin" 
>>  L:   qemu-de...@nongnu.org
>>  S:   Maintained
>>  F:   drivers/firmware/qemu_fw_cfg.c
>> +F:   include/uapi/linux/fw_cfg.h
>>
>>  QIB DRIVER
>>  M:   Dennis Dalessandro 
>
> Why fw_cfg.h and not qemu_fw_cfg.h ? fw_cfg.h seems too generic.

ok

>
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index a41b572eeeb1..42601a3eaed5 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -32,30 +32,12 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>
>>  MODULE_AUTHOR("Gabriel L. Somlo ");
>>  MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
>>  MODULE_LICENSE("GPL");
>>
>> -/* selector key values for "well-known" fw_cfg entries */
>> -#define FW_CFG_SIGNATURE  0x00
>> -#define FW_CFG_ID 0x01
>> -#define FW_CFG_FILE_DIR   0x19
>> -
>> -/* size in bytes of fw_cfg signature */
>> -#define FW_CFG_SIG_SIZE 4
>> -
>> -/* fw_cfg "file name" is up to 56 characters (including terminating nul) */
>> -#define FW_CFG_MAX_FILE_PATH 56
>> -
>> -/* fw_cfg file directory entry type */
>> -struct fw_cfg_file {
>> - u32 size;
>> - u16 select;
>> - u16 reserved;
>> - char name[FW_CFG_MAX_FILE_PATH];
>> -};
>> -
>>  /* fw_cfg device i/o register addresses */
>>  static bool fw_cfg_is_mmio;
>>  static phys_addr_t fw_cfg_p_base;
>> @@ -597,7 +579,7 @@ MODULE_DEVICE_TABLE(of, fw_cfg_sysfs_mmio_match);
>>
>>  #ifdef CONFIG_ACPI
>>  static const struct acpi_device_id fw_cfg_sysfs_acpi_match[] = {
>> - { "QEMU0002", },
>> + { FW_CFG_ACPI_DEVICE_ID, },
>>   {},
>>  };
>>  MODULE_DEVICE_TABLE(acpi, fw_cfg_sysfs_acpi_match);
>> diff --git a/include/uapi/linux/fw_cfg.h b/include/uapi/linux/fw_cfg.h
>> new file mode 100644
>> index ..c698ac3812f6
>> --- /dev/null
>> +++ b/include/uapi/linux/fw_cfg.h
>> @@ -0,0 +1,66 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause */
>> +#ifndef _LINUX_FW_CFG_H
>> +#define _LINUX_FW_CFG_H
>> +
>> +#include 
>> +
>> +#define FW_CFG_ACPI_DEVICE_ID"QEMU0002"
>> +
>> +/* selector key values for "well-known" fw_cfg entries */
>> +#define FW_CFG_SIGNATURE 0x00
>> +#define FW_CFG_ID0x01
>> +#define FW_CFG_UUID  0x02
>> +#define FW_CFG_RAM_SIZE  0x03
>> +#define FW_CFG_NOGRAPHIC 0x04
>> +#define FW_CFG_NB_CPUS   0x05
>> +#define FW_CFG_MACHINE_ID0x06
>> +#define FW_CFG_KERNEL_ADDR   0x07
>> +#define FW_CFG_KERNEL_SIZE   0x08
>> +#define FW_CFG_KERNEL_CMDLINE0x09
>> +#define FW_CFG_INITRD_ADDR   0x0a
>> +#define FW_CFG_INITRD_SIZE   0x0b
>> +#define FW_CFG_BOOT_DEVICE   0x0c
>> +#define FW_CFG_NUMA  0x0d
>> +#define FW_CFG_BOOT_MENU 0x0e
>> +#define FW_CFG_MAX_CPUS  0x0f
>>

Re: [PATCH v15 08/11] fw_cfg: handle fw_cfg_read_blob() error

2018-02-28 Thread Marc-André Lureau
Hi

On Tue, Feb 27, 2018 at 1:20 AM, Michael S. Tsirkin  wrote:
> On Thu, Feb 15, 2018 at 10:33:09PM +0100, Marc-André Lureau wrote:
>> fw_cfg_read_blob() may fail, but does not return error. This may lead
>> to undefined behaviours, such as a memcmp(sig, "QEMU") on uninitialized
>> memory.
>
> I don't think that's true - there's a memset there that
> will initialize the memory. probe is likely the only
> case where it returns a slightly incorrect data.

Right, I'll update the commit message.

>> Return an error if ACPI locking failed. Also, the following
>> DMA read/write extension will add more error paths that should be
>> handled appropriately.
>>
>> Signed-off-by: Marc-André Lureau 
>> ---
>>  drivers/firmware/qemu_fw_cfg.c | 32 ++--
>>  1 file changed, 22 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index f6f90bef604c..5e6e5ac71dab 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -59,8 +59,8 @@ static void fw_cfg_sel_endianness(u16 key)
>>  }
>>
>>  /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
>> -static void fw_cfg_read_blob(u16 key,
>> - void *buf, loff_t pos, size_t count)
>> +static ssize_t fw_cfg_read_blob(u16 key,
>> + void *buf, loff_t pos, size_t count)
>>  {
>>   u32 glk = -1U;
>>   acpi_status status;
>> @@ -73,7 +73,7 @@ static void fw_cfg_read_blob(u16 key,
>>   /* Should never get here */
>>   WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
>>   memset(buf, 0, count);
>> - return;
>> + return -EINVAL;
>>   }
>>
>>   mutex_lock(&fw_cfg_dev_lock);
>
> Wouldn't something like -EBUSY be more appropriate?

In theory, it would be a general failure, right? I don't think we want
the caller to retry. I think EINVAL fits better, but I don't think
it matters much whether we use this or EBUSY.

>> @@ -84,6 +84,7 @@ static void fw_cfg_read_blob(u16 key,
>>   mutex_unlock(&fw_cfg_dev_lock);
>>
>>   acpi_release_global_lock(glk);
>> + return count;
>>  }
>>
>>  /* clean up fw_cfg device i/o */
>> @@ -165,8 +166,9 @@ static int fw_cfg_do_platform_probe(struct 
>> platform_device *pdev)
>>   }
>>
>>   /* verify fw_cfg device signature */
>> - fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
>> - if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
>> + if (fw_cfg_read_blob(FW_CFG_SIGNATURE, sig,
>> + 0, FW_CFG_SIG_SIZE) < 0 ||
>> + memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
>>   fw_cfg_io_cleanup();
>>   return -ENODEV;
>>   }
>> @@ -326,8 +328,7 @@ static ssize_t fw_cfg_sysfs_read_raw(struct file *filp, 
>> struct kobject *kobj,
>>   if (count > entry->size - pos)
>>   count = entry->size - pos;
>>
>> - fw_cfg_read_blob(entry->select, buf, pos, count);
>> - return count;
>> + return fw_cfg_read_blob(entry->select, buf, pos, count);
>>  }
>>
>>  static struct bin_attribute fw_cfg_sysfs_attr_raw = {
>> @@ -483,7 +484,11 @@ static int fw_cfg_register_dir_entries(void)
>>   struct fw_cfg_file *dir;
>>   size_t dir_size;
>>
>> - fw_cfg_read_blob(FW_CFG_FILE_DIR, &files_count, 0, 
>> sizeof(files_count));
>> + ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, &files_count,
>> + 0, sizeof(files_count));
>> + if (ret < 0)
>> + return ret;
>> +
>>   count = be32_to_cpu(files_count);
>>   dir_size = count * sizeof(struct fw_cfg_file);
>>
>> @@ -491,7 +496,10 @@ static int fw_cfg_register_dir_entries(void)
>>   if (!dir)
>>   return -ENOMEM;
>>
>> - fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(files_count), dir_size);
>> + ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, dir,
>> + sizeof(files_count), dir_size);
>> + if (ret < 0)
>> + goto end;
>>
>>   for (i = 0; i < count; i++) {
>>   ret = fw_cfg_register_file(&dir[i]);
>> @@ -499,6 +507,7 @@ static int fw_cfg_register_dir_entries(void)
>>   break;
>>   }
>>
>> +end:
>> 

[PATCH v15 01/11] crash: export paddr_vmcoreinfo_note()

2018-02-15 Thread Marc-André Lureau
The following patch is going to use the symbol from the fw_cfg module,
to call the function and write the note location details in the
vmcoreinfo entry, so qemu can produce dumps with the vmcoreinfo note.

CC: Andrew Morton 
CC: Baoquan He 
CC: Dave Young 
CC: Dave Young 
CC: Hari Bathini 
CC: Tony Luck 
CC: Vivek Goyal 
Signed-off-by: Marc-André Lureau 
Acked-by: Gabriel Somlo 
---
 kernel/crash_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 4f63597c824d..a93590cdd9e1 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -376,6 +376,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
 {
return __pa(vmcoreinfo_note);
 }
+EXPORT_SYMBOL(paddr_vmcoreinfo_note);
 
 static int __init crash_save_vmcoreinfo_init(void)
 {
-- 
2.16.1.73.g5832b7e9f2



[PATCH v15 02/11] fw_cfg: add a public uapi header

2018-02-15 Thread Marc-André Lureau
Create a common header file for well-known values and structures to be
shared by the Linux kernel with qemu or other projects.

It is based on qemu/docs/specs/fw_cfg.txt which references
qemu/include/hw/nvram/fw_cfg_keys.h "for the most up-to-date and
authoritative list" & vmcoreinfo.txt. Those files don't have an
explicit license, but qemu/hw/nvram/fw_cfg.c is BSD-licensed, so
Michael S. Tsirkin suggested using the same license.

The patch intentionally leaves out the DMA & vmcoreinfo structures &
defines, which are added in the commits making use of them.

Suggested-by: Michael S. Tsirkin 
Signed-off-by: Marc-André Lureau 

---

The related qemu patch making use of it, to be submitted:
https://github.com/elmarco/qemu/commit/4884fc9e9c4c4467a371e5a40f3181239e1b70f5
---
 MAINTAINERS|  1 +
 drivers/firmware/qemu_fw_cfg.c | 22 ++
 include/uapi/linux/fw_cfg.h| 66 ++
 3 files changed, 69 insertions(+), 20 deletions(-)
 create mode 100644 include/uapi/linux/fw_cfg.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 3bdc260e36b7..a66b65f62811 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11352,6 +11352,7 @@ M:  "Michael S. Tsirkin" 
 L: qemu-de...@nongnu.org
 S: Maintained
 F: drivers/firmware/qemu_fw_cfg.c
+F: include/uapi/linux/fw_cfg.h
 
 QIB DRIVER
 M: Dennis Dalessandro 
diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index a41b572eeeb1..42601a3eaed5 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -32,30 +32,12 @@
 #include 
 #include 
 #include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
 MODULE_LICENSE("GPL");
 
-/* selector key values for "well-known" fw_cfg entries */
-#define FW_CFG_SIGNATURE  0x00
-#define FW_CFG_ID 0x01
-#define FW_CFG_FILE_DIR   0x19
-
-/* size in bytes of fw_cfg signature */
-#define FW_CFG_SIG_SIZE 4
-
-/* fw_cfg "file name" is up to 56 characters (including terminating nul) */
-#define FW_CFG_MAX_FILE_PATH 56
-
-/* fw_cfg file directory entry type */
-struct fw_cfg_file {
-   u32 size;
-   u16 select;
-   u16 reserved;
-   char name[FW_CFG_MAX_FILE_PATH];
-};
-
 /* fw_cfg device i/o register addresses */
 static bool fw_cfg_is_mmio;
 static phys_addr_t fw_cfg_p_base;
@@ -597,7 +579,7 @@ MODULE_DEVICE_TABLE(of, fw_cfg_sysfs_mmio_match);
 
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id fw_cfg_sysfs_acpi_match[] = {
-   { "QEMU0002", },
+   { FW_CFG_ACPI_DEVICE_ID, },
{},
 };
 MODULE_DEVICE_TABLE(acpi, fw_cfg_sysfs_acpi_match);
diff --git a/include/uapi/linux/fw_cfg.h b/include/uapi/linux/fw_cfg.h
new file mode 100644
index ..c698ac3812f6
--- /dev/null
+++ b/include/uapi/linux/fw_cfg.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+#ifndef _LINUX_FW_CFG_H
+#define _LINUX_FW_CFG_H
+
+#include 
+
+#define FW_CFG_ACPI_DEVICE_ID  "QEMU0002"
+
+/* selector key values for "well-known" fw_cfg entries */
+#define FW_CFG_SIGNATURE   0x00
+#define FW_CFG_ID  0x01
+#define FW_CFG_UUID0x02
+#define FW_CFG_RAM_SIZE0x03
+#define FW_CFG_NOGRAPHIC   0x04
+#define FW_CFG_NB_CPUS 0x05
+#define FW_CFG_MACHINE_ID  0x06
+#define FW_CFG_KERNEL_ADDR 0x07
+#define FW_CFG_KERNEL_SIZE 0x08
+#define FW_CFG_KERNEL_CMDLINE  0x09
+#define FW_CFG_INITRD_ADDR 0x0a
+#define FW_CFG_INITRD_SIZE 0x0b
+#define FW_CFG_BOOT_DEVICE 0x0c
+#define FW_CFG_NUMA0x0d
+#define FW_CFG_BOOT_MENU   0x0e
+#define FW_CFG_MAX_CPUS0x0f
+#define FW_CFG_KERNEL_ENTRY0x10
+#define FW_CFG_KERNEL_DATA 0x11
+#define FW_CFG_INITRD_DATA 0x12
+#define FW_CFG_CMDLINE_ADDR0x13
+#define FW_CFG_CMDLINE_SIZE0x14
+#define FW_CFG_CMDLINE_DATA0x15
+#define FW_CFG_SETUP_ADDR  0x16
+#define FW_CFG_SETUP_SIZE  0x17
+#define FW_CFG_SETUP_DATA  0x18
+#define FW_CFG_FILE_DIR0x19
+
+#define FW_CFG_FILE_FIRST  0x20
+#define FW_CFG_FILE_SLOTS_MIN  0x10
+
+#define FW_CFG_WRITE_CHANNEL   0x4000
+#define FW_CFG_ARCH_LOCAL  0x8000
+#define FW_CFG_ENTRY_MASK  (~(FW_CFG_WRITE_CHANNEL | FW_CFG_ARCH_LOCAL))
+
+#define FW_CFG_INVALID 0x
+
+/* width in bytes of fw_cfg control register */
+#define FW_CFG_CTL_SIZE0x02
+
+/* fw_cfg "file name" is up to 56 characters (including terminating nul) */
+#define FW_CFG_MAX_FILE_PATH   56
+
+/* size in bytes of fw_cfg signature */
+#define FW_CFG_SIG_SIZE 4
+
+/* FW_CFG_ID bits */
+#define FW_CFG_VERSION 0x01
+
+/* fw_cfg file directory entry type */
+struct fw_cfg_file {
+   __be32 size;
+   __be16 select;
+   __u16 reserved;
+   char name[FW_CFG_MAX_FILE_PATH];
+};
+
+#endif
-- 
2.16.1.73.g5832b7e9f2



[PATCH v15 05/11] fw_cfg: fix sparse warning reading FW_CFG_ID

2018-02-15 Thread Marc-André Lureau
Use a restricted type for reading FW_CFG_ID, fixing sparse warning:

drivers/firmware/qemu_fw_cfg.c:540:22: warning: cast to restricted __le32

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 6ee12c9e079a..71672cb8c427 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -512,6 +512,7 @@ static inline void fw_cfg_kobj_cleanup(struct kobject *kobj)
 static int fw_cfg_sysfs_probe(struct platform_device *pdev)
 {
int err;
+   __le32 rev;
 
/* NOTE: If we supported multiple fw_cfg devices, we'd first create
 * a subdirectory named after e.g. pdev->id, then hang per-device
@@ -537,8 +538,8 @@ static int fw_cfg_sysfs_probe(struct platform_device *pdev)
goto err_probe;
 
/* get revision number, add matching top-level attribute */
-   fw_cfg_read_blob(FW_CFG_ID, &fw_cfg_rev, 0, sizeof(fw_cfg_rev));
-   fw_cfg_rev = le32_to_cpu(fw_cfg_rev);
+   fw_cfg_read_blob(FW_CFG_ID, &rev, 0, sizeof(rev));
+   fw_cfg_rev = le32_to_cpu(rev);
err = sysfs_create_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
if (err)
goto err_rev;
-- 
2.16.1.73.g5832b7e9f2



[PATCH v15 04/11] fw_cfg: fix sparse warnings with fw_cfg_file

2018-02-15 Thread Marc-André Lureau
Modify fw_cfg_sysfs_entry to store entry values, instead of reusing
the restricted types.

Fixes warnings such as:

$ make C=1 CF=-D__CHECK_ENDIAN__ drivers/firmware/qemu_fw_cfg.o

drivers/firmware/qemu_fw_cfg.c:491:29: warning: incorrect type in assignment 
(different base types)
drivers/firmware/qemu_fw_cfg.c:491:29:expected restricted __be32 [usertype] 
size
drivers/firmware/qemu_fw_cfg.c:491:29:got unsigned int
drivers/firmware/qemu_fw_cfg.c:492:31: warning: incorrect type in assignment 
(different base types)
drivers/firmware/qemu_fw_cfg.c:492:31:expected restricted __be16 [usertype] 
select
drivers/firmware/qemu_fw_cfg.c:492:31:got int

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 28 +++-
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 6164731a3c35..6ee12c9e079a 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -193,7 +193,9 @@ static const struct {
 /* fw_cfg_sysfs_entry type */
 struct fw_cfg_sysfs_entry {
struct kobject kobj;
-   struct fw_cfg_file f;
+   u32 size;
+   u16 select;
+   char name[FW_CFG_MAX_FILE_PATH];
struct list_head list;
 };
 
@@ -257,17 +259,17 @@ struct fw_cfg_sysfs_attribute fw_cfg_sysfs_attr_##_attr = 
{ \
 
 static ssize_t fw_cfg_sysfs_show_size(struct fw_cfg_sysfs_entry *e, char *buf)
 {
-   return sprintf(buf, "%u\n", e->f.size);
+   return sprintf(buf, "%u\n", e->size);
 }
 
 static ssize_t fw_cfg_sysfs_show_key(struct fw_cfg_sysfs_entry *e, char *buf)
 {
-   return sprintf(buf, "%u\n", e->f.select);
+   return sprintf(buf, "%u\n", e->select);
 }
 
 static ssize_t fw_cfg_sysfs_show_name(struct fw_cfg_sysfs_entry *e, char *buf)
 {
-   return sprintf(buf, "%s\n", e->f.name);
+   return sprintf(buf, "%s\n", e->name);
 }
 
 static FW_CFG_SYSFS_ATTR(size);
@@ -318,13 +320,13 @@ static ssize_t fw_cfg_sysfs_read_raw(struct file *filp, 
struct kobject *kobj,
 {
struct fw_cfg_sysfs_entry *entry = to_entry(kobj);
 
-   if (pos > entry->f.size)
+   if (pos > entry->size)
return -EINVAL;
 
-   if (count > entry->f.size - pos)
-   count = entry->f.size - pos;
+   if (count > entry->size - pos)
+   count = entry->size - pos;
 
-   fw_cfg_read_blob(entry->f.select, buf, pos, count);
+   fw_cfg_read_blob(entry->select, buf, pos, count);
return count;
 }
 
@@ -443,11 +445,13 @@ static int fw_cfg_register_file(const struct fw_cfg_file 
*f)
return -ENOMEM;
 
/* set file entry information */
-   memcpy(&entry->f, f, sizeof(struct fw_cfg_file));
+   entry->size = be32_to_cpu(f->size);
+   entry->select = be16_to_cpu(f->select);
+   memcpy(entry->name, f->name, FW_CFG_MAX_FILE_PATH);
 
/* register entry under "/sys/firmware/qemu_fw_cfg/by_key/" */
err = kobject_init_and_add(&entry->kobj, &fw_cfg_sysfs_entry_ktype,
-  fw_cfg_sel_ko, "%d", entry->f.select);
+  fw_cfg_sel_ko, "%d", entry->select);
if (err)
goto err_register;
 
@@ -457,7 +461,7 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f)
goto err_add_raw;
 
/* try adding "/sys/firmware/qemu_fw_cfg/by_name/" symlink */
-   fw_cfg_build_symlink(fw_cfg_fname_kset, &entry->kobj, entry->f.name);
+   fw_cfg_build_symlink(fw_cfg_fname_kset, &entry->kobj, entry->name);
 
/* success, add entry to global cache */
fw_cfg_sysfs_cache_enlist(entry);
@@ -489,8 +493,6 @@ static int fw_cfg_register_dir_entries(void)
fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(count), dir_size);
 
for (i = 0; i < count; i++) {
-   dir[i].size = be32_to_cpu(dir[i].size);
-   dir[i].select = be16_to_cpu(dir[i].select);
ret = fw_cfg_register_file(&dir[i]);
if (ret)
break;
-- 
2.16.1.73.g5832b7e9f2



[PATCH v15 06/11] fw_cfg: fix sparse warnings around FW_CFG_FILE_DIR read

2018-02-15 Thread Marc-André Lureau
Use struct fw_cfg_files to read the directory size, fixing the sparse
warnings:

drivers/firmware/qemu_fw_cfg.c:485:17: warning: cast to restricted __be32

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 71672cb8c427..805372e8e50d 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -478,19 +478,20 @@ static int fw_cfg_register_file(const struct fw_cfg_file 
*f)
 static int fw_cfg_register_dir_entries(void)
 {
int ret = 0;
+   __be32 files_count;
u32 count, i;
struct fw_cfg_file *dir;
size_t dir_size;
 
-   fw_cfg_read_blob(FW_CFG_FILE_DIR, &count, 0, sizeof(count));
-   count = be32_to_cpu(count);
+   fw_cfg_read_blob(FW_CFG_FILE_DIR, &files_count, 0, sizeof(files_count));
+   count = be32_to_cpu(files_count);
dir_size = count * sizeof(struct fw_cfg_file);
 
dir = kmalloc(dir_size, GFP_KERNEL);
if (!dir)
return -ENOMEM;
 
-   fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(count), dir_size);
+   fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(files_count), dir_size);
 
for (i = 0; i < count; i++) {
ret = fw_cfg_register_file(&dir[i]);
-- 
2.16.1.73.g5832b7e9f2



[PATCH v15 08/11] fw_cfg: handle fw_cfg_read_blob() error

2018-02-15 Thread Marc-André Lureau
fw_cfg_read_blob() may fail, but does not return an error. This may lead
to undefined behaviour, such as a memcmp(sig, "QEMU") on uninitialized
memory. Return an error if ACPI locking failed. Also, the following
DMA read/write extension will add more error paths that should be
handled appropriately.

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 32 ++--
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index f6f90bef604c..5e6e5ac71dab 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -59,8 +59,8 @@ static void fw_cfg_sel_endianness(u16 key)
 }
 
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
-static void fw_cfg_read_blob(u16 key,
-   void *buf, loff_t pos, size_t count)
+static ssize_t fw_cfg_read_blob(u16 key,
+   void *buf, loff_t pos, size_t count)
 {
u32 glk = -1U;
acpi_status status;
@@ -73,7 +73,7 @@ static void fw_cfg_read_blob(u16 key,
/* Should never get here */
WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
memset(buf, 0, count);
-   return;
+   return -EINVAL;
}
 
mutex_lock(&fw_cfg_dev_lock);
@@ -84,6 +84,7 @@ static void fw_cfg_read_blob(u16 key,
mutex_unlock(&fw_cfg_dev_lock);
 
acpi_release_global_lock(glk);
+   return count;
 }
 
 /* clean up fw_cfg device i/o */
@@ -165,8 +166,9 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
}
 
/* verify fw_cfg device signature */
-   fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
-   if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
+   if (fw_cfg_read_blob(FW_CFG_SIGNATURE, sig,
+   0, FW_CFG_SIG_SIZE) < 0 ||
+   memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
fw_cfg_io_cleanup();
return -ENODEV;
}
@@ -326,8 +328,7 @@ static ssize_t fw_cfg_sysfs_read_raw(struct file *filp, 
struct kobject *kobj,
if (count > entry->size - pos)
count = entry->size - pos;
 
-   fw_cfg_read_blob(entry->select, buf, pos, count);
-   return count;
+   return fw_cfg_read_blob(entry->select, buf, pos, count);
 }
 
 static struct bin_attribute fw_cfg_sysfs_attr_raw = {
@@ -483,7 +484,11 @@ static int fw_cfg_register_dir_entries(void)
struct fw_cfg_file *dir;
size_t dir_size;
 
-   fw_cfg_read_blob(FW_CFG_FILE_DIR, &files_count, 0, sizeof(files_count));
+   ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, &files_count,
+   0, sizeof(files_count));
+   if (ret < 0)
+   return ret;
+
count = be32_to_cpu(files_count);
dir_size = count * sizeof(struct fw_cfg_file);
 
@@ -491,7 +496,10 @@ static int fw_cfg_register_dir_entries(void)
if (!dir)
return -ENOMEM;
 
-   fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(files_count), dir_size);
+   ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, dir,
+   sizeof(files_count), dir_size);
+   if (ret < 0)
+   goto end;
 
for (i = 0; i < count; i++) {
ret = fw_cfg_register_file(&dir[i]);
@@ -499,6 +507,7 @@ static int fw_cfg_register_dir_entries(void)
break;
}
 
+end:
kfree(dir);
return ret;
 }
@@ -539,7 +548,10 @@ static int fw_cfg_sysfs_probe(struct platform_device *pdev)
goto err_probe;
 
/* get revision number, add matching top-level attribute */
-   fw_cfg_read_blob(FW_CFG_ID, &rev, 0, sizeof(rev));
+   err = fw_cfg_read_blob(FW_CFG_ID, &rev, 0, sizeof(rev));
+   if (err < 0)
+   goto err_probe;
+
fw_cfg_rev = le32_to_cpu(rev);
err = sysfs_create_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
if (err)
-- 
2.16.1.73.g5832b7e9f2



[PATCH v15 10/11] fw_cfg: write vmcoreinfo details

2018-02-15 Thread Marc-André Lureau
If the "etc/vmcoreinfo" fw_cfg file is present and we are not running
the kdump kernel, write the addr/size of the vmcoreinfo ELF note.

The DMA operation is expected to run synchronously with today's qemu,
but the specification states that it may become async, so we check the
"control" field in a loop to cope with such a future change.

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 143 -
 include/uapi/linux/fw_cfg.h|  31 +
 2 files changed, 171 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index c28bec4b5663..3015e77aebca 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -34,11 +34,17 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
 MODULE_LICENSE("GPL");
 
+/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
+static u32 fw_cfg_rev;
+
 /* fw_cfg device i/o register addresses */
 static bool fw_cfg_is_mmio;
 static phys_addr_t fw_cfg_p_base;
@@ -60,6 +66,64 @@ static void fw_cfg_sel_endianness(u16 key)
iowrite16(key, fw_cfg_reg_ctrl);
 }
 
+static inline bool fw_cfg_dma_enabled(void)
+{
+   return (fw_cfg_rev & FW_CFG_VERSION_DMA) && fw_cfg_reg_dma;
+}
+
+/* qemu fw_cfg device is sync today, but spec says it may become async */
+static void fw_cfg_wait_for_control(struct fw_cfg_dma_access *d)
+{
+   for (;;) {
+   u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
+
+   /* do not reorder the read to d->control */
+   rmb();
+   if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
+   return;
+
+   cpu_relax();
+   }
+}
+
+static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
+{
+   phys_addr_t dma;
+   struct fw_cfg_dma_access *d = NULL;
+   ssize_t ret = length;
+
+   d = kmalloc(sizeof(*d), GFP_KERNEL);
+   if (!d) {
+   ret = -ENOMEM;
+   goto end;
+   }
+
+   /* fw_cfg device does not need IOMMU protection, so use physical 
addresses */
+   *d = (struct fw_cfg_dma_access) {
+   .address = cpu_to_be64(address ? virt_to_phys(address) : 0),
+   .length = cpu_to_be32(length),
+   .control = cpu_to_be32(control)
+   };
+
+   dma = virt_to_phys(d);
+
+   iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
+   /* force memory to sync before notifying device via MMIO */
+   wmb();
+   iowrite32be(dma, fw_cfg_reg_dma + 4);
+
+   fw_cfg_wait_for_control(d);
+
+   if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
+   ret = -EIO;
+   }
+
+end:
+   kfree(d);
+
+   return ret;
+}
+
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
 static ssize_t fw_cfg_read_blob(u16 key,
void *buf, loff_t pos, size_t count)
@@ -89,6 +153,47 @@ static ssize_t fw_cfg_read_blob(u16 key,
return count;
 }
 
+#ifdef CONFIG_CRASH_CORE
+/* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
+static ssize_t fw_cfg_write_blob(u16 key,
+void *buf, loff_t pos, size_t count)
+{
+   u32 glk = -1U;
+   acpi_status status;
+   ssize_t ret = count;
+
+   /* If we have ACPI, ensure mutual exclusion against any potential
+* device access by the firmware, e.g. via AML methods:
+*/
+   status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
+   if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
+   /* Should never get here */
+   WARN(1, "%s: Failed to lock ACPI!\n", __func__);
+   return -EINVAL;
+   }
+
+   mutex_lock(&fw_cfg_dev_lock);
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(buf, count, key << 16
+ | FW_CFG_DMA_CTL_SELECT
+ | FW_CFG_DMA_CTL_WRITE);
+   } else {
+   fw_cfg_sel_endianness(key);
+   ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
+   if (ret < 0)
+   goto end;
+   ret = fw_cfg_dma_transfer(buf, count, FW_CFG_DMA_CTL_WRITE);
+   }
+
+end:
+   mutex_unlock(&fw_cfg_dev_lock);
+
+   acpi_release_global_lock(glk);
+
+   return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
 /* clean up fw_cfg device i/o */
 static void fw_cfg_io_cleanup(void)
 {
@@ -188,9 +293,6 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
return 0;
 }
 
-/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
-static u32 fw_cfg_re

[PATCH v15 07/11] fw_cfg: remove inline from fw_cfg_read_blob()

2018-02-15 Thread Marc-André Lureau
The function is not small and getting bigger.

Let the compiler decide instead. No profiling done, hopefully
unnecessary.

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 805372e8e50d..f6f90bef604c 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -59,8 +59,8 @@ static void fw_cfg_sel_endianness(u16 key)
 }
 
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
-static inline void fw_cfg_read_blob(u16 key,
-   void *buf, loff_t pos, size_t count)
+static void fw_cfg_read_blob(u16 key,
+   void *buf, loff_t pos, size_t count)
 {
u32 glk = -1U;
acpi_status status;
-- 
2.16.1.73.g5832b7e9f2



[PATCH v15 03/11] fw_cfg: fix sparse warnings in fw_cfg_sel_endianness()

2018-02-15 Thread Marc-André Lureau
Dispatch to the appropriate iowrite() instead of casting restricted
type to u16.

- if fw_cfg_is_mmio:
  before: iowrite16(cpu_to_be16(key))
  after: iowrite16be(key)
- if !fw_cfg_is_mmio:
  before: iowrite16(cpu_to_le16(key))
  after: iowrite16(key)
  which is equivalent on little-endian systems, where fw_cfg IO is supported.

Fixes:
$ make C=1 CF=-D__CHECK_ENDIAN__ drivers/firmware/qemu_fw_cfg.o

drivers/firmware/qemu_fw_cfg.c:55:33: warning: restricted __be16 degrades to 
integer
drivers/firmware/qemu_fw_cfg.c:55:52: warning: restricted __le16 degrades to 
integer

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 42601a3eaed5..6164731a3c35 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -50,9 +50,12 @@ static void __iomem *fw_cfg_reg_data;
 static DEFINE_MUTEX(fw_cfg_dev_lock);
 
 /* pick appropriate endianness for selector key */
-static inline u16 fw_cfg_sel_endianness(u16 key)
+static void fw_cfg_sel_endianness(u16 key)
 {
-   return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
+   if (fw_cfg_is_mmio)
+   iowrite16be(key, fw_cfg_reg_ctrl);
+   else
+   iowrite16(key, fw_cfg_reg_ctrl);
 }
 
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
@@ -74,7 +77,7 @@ static inline void fw_cfg_read_blob(u16 key,
}
 
mutex_lock(&fw_cfg_dev_lock);
-   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+   fw_cfg_sel_endianness(key);
while (pos-- > 0)
ioread8(fw_cfg_reg_data);
ioread8_rep(fw_cfg_reg_data, buf, count);
-- 
2.16.1.73.g5832b7e9f2



[PATCH v15 00/11] fw_cfg: add DMA operations & etc/vmcoreinfo support

2018-02-15 Thread Marc-André Lureau
Hi,

This series adds DMA operations support to the qemu fw_cfg kernel
module and populates "etc/vmcoreinfo" with vmcoreinfo location
details (entry added since qemu 2.11 with -device vmcoreinfo).

v15:
- fix fw_cfg.h uapi header #include
- use BSD license for fw_cfg.h uapi header
- move the uapi defines/structs for DMA & vmcoreinfo in the
  corresponding patch
- use cpu_relax() instead of usleep_range(50, 100);
- replace do { } while(true) by for (;;)
- fix the rmb() call location
- add a preliminary patch to handle error from fw_cfg_read_blob()
- rewrite fw_cfg_sel_endianness() to wrap iowrite() calls

v14:
- add "fw_cfg: add a public uapi header"
- fix sparse warnings & don't introduce new warnings
- add memory barriers to force IO ordering
- split fw_cfg_read_blob() in fw_cfg_read_blob_io() and
  fw_cfg_read_blob_dma()
- add error handling to fw_cfg_read_blob() callers
- minor stylistic changes

v13:
- reorder patch series, introduce DMA write before DMA read
- do some measurements of DMA read speed-ups

v12:
- fix virt_to_phys(NULL) panic with CONFIG_DEBUG_VIRTUAL=y
- do not use DMA read, except for kmalloc() memory we allocated
  ourself (only fw_cfg_register_dir_entries() so far)

v11:
- add #include  in last patch,
  fixing kbuild .config test

Marc-André Lureau (11):
  crash: export paddr_vmcoreinfo_note()
  fw_cfg: add a public uapi header
  fw_cfg: fix sparse warnings in fw_cfg_sel_endianness()
  fw_cfg: fix sparse warnings with fw_cfg_file
  fw_cfg: fix sparse warning reading FW_CFG_ID
  fw_cfg: fix sparse warnings around FW_CFG_FILE_DIR read
  fw_cfg: remove inline from fw_cfg_read_blob()
  fw_cfg: handle fw_cfg_read_blob() error
  fw_cfg: add DMA register
  fw_cfg: write vmcoreinfo details
  RFC: fw_cfg: do DMA read operation

 MAINTAINERS|   1 +
 drivers/firmware/qemu_fw_cfg.c | 334 +
 include/uapi/linux/fw_cfg.h|  97 
 kernel/crash_core.c|   1 +
 4 files changed, 369 insertions(+), 64 deletions(-)
 create mode 100644 include/uapi/linux/fw_cfg.h

-- 
2.16.1.73.g5832b7e9f2



[PATCH v15 11/11] RFC: fw_cfg: do DMA read operation

2018-02-15 Thread Marc-André Lureau
Modify fw_cfg_read_blob() to use DMA if the device supports it.
Return errors, because the operation may fail.

So far, only one call in fw_cfg_register_dir_entries() is using
kmalloc'ed buf and is thus clearly eligible to DMA read.

Initially, I didn't implement DMA read to speed up boot time, but as a
first step before introducing DMA write (since read operations were
already present). Moreover, I didn't realize fw-cfg entries were
being read by the kernel during boot by default. But actually, fw-cfg
entries are being populated during module probe. I knew DMA improved
BIOS boot time a lot (the main reason the DMA interface was added,
afaik). Let's see the time it would take to read the whole ACPI
tables (128kb allocated)

 # time cat /sys/firmware/qemu_fw_cfg/by_name/etc/acpi/tables/raw
  - with DMA: sys 0m0.003s
  - without DMA (-global fw_cfg.dma_enabled=off): sys 0m7.674s

FW_CFG_FILE_DIR (0x19) is the only "file" that is read during kernel
boot to populate the sysfs qemu_fw_cfg directory, and it is quite
small (1-2kb). Since it is not itself exposed in sysfs, in order to
measure the time it takes to read such a small file, I took a comparably
sized file of 2048 bytes and exposed it (-fw_cfg test,file=file with a
modified read_raw enabling DMA)

 # perf stat -r 100 cat /sys/firmware/qemu_fw_cfg/by_name/test/raw >/dev/null
  - with DMA:
  0.636037  task-clock (msec) #0.141 CPUs utilized  
  ( +-  1.19% )
  - without DMA:
  6.430128  task-clock (msec) #0.622 CPUs utilized  
  ( +-  0.22% )

That's a few msec saved during boot by enabling DMA read (the gain
would be more substantial if other & bigger fw-cfg entries are read by
others from sysfs, unfortunately, it's not clear if we can always
enable DMA there)

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 61 ++
 1 file changed, 50 insertions(+), 11 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 3015e77aebca..94df57e9be66 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -124,12 +124,47 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 
length, u32 control)
return ret;
 }
 
+/* with acpi & dev locks taken */
+static ssize_t fw_cfg_read_blob_dma(u16 key,
+   void *buf, loff_t pos, size_t count)
+{
+   ssize_t ret;
+
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(buf, count, key << 16
+   | FW_CFG_DMA_CTL_SELECT
+   | FW_CFG_DMA_CTL_READ);
+   } else {
+   fw_cfg_sel_endianness(key);
+   ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
+   if (ret < 0)
+   return ret;
+   ret = fw_cfg_dma_transfer(buf, count,
+   FW_CFG_DMA_CTL_READ);
+   }
+
+   return ret;
+}
+
+/* with acpi & dev locks taken */
+static ssize_t fw_cfg_read_blob_io(u16 key,
+   void *buf, loff_t pos, size_t count)
+{
+   fw_cfg_sel_endianness(key);
+   while (pos-- > 0)
+   ioread8(fw_cfg_reg_data);
+   ioread8_rep(fw_cfg_reg_data, buf, count);
+   return count;
+}
+
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
 static ssize_t fw_cfg_read_blob(u16 key,
-   void *buf, loff_t pos, size_t count)
+   void *buf, loff_t pos, size_t count,
+   bool dma)
 {
u32 glk = -1U;
acpi_status status;
+   ssize_t ret;
 
/* If we have ACPI, ensure mutual exclusion against any potential
 * device access by the firmware, e.g. via AML methods:
@@ -143,14 +178,17 @@ static ssize_t fw_cfg_read_blob(u16 key,
}
 
mutex_lock(&fw_cfg_dev_lock);
-   fw_cfg_sel_endianness(key);
-   while (pos-- > 0)
-   ioread8(fw_cfg_reg_data);
-   ioread8_rep(fw_cfg_reg_data, buf, count);
+   if (dma && fw_cfg_dma_enabled()) {
+   ret = fw_cfg_read_blob_dma(key, buf, pos, count);
+   } else {
+   ret = fw_cfg_read_blob_io(key, buf, pos, count);
+   }
+
mutex_unlock(&fw_cfg_dev_lock);
 
acpi_release_global_lock(glk);
-   return count;
+
+   return ret;
 }
 
 #ifdef CONFIG_CRASH_CORE
@@ -284,7 +322,7 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
 
/* verify fw_cfg device signature */
if (fw_cfg_read_blob(FW_CFG_SIGNATURE, sig,
-   0, FW_CFG_SIG_SIZE) < 0 ||
+   0, FW_CFG_SIG_SIZE, false) < 0 ||
memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
fw_cfg_io_cleanup()

[PATCH v15 09/11] fw_cfg: add DMA register

2018-02-15 Thread Marc-André Lureau
Add an optional <dma_off> kernel module (or command line) parameter
using the following syntax:

  [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
 or
  [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]

and initializes the register address using given or default offset.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 53 --
 1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 5e6e5ac71dab..c28bec4b5663 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -10,20 +10,21 @@
  * and select subsets of aarch64), a Device Tree node (on arm), or using
  * a kernel module (or command line) parameter with the following syntax:
  *
- *  [qemu_fw_cfg.]ioport=@[::]
+ *  [qemu_fw_cfg.]ioport=@[::[:]]
  * or
- *  [qemu_fw_cfg.]mmio=@[::]
+ *  [qemu_fw_cfg.]mmio=@[::[:]]
  *
  * where:
  *   := size of ioport or mmio range
  *   := physical base address of ioport or mmio range
  *   := (optional) offset of control register
  *   := (optional) offset of data register
+ *   := (optional) offset of dma register
  *
  * e.g.:
- *  qemu_fw_cfg.ioport=2@0x510:0:1 (the default on x86)
+ *  qemu_fw_cfg.ioport=12@0x510:0:1:4  (the default on x86)
  * or
- *  qemu_fw_cfg.mmio=0xA@0x902:8:0 (the default on arm)
+ *  qemu_fw_cfg.mmio=16@0x902:8:0:16   (the default on arm)
  */
 
 #include 
@@ -45,6 +46,7 @@ static resource_size_t fw_cfg_p_size;
 static void __iomem *fw_cfg_dev_base;
 static void __iomem *fw_cfg_reg_ctrl;
 static void __iomem *fw_cfg_reg_data;
+static void __iomem *fw_cfg_reg_dma;
 
 /* atomic access to fw_cfg device (potentially slow i/o, so using mutex) */
 static DEFINE_MUTEX(fw_cfg_dev_lock);
@@ -104,12 +106,14 @@ static void fw_cfg_io_cleanup(void)
 # if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
 #  define FW_CFG_CTRL_OFF 0x08
 #  define FW_CFG_DATA_OFF 0x00
+#  define FW_CFG_DMA_OFF 0x10
 # elif (defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC32)) /* ppc/mac,sun4m 
*/
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x02
 # elif (defined(CONFIG_X86) || defined(CONFIG_SPARC64)) /* x86, sun4u */
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x01
+#  define FW_CFG_DMA_OFF 0x04
 # else
 #  error "QEMU FW_CFG not available on this architecture!"
 # endif
@@ -119,7 +123,7 @@ static void fw_cfg_io_cleanup(void)
 static int fw_cfg_do_platform_probe(struct platform_device *pdev)
 {
char sig[FW_CFG_SIG_SIZE];
-   struct resource *range, *ctrl, *data;
+   struct resource *range, *ctrl, *data, *dma;
 
/* acquire i/o range details */
fw_cfg_is_mmio = false;
@@ -156,6 +160,7 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
/* were custom register offsets provided (e.g. on the command line)? */
ctrl = platform_get_resource_byname(pdev, IORESOURCE_REG, "ctrl");
data = platform_get_resource_byname(pdev, IORESOURCE_REG, "data");
+   dma = platform_get_resource_byname(pdev, IORESOURCE_REG, "dma");
if (ctrl && data) {
fw_cfg_reg_ctrl = fw_cfg_dev_base + ctrl->start;
fw_cfg_reg_data = fw_cfg_dev_base + data->start;
@@ -165,6 +170,13 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
fw_cfg_reg_data = fw_cfg_dev_base + FW_CFG_DATA_OFF;
}
 
+   if (dma)
+   fw_cfg_reg_dma = fw_cfg_dev_base + dma->start;
+#ifdef FW_CFG_DMA_OFF
+   else
+   fw_cfg_reg_dma = fw_cfg_dev_base + FW_CFG_DMA_OFF;
+#endif
+
/* verify fw_cfg device signature */
if (fw_cfg_read_blob(FW_CFG_SIGNATURE, sig,
0, FW_CFG_SIG_SIZE) < 0 ||
@@ -630,6 +642,7 @@ static struct platform_device *fw_cfg_cmdline_dev;
 /* use special scanf/printf modifier for phys_addr_t, resource_size_t */
 #define PH_ADDR_SCAN_FMT "@%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i" \
+":%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i%n"
 
 #define PH_ADDR_PR_1_FMT "0x%" __PHYS_ADDR_PREFIX "x@" \
@@ -639,12 +652,15 @@ static struct platform_device *fw_cfg_cmdline_dev;
 ":%" __PHYS_ADDR_PREFIX "u" \
 ":%" __PHYS_ADDR_PREFIX "u"
 
+#define PH_ADDR_PR_4_FMT PH_ADDR_PR_3_FMT \
+":%" __PHYS_ADDR_PREFIX "u"
+
 static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
 {
-   struct resource res[3] = {};
+   struct resource res[4] = {};
char *str;
phys_addr_t base;
-

Re: [PATCH v14 2/9] fw_cfg: add a public uapi header

2018-02-15 Thread Marc-André Lureau
Hi

On Thu, Feb 15, 2018 at 7:20 PM, Michael S. Tsirkin  wrote:
> On Thu, Feb 15, 2018 at 10:25:27AM +0100, Marc-Andre Lureau wrote:
>> Hi
>>
>> On Wed, Feb 14, 2018 at 9:41 PM, Michael S. Tsirkin  wrote:
>> > On Wed, Feb 14, 2018 at 03:18:43PM +0100, Marc-André Lureau wrote:
>> >> Create a common header file for well-known values and structures to be
>> >> shared by the Linux kernel with qemu or other projects.
>> >>
>> >> Suggested-by: Michael S. Tsirkin 
>> >> Signed-off-by: Marc-André Lureau 
>> >>
>> >> ---
>> >>
>> >> The related qemu patch making use of it, to be submitted:
>> >> https://github.com/elmarco/qemu/commit/4884fc9e9c4c4467a371e5a40f3181239e1b70f5
>> >> ---
>> >>  MAINTAINERS|   1 +
>> >>  drivers/firmware/qemu_fw_cfg.c |  22 +
>> >>  include/uapi/linux/fw_cfg.h| 102 
>> >> +
>> >>  3 files changed, 105 insertions(+), 20 deletions(-)
>> >>  create mode 100644 include/uapi/linux/fw_cfg.h
>> >>
>> >> diff --git a/MAINTAINERS b/MAINTAINERS
>> >> index 3bdc260e36b7..a66b65f62811 100644
>> >> --- a/MAINTAINERS
>> >> +++ b/MAINTAINERS
>> >> @@ -11352,6 +11352,7 @@ M:"Michael S. Tsirkin" 
>> >>  L:   qemu-de...@nongnu.org
>> >>  S:   Maintained
>> >>  F:   drivers/firmware/qemu_fw_cfg.c
>> >> +F:   include/uapi/linux/fw_cfg.h
>> >>
>> >>  QIB DRIVER
>> >>  M:   Dennis Dalessandro 
>> >> diff --git a/drivers/firmware/qemu_fw_cfg.c 
>> >> b/drivers/firmware/qemu_fw_cfg.c
>> >> index a41b572eeeb1..90f467232777 100644
>> >> --- a/drivers/firmware/qemu_fw_cfg.c
>> >> +++ b/drivers/firmware/qemu_fw_cfg.c
>> >> @@ -32,30 +32,12 @@
>> >>  #include 
>> >>  #include 
>> >>  #include 
>> >> +#include 
>> >>
>> >>  MODULE_AUTHOR("Gabriel L. Somlo ");
>> >>  MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
>> >>  MODULE_LICENSE("GPL");
>> >>
>> >> -/* selector key values for "well-known" fw_cfg entries */
>> >> -#define FW_CFG_SIGNATURE  0x00
>> >> -#define FW_CFG_ID 0x01
>> >> -#define FW_CFG_FILE_DIR   0x19
>> >> -
>> >> -/* size in bytes of fw_cfg signature */
>> >> -#define FW_CFG_SIG_SIZE 4
>> >> -
>> >> -/* fw_cfg "file name" is up to 56 characters (including terminating nul) 
>> >> */
>> >> -#define FW_CFG_MAX_FILE_PATH 56
>> >> -
>> >> -/* fw_cfg file directory entry type */
>> >> -struct fw_cfg_file {
>> >> - u32 size;
>> >> - u16 select;
>> >> - u16 reserved;
>> >> - char name[FW_CFG_MAX_FILE_PATH];
>> >> -};
>> >> -
>> >>  /* fw_cfg device i/o register addresses */
>> >>  static bool fw_cfg_is_mmio;
>> >>  static phys_addr_t fw_cfg_p_base;
>> >> @@ -597,7 +579,7 @@ MODULE_DEVICE_TABLE(of, fw_cfg_sysfs_mmio_match);
>> >>
>> >>  #ifdef CONFIG_ACPI
>> >>  static const struct acpi_device_id fw_cfg_sysfs_acpi_match[] = {
>> >> - { "QEMU0002", },
>> >> + { FW_CFG_ACPI_DEVICE_ID, },
>> >>   {},
>> >>  };
>> >>  MODULE_DEVICE_TABLE(acpi, fw_cfg_sysfs_acpi_match);
>> >> diff --git a/include/uapi/linux/fw_cfg.h b/include/uapi/linux/fw_cfg.h
>> >> new file mode 100644
>> >> index ..5b8136ce46ee
>> >> --- /dev/null
>> >> +++ b/include/uapi/linux/fw_cfg.h
>> >> @@ -0,0 +1,102 @@
>> >> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
>> >> +#ifndef _LINUX_FW_CFG_H
>> >> +#define _LINUX_FW_CFG_H
>> >> +
>> >> +#include 
>> >> +
>> >> +#define FW_CFG_ACPI_DEVICE_ID"QEMU0002"
>> >> +
>> >> +/* selector key values for "well-known" fw_cfg entries */
>> >> +#define FW_CFG_SIGNATURE 0x00
>> >> +#define FW_CFG_ID0x01
>> >> +#define FW_CFG_UUID  0x02
>> >> +#define FW_CFG_RAM_SIZE  0x03
>> >> +#define FW_CFG_NOGRAPHIC 0x04
>> >> +#define FW_CFG_NB_CPUS 

Re: [PATCH v14 8/9] fw_cfg: write vmcoreinfo details

2018-02-15 Thread Marc-André Lureau
Hi

On Thu, Feb 15, 2018 at 7:09 PM, Michael S. Tsirkin  wrote:
> On Wed, Feb 14, 2018 at 03:18:49PM +0100, Marc-André Lureau wrote:
>> If the "etc/vmcoreinfo" fw_cfg file is present and we are not running
>> the kdump kernel, write the addr/size of the vmcoreinfo ELF note.
>>
>> The DMA operation is expected to run synchronously with today qemu,
>> but the specification states that it may become async, so we run
>> "control" field check in a loop for eventual changes.
>>
>> Signed-off-by: Marc-André Lureau 
>> ---
>>  drivers/firmware/qemu_fw_cfg.c | 144 
>> -
>>  1 file changed, 141 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index 37638b95cb45..69939e2529f2 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -34,11 +34,17 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>> +#include 
>> +#include 
>>
>>  MODULE_AUTHOR("Gabriel L. Somlo ");
>>  MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
>>  MODULE_LICENSE("GPL");
>>
>> +/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
>> +static u32 fw_cfg_rev;
>> +
>>  /* fw_cfg device i/o register addresses */
>>  static bool fw_cfg_is_mmio;
>>  static phys_addr_t fw_cfg_p_base;
>> @@ -59,6 +65,65 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
>>   (u16 __force)cpu_to_le16(key);
>>  }
>>
>> +static inline bool fw_cfg_dma_enabled(void)
>> +{
>> + return (fw_cfg_rev & FW_CFG_VERSION_DMA) && fw_cfg_reg_dma;
>> +}
>> +
>> +/* qemu fw_cfg device is sync today, but spec says it may become async */
>> +static void fw_cfg_wait_for_control(struct fw_cfg_dma_access *d)
>> +{
>> + do {
>> + u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
>> +
>> + if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
>> + return;
>> +
>> + usleep_range(50, 100);
>
> I would just do cpu_relax() here.

ok, I didn't know that one.

>
>> + } while (true);
>> +
>> + /* do not reorder the read to d->control */
>> + rmb();
>
> Hmm. I don't really understand the comment.
> Is this code ever reacheable? How does it help?

I thought that's what you suggested in v13 review, but true, I should
replace the return with a break to reach it. Is that what you expect
too? (my understanding is to make sure the READ_ONCE(control) in
wait_for_control happens before READ_ONCE(control) after in
dma_transfer)

>
>> +}
>> +
>> +static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
>> +{
>> + phys_addr_t dma;
>> + struct fw_cfg_dma_access *d = NULL;
>> + ssize_t ret = length;
>> +
>> + d = kmalloc(sizeof(*d), GFP_KERNEL);
>> + if (!d) {
>> + ret = -ENOMEM;
>> + goto end;
>> + }
>> +
>> + /* fw_cfg device does not need IOMMU protection, so use physical 
>> addresses */
>> + *d = (struct fw_cfg_dma_access) {
>> + .address = cpu_to_be64(address ? virt_to_phys(address) : 0),
>> + .length = cpu_to_be32(length),
>> + .control = cpu_to_be32(control)
>> + };
>> +
>> + dma = virt_to_phys(d);
>> +
>> + iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
>> + /* force memory to sync before notifying device via MMIO */
>> + wmb();
>> + iowrite32be(dma, fw_cfg_reg_dma + 4);
>> +
>> + fw_cfg_wait_for_control(d);
>> +
>> + if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
>> + ret = -EIO;
>> + }
>> +
>> +end:
>> + kfree(d);
>> +
>> + return ret;
>> +}
>> +
>>  /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
>>  static inline void fw_cfg_read_blob(u16 key,
>>   void *buf, loff_t pos, size_t count)
>> @@ -87,6 +152,47 @@ static inline void fw_cfg_read_blob(u16 key,
>>   acpi_release_global_lock(glk);
>>  }
>>
>> +#ifdef CONFIG_CRASH_CORE
>> +/* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
>> +static ssize_t fw_cfg_write_blob(u16 key,
>> +  void *buf, loff_t pos, size_t count)
>> +{
>> 

[PATCH v14 1/9] crash: export paddr_vmcoreinfo_note()

2018-02-14 Thread Marc-André Lureau
The following patch is going to use the symbol from the fw_cfg module,
to call the function and write the note location details in the
vmcoreinfo entry, so qemu can produce dumps with the vmcoreinfo note.

CC: Andrew Morton 
CC: Baoquan He 
CC: Dave Young 
CC: Dave Young 
CC: Hari Bathini 
CC: Tony Luck 
CC: Vivek Goyal 
Signed-off-by: Marc-André Lureau 
Acked-by: Gabriel Somlo 
---
 kernel/crash_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 4f63597c824d..a93590cdd9e1 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -376,6 +376,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
 {
return __pa(vmcoreinfo_note);
 }
+EXPORT_SYMBOL(paddr_vmcoreinfo_note);
 
 static int __init crash_save_vmcoreinfo_init(void)
 {
-- 
2.16.1.73.g5832b7e9f2



[PATCH v14 3/9] fw_cfg: fix sparse warnings in fw_cfg_sel_endianness()

2018-02-14 Thread Marc-André Lureau
The function is used for both LE and BE target types, so use __force casting.

Fixes:
$ make C=1 CF=-D__CHECK_ENDIAN__ drivers/firmware/qemu_fw_cfg.o

drivers/firmware/qemu_fw_cfg.c:55:33: warning: restricted __be16 degrades to 
integer
drivers/firmware/qemu_fw_cfg.c:55:52: warning: restricted __le16 degrades to 
integer

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 90f467232777..85e693287d87 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -52,7 +52,9 @@ static DEFINE_MUTEX(fw_cfg_dev_lock);
 /* pick appropriate endianness for selector key */
 static inline u16 fw_cfg_sel_endianness(u16 key)
 {
-   return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
+   return fw_cfg_is_mmio ?
+   (u16 __force)cpu_to_be16(key) :
+   (u16 __force)cpu_to_le16(key);
 }
 
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
-- 
2.16.1.73.g5832b7e9f2



[PATCH v14 6/9] fw_cfg: fix sparse warnings around FW_CFG_FILE_DIR read

2018-02-14 Thread Marc-André Lureau
Use struct fw_cfg_files to read the directory size, fixing the sparse
warnings:

drivers/firmware/qemu_fw_cfg.c:485:17: warning: cast to restricted __be32

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 4c4813409447..c4c726841ba7 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -478,18 +478,19 @@ static int fw_cfg_register_dir_entries(void)
 {
int ret = 0;
u32 count, i;
+   struct fw_cfg_files files;
struct fw_cfg_file *dir;
size_t dir_size;
 
-   fw_cfg_read_blob(FW_CFG_FILE_DIR, &count, 0, sizeof(count));
-   count = be32_to_cpu(count);
+   fw_cfg_read_blob(FW_CFG_FILE_DIR, &files.count, 0, sizeof(files.count));
+   count = be32_to_cpu(files.count);
dir_size = count * sizeof(struct fw_cfg_file);
 
dir = kmalloc(dir_size, GFP_KERNEL);
if (!dir)
return -ENOMEM;
 
-   fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(count), dir_size);
+   fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(files.count), dir_size);
 
for (i = 0; i < count; i++) {
ret = fw_cfg_register_file(&dir[i]);
-- 
2.16.1.73.g5832b7e9f2



[PATCH v14 8/9] fw_cfg: write vmcoreinfo details

2018-02-14 Thread Marc-André Lureau
If the "etc/vmcoreinfo" fw_cfg file is present and we are not running
the kdump kernel, write the addr/size of the vmcoreinfo ELF note.

The DMA operation is expected to run synchronously with today's qemu,
but the specification states that it may become async, so we run the
"control" field check in a loop for eventual changes.

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 144 -
 1 file changed, 141 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 37638b95cb45..69939e2529f2 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -34,11 +34,17 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
 MODULE_LICENSE("GPL");
 
+/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
+static u32 fw_cfg_rev;
+
 /* fw_cfg device i/o register addresses */
 static bool fw_cfg_is_mmio;
 static phys_addr_t fw_cfg_p_base;
@@ -59,6 +65,65 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
(u16 __force)cpu_to_le16(key);
 }
 
+static inline bool fw_cfg_dma_enabled(void)
+{
+   return (fw_cfg_rev & FW_CFG_VERSION_DMA) && fw_cfg_reg_dma;
+}
+
+/* qemu fw_cfg device is sync today, but spec says it may become async */
+static void fw_cfg_wait_for_control(struct fw_cfg_dma_access *d)
+{
+   do {
+   u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
+
+   if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
+   return;
+
+   usleep_range(50, 100);
+   } while (true);
+
+   /* do not reorder the read to d->control */
+   rmb();
+}
+
+static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
+{
+   phys_addr_t dma;
+   struct fw_cfg_dma_access *d = NULL;
+   ssize_t ret = length;
+
+   d = kmalloc(sizeof(*d), GFP_KERNEL);
+   if (!d) {
+   ret = -ENOMEM;
+   goto end;
+   }
+
+   /* fw_cfg device does not need IOMMU protection, so use physical 
addresses */
+   *d = (struct fw_cfg_dma_access) {
+   .address = cpu_to_be64(address ? virt_to_phys(address) : 0),
+   .length = cpu_to_be32(length),
+   .control = cpu_to_be32(control)
+   };
+
+   dma = virt_to_phys(d);
+
+   iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
+   /* force memory to sync before notifying device via MMIO */
+   wmb();
+   iowrite32be(dma, fw_cfg_reg_dma + 4);
+
+   fw_cfg_wait_for_control(d);
+
+   if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
+   ret = -EIO;
+   }
+
+end:
+   kfree(d);
+
+   return ret;
+}
+
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
 static inline void fw_cfg_read_blob(u16 key,
void *buf, loff_t pos, size_t count)
@@ -87,6 +152,47 @@ static inline void fw_cfg_read_blob(u16 key,
acpi_release_global_lock(glk);
 }
 
+#ifdef CONFIG_CRASH_CORE
+/* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
+static ssize_t fw_cfg_write_blob(u16 key,
+void *buf, loff_t pos, size_t count)
+{
+   u32 glk = -1U;
+   acpi_status status;
+   ssize_t ret = count;
+
+   /* If we have ACPI, ensure mutual exclusion against any potential
+* device access by the firmware, e.g. via AML methods:
+*/
+   status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
+   if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
+   /* Should never get here */
+   WARN(1, "%s: Failed to lock ACPI!\n", __func__);
+   return -EINVAL;
+   }
+
+   mutex_lock(&fw_cfg_dev_lock);
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(buf, count, key << 16
+ | FW_CFG_DMA_CTL_SELECT
+ | FW_CFG_DMA_CTL_WRITE);
+   } else {
+   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+   ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
+   if (ret < 0)
+   goto end;
+   ret = fw_cfg_dma_transfer(buf, count, FW_CFG_DMA_CTL_WRITE);
+   }
+
+end:
+   mutex_unlock(&fw_cfg_dev_lock);
+
+   acpi_release_global_lock(glk);
+
+   return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
 /* clean up fw_cfg device i/o */
 static void fw_cfg_io_cleanup(void)
 {
@@ -185,9 +291,6 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
return 0;
 }
 
-/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
-static u3

[PATCH v14 9/9] RFC: fw_cfg: do DMA read operation

2018-02-14 Thread Marc-André Lureau
Modify fw_cfg_read_blob() to use DMA if the device supports it.
Return errors, because the operation may fail.

So far, only one call in fw_cfg_register_dir_entries() is using a
kmalloc'ed buf and is thus clearly eligible for DMA read.

Initially, I didn't implement DMA read to speed up boot time, but as a
first step before introducing DMA write (since read operations were
already present). Moreover, I didn't realize fw-cfg entries were
being read by the kernel during boot by default. But actually fw-cfg
entries are being populated during module probe. I knew DMA improved
BIOS boot time a lot (the main reason the DMA interface was added,
afaik). Let's see the time it takes to read the whole ACPI
tables (128kb allocated):

 # time cat /sys/firmware/qemu_fw_cfg/by_name/etc/acpi/tables/raw
  - with DMA: sys 0m0.003s
  - without DMA (-global fw_cfg.dma_enabled=off): sys 0m7.674s

FW_CFG_FILE_DIR (0x19) is the only "file" that is read during kernel
boot to populate the sysfs qemu_fw_cfg directory, and it is quite
small (1-2kb). Since it is not itself exposed, in order to measure
the time it takes to read such a small file, I took a comparably sized
file of 2048 bytes and exposed it (-fw_cfg test,file=file with a
modified read_raw enabling DMA):

 # perf stat -r 100 cat /sys/firmware/qemu_fw_cfg/by_name/test/raw >/dev/null
  - with DMA:
  0.636037  task-clock (msec) #0.141 CPUs utilized  
  ( +-  1.19% )
  - without DMA:
  6.430128  task-clock (msec) #0.622 CPUs utilized  
  ( +-  0.22% )

That's a few msec saved during boot by enabling DMA read. The gain
would be more substantial if other, bigger fw-cfg entries were read by
others from sysfs; unfortunately, it's not clear if we can always
enable DMA there.

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 80 ++
 1 file changed, 66 insertions(+), 14 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 69939e2529f2..ba9b907a4399 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -124,12 +124,46 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 
length, u32 control)
return ret;
 }
 
+/* with acpi & dev locks taken */
+static ssize_t fw_cfg_read_blob_dma(u16 key,
+   void *buf, loff_t pos, size_t count)
+{
+   ssize_t ret;
+
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(buf, count, key << 16
+   | FW_CFG_DMA_CTL_SELECT
+   | FW_CFG_DMA_CTL_READ);
+   } else {
+   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+   ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
+   if (ret < 0)
+   return ret;
+   ret = fw_cfg_dma_transfer(buf, count,
+   FW_CFG_DMA_CTL_READ);
+   }
+
+   return ret;
+}
+
+/* with acpi & dev locks taken */
+static void fw_cfg_read_blob_io(u16 key,
+   void *buf, loff_t pos, size_t count)
+{
+   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+   while (pos-- > 0)
+   ioread8(fw_cfg_reg_data);
+   ioread8_rep(fw_cfg_reg_data, buf, count);
+}
+
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
-static inline void fw_cfg_read_blob(u16 key,
-   void *buf, loff_t pos, size_t count)
+static ssize_t fw_cfg_read_blob(u16 key,
+   void *buf, loff_t pos, size_t count,
+   bool dma)
 {
u32 glk = -1U;
acpi_status status;
+   ssize_t ret = count;
 
/* If we have ACPI, ensure mutual exclusion against any potential
 * device access by the firmware, e.g. via AML methods:
@@ -139,17 +173,21 @@ static inline void fw_cfg_read_blob(u16 key,
/* Should never get here */
WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
memset(buf, 0, count);
-   return;
+   return -EINVAL;
}
 
mutex_lock(&fw_cfg_dev_lock);
-   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
-   while (pos-- > 0)
-   ioread8(fw_cfg_reg_data);
-   ioread8_rep(fw_cfg_reg_data, buf, count);
+   if (dma && fw_cfg_dma_enabled()) {
+   ret = fw_cfg_read_blob_dma(key, buf, pos, count);
+   } else {
+   fw_cfg_read_blob_io(key, buf, pos, count);
+   }
+
mutex_unlock(&fw_cfg_dev_lock);
 
acpi_release_global_lock(glk);
+
+   return ret;
 }
 
 #ifdef CONFIG_CRASH_CORE
@@ -282,8 +320,9 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
 #endif
 
   

[PATCH v14 7/9] fw_cfg: add DMA register

2018-02-14 Thread Marc-André Lureau
Add an optional <dma_off> kernel module (or command line) parameter
using the following syntax:

  [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
 or
  [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]

and initialize the register address using the given or default offset.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 53 --
 1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index c4c726841ba7..37638b95cb45 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -10,20 +10,21 @@
  * and select subsets of aarch64), a Device Tree node (on arm), or using
  * a kernel module (or command line) parameter with the following syntax:
  *
- *  [qemu_fw_cfg.]ioport=@[::]
+ *  [qemu_fw_cfg.]ioport=@[::[:]]
  * or
- *  [qemu_fw_cfg.]mmio=@[::]
+ *  [qemu_fw_cfg.]mmio=@[::[:]]
  *
  * where:
  *   := size of ioport or mmio range
  *   := physical base address of ioport or mmio range
  *   := (optional) offset of control register
  *   := (optional) offset of data register
+ *   := (optional) offset of dma register
  *
  * e.g.:
- *  qemu_fw_cfg.ioport=2@0x510:0:1 (the default on x86)
+ *  qemu_fw_cfg.ioport=12@0x510:0:1:4  (the default on x86)
  * or
- *  qemu_fw_cfg.mmio=0xA@0x902:8:0 (the default on arm)
+ *  qemu_fw_cfg.mmio=16@0x902:8:0:16   (the default on arm)
  */
 
 #include 
@@ -45,6 +46,7 @@ static resource_size_t fw_cfg_p_size;
 static void __iomem *fw_cfg_dev_base;
 static void __iomem *fw_cfg_reg_ctrl;
 static void __iomem *fw_cfg_reg_data;
+static void __iomem *fw_cfg_reg_dma;
 
 /* atomic access to fw_cfg device (potentially slow i/o, so using mutex) */
 static DEFINE_MUTEX(fw_cfg_dev_lock);
@@ -102,12 +104,14 @@ static void fw_cfg_io_cleanup(void)
 # if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
 #  define FW_CFG_CTRL_OFF 0x08
 #  define FW_CFG_DATA_OFF 0x00
+#  define FW_CFG_DMA_OFF 0x10
 # elif (defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC32)) /* ppc/mac,sun4m 
*/
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x02
 # elif (defined(CONFIG_X86) || defined(CONFIG_SPARC64)) /* x86, sun4u */
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x01
+#  define FW_CFG_DMA_OFF 0x04
 # else
 #  error "QEMU FW_CFG not available on this architecture!"
 # endif
@@ -117,7 +121,7 @@ static void fw_cfg_io_cleanup(void)
 static int fw_cfg_do_platform_probe(struct platform_device *pdev)
 {
char sig[FW_CFG_SIG_SIZE];
-   struct resource *range, *ctrl, *data;
+   struct resource *range, *ctrl, *data, *dma;
 
/* acquire i/o range details */
fw_cfg_is_mmio = false;
@@ -154,6 +158,7 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
/* were custom register offsets provided (e.g. on the command line)? */
ctrl = platform_get_resource_byname(pdev, IORESOURCE_REG, "ctrl");
data = platform_get_resource_byname(pdev, IORESOURCE_REG, "data");
+   dma = platform_get_resource_byname(pdev, IORESOURCE_REG, "dma");
if (ctrl && data) {
fw_cfg_reg_ctrl = fw_cfg_dev_base + ctrl->start;
fw_cfg_reg_data = fw_cfg_dev_base + data->start;
@@ -163,6 +168,13 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
fw_cfg_reg_data = fw_cfg_dev_base + FW_CFG_DATA_OFF;
}
 
+   if (dma)
+   fw_cfg_reg_dma = fw_cfg_dev_base + dma->start;
+#ifdef FW_CFG_DMA_OFF
+   else
+   fw_cfg_reg_dma = fw_cfg_dev_base + FW_CFG_DMA_OFF;
+#endif
+
/* verify fw_cfg device signature */
fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
@@ -617,6 +629,7 @@ static struct platform_device *fw_cfg_cmdline_dev;
 /* use special scanf/printf modifier for phys_addr_t, resource_size_t */
 #define PH_ADDR_SCAN_FMT "@%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i" \
+":%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i%n"
 
 #define PH_ADDR_PR_1_FMT "0x%" __PHYS_ADDR_PREFIX "x@" \
@@ -626,12 +639,15 @@ static struct platform_device *fw_cfg_cmdline_dev;
 ":%" __PHYS_ADDR_PREFIX "u" \
 ":%" __PHYS_ADDR_PREFIX "u"
 
+#define PH_ADDR_PR_4_FMT PH_ADDR_PR_3_FMT \
+":%" __PHYS_ADDR_PREFIX "u"
+
 static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
 {
-   struct resource res[3] = {};
+   struct resource res[4] = {};
char *str;
p

[PATCH v14 5/9] fw_cfg: fix sparse warning reading FW_CFG_ID

2018-02-14 Thread Marc-André Lureau
Use a restricted type for reading FW_CFG_ID, fixing sparse warning:

drivers/firmware/qemu_fw_cfg.c:540:22: warning: cast to restricted __le32

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 8ad19086e5c5..4c4813409447 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -511,6 +511,7 @@ static inline void fw_cfg_kobj_cleanup(struct kobject *kobj)
 static int fw_cfg_sysfs_probe(struct platform_device *pdev)
 {
int err;
+   __le32 rev;
 
/* NOTE: If we supported multiple fw_cfg devices, we'd first create
 * a subdirectory named after e.g. pdev->id, then hang per-device
@@ -536,8 +537,8 @@ static int fw_cfg_sysfs_probe(struct platform_device *pdev)
goto err_probe;
 
/* get revision number, add matching top-level attribute */
-   fw_cfg_read_blob(FW_CFG_ID, &fw_cfg_rev, 0, sizeof(fw_cfg_rev));
-   fw_cfg_rev = le32_to_cpu(fw_cfg_rev);
+   fw_cfg_read_blob(FW_CFG_ID, &rev, 0, sizeof(rev));
+   fw_cfg_rev = le32_to_cpu(rev);
err = sysfs_create_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
if (err)
goto err_rev;
-- 
2.16.1.73.g5832b7e9f2



[PATCH v14 0/9] fw_cfg: add DMA operations & etc/vmcoreinfo support

2018-02-14 Thread Marc-André Lureau
Hi,

This series adds DMA operations support to the qemu fw_cfg kernel
module and populates "etc/vmcoreinfo" with vmcoreinfo location
details (entry added since qemu 2.11 with -device vmcoreinfo).

v14:
- add "fw_cfg: add a public uapi header"
- fix sparse warnings & don't introduce new warnings
- add memory barriers to force IO ordering
- split fw_cfg_read_blob() in fw_cfg_read_blob_io() and
  fw_cfg_read_blob_dma()
- add error handling to fw_cfg_read_blob() callers
- minor stylistic changes

v13:
- reorder patch series, introduce DMA write before DMA read
- do some measurements of DMA read speed-ups

v12:
- fix virt_to_phys(NULL) panic with CONFIG_DEBUG_VIRTUAL=y
- do not use DMA read, except for kmalloc() memory we allocated
  ourself (only fw_cfg_register_dir_entries() so far)

v11:
- add #include  in last patch,
  fixing kbuild .config test

Marc-André Lureau (9):
  crash: export paddr_vmcoreinfo_note()
  fw_cfg: add a public uapi header
  fw_cfg: fix sparse warnings in fw_cfg_sel_endianness()
  fw_cfg: fix sparse warnings with fw_cfg_file
  fw_cfg: fix sparse warning reading FW_CFG_ID
  fw_cfg: fix sparse warnings around FW_CFG_FILE_DIR read
  fw_cfg: add DMA register
  fw_cfg: write vmcoreinfo details
  RFC: fw_cfg: do DMA read operation

 MAINTAINERS|   1 +
 drivers/firmware/qemu_fw_cfg.c | 333 +
 include/uapi/linux/fw_cfg.h| 102 +
 kernel/crash_core.c|   1 +
 4 files changed, 374 insertions(+), 63 deletions(-)
 create mode 100644 include/uapi/linux/fw_cfg.h

-- 
2.16.1.73.g5832b7e9f2



[PATCH v14 4/9] fw_cfg: fix sparse warnings with fw_cfg_file

2018-02-14 Thread Marc-André Lureau
Modify fw_cfg_sysfs_entry to store entry values, instead of reusing
the restricted types.

Fixes warnings such as:

$ make C=1 CF=-D__CHECK_ENDIAN__ drivers/firmware/qemu_fw_cfg.o

drivers/firmware/qemu_fw_cfg.c:491:29: warning: incorrect type in assignment 
(different base types)
drivers/firmware/qemu_fw_cfg.c:491:29:expected restricted __be32 [usertype] 
size
drivers/firmware/qemu_fw_cfg.c:491:29:got unsigned int
drivers/firmware/qemu_fw_cfg.c:492:31: warning: incorrect type in assignment 
(different base types)
drivers/firmware/qemu_fw_cfg.c:492:31:expected restricted __be16 [usertype] 
select
drivers/firmware/qemu_fw_cfg.c:492:31:got int

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 28 +++-
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 85e693287d87..8ad19086e5c5 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -192,7 +192,9 @@ static const struct {
 /* fw_cfg_sysfs_entry type */
 struct fw_cfg_sysfs_entry {
struct kobject kobj;
-   struct fw_cfg_file f;
+   u32 size;
+   u16 select;
+   char name[FW_CFG_MAX_FILE_PATH];
struct list_head list;
 };
 
@@ -256,17 +258,17 @@ struct fw_cfg_sysfs_attribute fw_cfg_sysfs_attr_##_attr = 
{ \
 
 static ssize_t fw_cfg_sysfs_show_size(struct fw_cfg_sysfs_entry *e, char *buf)
 {
-   return sprintf(buf, "%u\n", e->f.size);
+   return sprintf(buf, "%u\n", e->size);
 }
 
 static ssize_t fw_cfg_sysfs_show_key(struct fw_cfg_sysfs_entry *e, char *buf)
 {
-   return sprintf(buf, "%u\n", e->f.select);
+   return sprintf(buf, "%u\n", e->select);
 }
 
 static ssize_t fw_cfg_sysfs_show_name(struct fw_cfg_sysfs_entry *e, char *buf)
 {
-   return sprintf(buf, "%s\n", e->f.name);
+   return sprintf(buf, "%s\n", e->name);
 }
 
 static FW_CFG_SYSFS_ATTR(size);
@@ -317,13 +319,13 @@ static ssize_t fw_cfg_sysfs_read_raw(struct file *filp, 
struct kobject *kobj,
 {
struct fw_cfg_sysfs_entry *entry = to_entry(kobj);
 
-   if (pos > entry->f.size)
+   if (pos > entry->size)
return -EINVAL;
 
-   if (count > entry->f.size - pos)
-   count = entry->f.size - pos;
+   if (count > entry->size - pos)
+   count = entry->size - pos;
 
-   fw_cfg_read_blob(entry->f.select, buf, pos, count);
+   fw_cfg_read_blob(entry->select, buf, pos, count);
return count;
 }
 
@@ -442,11 +444,13 @@ static int fw_cfg_register_file(const struct fw_cfg_file 
*f)
return -ENOMEM;
 
/* set file entry information */
-   memcpy(&entry->f, f, sizeof(struct fw_cfg_file));
+   entry->size = be32_to_cpu(f->size);
+   entry->select = be16_to_cpu(f->select);
+   memcpy(entry->name, f->name, FW_CFG_MAX_FILE_PATH);
 
/* register entry under "/sys/firmware/qemu_fw_cfg/by_key/" */
err = kobject_init_and_add(&entry->kobj, &fw_cfg_sysfs_entry_ktype,
-  fw_cfg_sel_ko, "%d", entry->f.select);
+  fw_cfg_sel_ko, "%d", entry->select);
if (err)
goto err_register;
 
@@ -456,7 +460,7 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f)
goto err_add_raw;
 
/* try adding "/sys/firmware/qemu_fw_cfg/by_name/" symlink */
-   fw_cfg_build_symlink(fw_cfg_fname_kset, &entry->kobj, entry->f.name);
+   fw_cfg_build_symlink(fw_cfg_fname_kset, &entry->kobj, entry->name);
 
/* success, add entry to global cache */
fw_cfg_sysfs_cache_enlist(entry);
@@ -488,8 +492,6 @@ static int fw_cfg_register_dir_entries(void)
fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(count), dir_size);
 
for (i = 0; i < count; i++) {
-   dir[i].size = be32_to_cpu(dir[i].size);
-   dir[i].select = be16_to_cpu(dir[i].select);
ret = fw_cfg_register_file(&dir[i]);
if (ret)
break;
-- 
2.16.1.73.g5832b7e9f2



[PATCH v14 2/9] fw_cfg: add a public uapi header

2018-02-14 Thread Marc-André Lureau
Create a common header file for well-known values and structures to be
shared by the Linux kernel with qemu or other projects.

Suggested-by: Michael S. Tsirkin 
Signed-off-by: Marc-André Lureau 

---

The related qemu patch making use of it, to be submitted:
https://github.com/elmarco/qemu/commit/4884fc9e9c4c4467a371e5a40f3181239e1b70f5
---
 MAINTAINERS|   1 +
 drivers/firmware/qemu_fw_cfg.c |  22 +
 include/uapi/linux/fw_cfg.h| 102 +
 3 files changed, 105 insertions(+), 20 deletions(-)
 create mode 100644 include/uapi/linux/fw_cfg.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 3bdc260e36b7..a66b65f62811 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11352,6 +11352,7 @@ M:  "Michael S. Tsirkin" 
 L: qemu-de...@nongnu.org
 S: Maintained
 F: drivers/firmware/qemu_fw_cfg.c
+F: include/uapi/linux/fw_cfg.h
 
 QIB DRIVER
 M: Dennis Dalessandro 
diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index a41b572eeeb1..90f467232777 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -32,30 +32,12 @@
 #include 
 #include 
 #include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
 MODULE_LICENSE("GPL");
 
-/* selector key values for "well-known" fw_cfg entries */
-#define FW_CFG_SIGNATURE  0x00
-#define FW_CFG_ID 0x01
-#define FW_CFG_FILE_DIR   0x19
-
-/* size in bytes of fw_cfg signature */
-#define FW_CFG_SIG_SIZE 4
-
-/* fw_cfg "file name" is up to 56 characters (including terminating nul) */
-#define FW_CFG_MAX_FILE_PATH 56
-
-/* fw_cfg file directory entry type */
-struct fw_cfg_file {
-   u32 size;
-   u16 select;
-   u16 reserved;
-   char name[FW_CFG_MAX_FILE_PATH];
-};
-
 /* fw_cfg device i/o register addresses */
 static bool fw_cfg_is_mmio;
 static phys_addr_t fw_cfg_p_base;
@@ -597,7 +579,7 @@ MODULE_DEVICE_TABLE(of, fw_cfg_sysfs_mmio_match);
 
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id fw_cfg_sysfs_acpi_match[] = {
-   { "QEMU0002", },
+   { FW_CFG_ACPI_DEVICE_ID, },
{},
 };
 MODULE_DEVICE_TABLE(acpi, fw_cfg_sysfs_acpi_match);
diff --git a/include/uapi/linux/fw_cfg.h b/include/uapi/linux/fw_cfg.h
new file mode 100644
index ..5b8136ce46ee
--- /dev/null
+++ b/include/uapi/linux/fw_cfg.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_FW_CFG_H
+#define _LINUX_FW_CFG_H
+
+#include 
+
+#define FW_CFG_ACPI_DEVICE_ID  "QEMU0002"
+
+/* selector key values for "well-known" fw_cfg entries */
+#define FW_CFG_SIGNATURE   0x00
+#define FW_CFG_ID  0x01
+#define FW_CFG_UUID0x02
+#define FW_CFG_RAM_SIZE0x03
+#define FW_CFG_NOGRAPHIC   0x04
+#define FW_CFG_NB_CPUS 0x05
+#define FW_CFG_MACHINE_ID  0x06
+#define FW_CFG_KERNEL_ADDR 0x07
+#define FW_CFG_KERNEL_SIZE 0x08
+#define FW_CFG_KERNEL_CMDLINE  0x09
+#define FW_CFG_INITRD_ADDR 0x0a
+#define FW_CFG_INITRD_SIZE 0x0b
+#define FW_CFG_BOOT_DEVICE 0x0c
+#define FW_CFG_NUMA0x0d
+#define FW_CFG_BOOT_MENU   0x0e
+#define FW_CFG_MAX_CPUS0x0f
+#define FW_CFG_KERNEL_ENTRY0x10
+#define FW_CFG_KERNEL_DATA 0x11
+#define FW_CFG_INITRD_DATA 0x12
+#define FW_CFG_CMDLINE_ADDR0x13
+#define FW_CFG_CMDLINE_SIZE0x14
+#define FW_CFG_CMDLINE_DATA0x15
+#define FW_CFG_SETUP_ADDR  0x16
+#define FW_CFG_SETUP_SIZE  0x17
+#define FW_CFG_SETUP_DATA  0x18
+#define FW_CFG_FILE_DIR0x19
+
+#define FW_CFG_FILE_FIRST  0x20
+#define FW_CFG_FILE_SLOTS_MIN  0x10
+
+#define FW_CFG_WRITE_CHANNEL   0x4000
+#define FW_CFG_ARCH_LOCAL  0x8000
+#define FW_CFG_ENTRY_MASK  (~(FW_CFG_WRITE_CHANNEL | FW_CFG_ARCH_LOCAL))
+
+#define FW_CFG_INVALID 0x
+
+/* width in bytes of fw_cfg control register */
+#define FW_CFG_CTL_SIZE0x02
+
+/* fw_cfg "file name" is up to 56 characters (including terminating nul) */
+#define FW_CFG_MAX_FILE_PATH   56
+
+/* size in bytes of fw_cfg signature */
+#define FW_CFG_SIG_SIZE 4
+
+/* FW_CFG_ID bits */
+#define FW_CFG_VERSION 0x01
+#define FW_CFG_VERSION_DMA 0x02
+
+/* fw_cfg file directory entry type */
+struct fw_cfg_file {
+   __be32 size;/* file size */
+   __be16 select;  /* write this to 0x510 to read it */
+   __u16 reserved;
+   char name[FW_CFG_MAX_FILE_PATH];
+};
+
+struct fw_cfg_files {
+   __be32 count; /* number of entries */
+   struct fw_cfg_file f[];
+};
+
+/* FW_CFG_DMA_CONTROL bits */
+#define FW_CFG_DMA_CTL_ERROR   0x01
+#define FW_CFG_DMA_CTL_READ0x02
+#define FW_CFG_DMA_CTL_SKIP0x04
+#define FW_CFG_DMA_CTL_SELECT  0x08
+#define FW_CFG_DMA_CTL_WRITE   0x10
+
+#define FW

[PATCH v13 0/4] fw_cfg: add DMA operations & etc/vmcoreinfo support

2018-02-06 Thread Marc-André Lureau
Hi,

This series adds DMA operations support to the qemu fw_cfg kernel
module and populates "etc/vmcoreinfo" with vmcoreinfo location
details (entry added since qemu 2.11 with -device vmcoreinfo).

v13:
- reorder patch series, introduce DMA write before DMA read
- do some measurements of DMA read speed-ups

v12:
- fix virt_to_phys(NULL) panic with CONFIG_DEBUG_VIRTUAL=y
- do not use DMA read, except for kmalloc() memory we allocated
  ourselves (only fw_cfg_register_dir_entries() so far)

v11:
- add #include  in last patch,
  fixing kbuild .config test

Marc-André Lureau (4):
  crash: export paddr_vmcoreinfo_note()
  fw_cfg: add DMA register
  fw_cfg: write vmcoreinfo details
  RFC: fw_cfg: do DMA read operation

 drivers/firmware/qemu_fw_cfg.c | 257 -
 kernel/crash_core.c|   1 +
 2 files changed, 230 insertions(+), 28 deletions(-)

-- 
2.16.1.73.g5832b7e9f2



[PATCH v13 3/4] fw_cfg: write vmcoreinfo details

2018-02-06 Thread Marc-André Lureau
If the "etc/vmcoreinfo" fw_cfg file is present and we are not running
the kdump kernel, write the addr/size of the vmcoreinfo ELF note.

The DMA operation is expected to run synchronously with today's qemu,
but the specification states that it may become async, so we check the
"control" field in a loop to cope with such a future change.

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 157 -
 1 file changed, 154 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 740df0df2260..fd576ba7b337 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -33,6 +33,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -43,12 +46,24 @@ MODULE_LICENSE("GPL");
 #define FW_CFG_ID 0x01
 #define FW_CFG_FILE_DIR   0x19
 
+#define FW_CFG_VERSION_DMA 0x02
+#define FW_CFG_DMA_CTL_ERROR   0x01
+#define FW_CFG_DMA_CTL_READ0x02
+#define FW_CFG_DMA_CTL_SKIP0x04
+#define FW_CFG_DMA_CTL_SELECT  0x08
+#define FW_CFG_DMA_CTL_WRITE   0x10
+
 /* size in bytes of fw_cfg signature */
 #define FW_CFG_SIG_SIZE 4
 
 /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
 #define FW_CFG_MAX_FILE_PATH 56
 
+#define VMCOREINFO_FORMAT_ELF 0x1
+
+/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
+static u32 fw_cfg_rev;
+
 /* fw_cfg file directory entry type */
 struct fw_cfg_file {
u32 size;
@@ -57,6 +72,12 @@ struct fw_cfg_file {
char name[FW_CFG_MAX_FILE_PATH];
 };
 
+struct fw_cfg_dma {
+   u32 control;
+   u32 length;
+   u64 address;
+} __packed;
+
 /* fw_cfg device i/o register addresses */
 static bool fw_cfg_is_mmio;
 static phys_addr_t fw_cfg_p_base;
@@ -75,6 +96,59 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
 }
 
+static inline bool fw_cfg_dma_enabled(void)
+{
+   return fw_cfg_rev & FW_CFG_VERSION_DMA && fw_cfg_reg_dma;
+}
+
+/* qemu fw_cfg device is sync today, but spec says it may become async */
+static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
+{
+   do {
+   u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
+
+   if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
+   return;
+
+   usleep_range(50, 100);
+   } while (true);
+}
+
+static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
+{
+   phys_addr_t dma;
+   struct fw_cfg_dma *d = NULL;
+   ssize_t ret = length;
+
+   d = kmalloc(sizeof(*d), GFP_KERNEL);
+   if (!d) {
+   ret = -ENOMEM;
+   goto end;
+   }
+
+   *d = (struct fw_cfg_dma) {
+   .address = address ? cpu_to_be64(virt_to_phys(address)) : 0,
+   .length = cpu_to_be32(length),
+   .control = cpu_to_be32(control)
+   };
+
+   dma = virt_to_phys(d);
+
+   iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
+   iowrite32be(dma, fw_cfg_reg_dma + 4);
+
+   fw_cfg_wait_for_control(d);
+
+   if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
+   ret = -EIO;
+   }
+
+end:
+   kfree(d);
+
+   return ret;
+}
+
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
 static inline void fw_cfg_read_blob(u16 key,
void *buf, loff_t pos, size_t count)
@@ -103,6 +177,47 @@ static inline void fw_cfg_read_blob(u16 key,
acpi_release_global_lock(glk);
 }
 
+#ifdef CONFIG_CRASH_CORE
+/* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
+static ssize_t fw_cfg_write_blob(u16 key,
+void *buf, loff_t pos, size_t count)
+{
+   u32 glk = -1U;
+   acpi_status status;
+   ssize_t ret = count;
+
+   /* If we have ACPI, ensure mutual exclusion against any potential
+* device access by the firmware, e.g. via AML methods:
+*/
+   status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
+   if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
+   /* Should never get here */
+   WARN(1, "%s: Failed to lock ACPI!\n", __func__);
+   return -EINVAL;
+   }
+
+   mutex_lock(&fw_cfg_dev_lock);
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(buf, count, key << 16
+ | FW_CFG_DMA_CTL_SELECT
+ | FW_CFG_DMA_CTL_WRITE);
+   } else {
+   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+   ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
+

[PATCH v13 4/4] RFC: fw_cfg: do DMA read operation

2018-02-06 Thread Marc-André Lureau
Modify fw_cfg_read_blob() to use DMA if the device supports it.
Return errors, because the operation may fail.

So far, only one call in fw_cfg_register_dir_entries() is using a
kmalloc'ed buf and is thus clearly eligible for DMA read.

Initially, I didn't implement DMA read to speed up boot time, but as a
first step before introducing DMA write (since read operations were
already present). Moreover, I didn't realize fw-cfg entries were
being read by the kernel during boot by default. But actually fw-cfg
entries are being populated during module probe. I knew DMA improved
bios boot time a lot (the main reason the DMA interface was added,
afaik). Let's see the time it would take to read the whole ACPI
tables (128kb allocated):

 # time cat /sys/firmware/qemu_fw_cfg/by_name/etc/acpi/tables/raw
  - with DMA: sys 0m0.003s
  - without DMA (-global fw_cfg.dma_enabled=off): sys 0m7.674s

FW_CFG_FILE_DIR (0x19) is the only "file" that is read during kernel
boot to populate the sysfs qemu_fw_cfg directory, and it is quite
small (1-2kb). Since it is not itself exposed there, in order to measure
the time it takes to read such a small file, I took a comparably sized
file of 2048 bytes and exposed it (-fw_cfg test,file=file with a
modified read_raw enabling DMA):

 # perf stat -r 100 cat /sys/firmware/qemu_fw_cfg/by_name/test/raw >/dev/null
  - with DMA:
  0.636037  task-clock (msec) #0.141 CPUs utilized  
  ( +-  1.19% )
  - without DMA:
  6.430128  task-clock (msec) #0.622 CPUs utilized  
  ( +-  0.22% )

That's a few msec saved during boot by enabling DMA read (the gain
would be more substantial if other & bigger fw-cfg entries were read by
others from sysfs; unfortunately, it's not clear if we can always
enable DMA there).

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 47 ++
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index fd576ba7b337..3721dc868a2b 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -150,11 +150,13 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 
length, u32 control)
 }
 
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
-static inline void fw_cfg_read_blob(u16 key,
-   void *buf, loff_t pos, size_t count)
+static ssize_t fw_cfg_read_blob(u16 key,
+   void *buf, loff_t pos, size_t count,
+   bool dma)
 {
u32 glk = -1U;
acpi_status status;
+   ssize_t ret = count;
 
/* If we have ACPI, ensure mutual exclusion against any potential
 * device access by the firmware, e.g. via AML methods:
@@ -164,17 +166,36 @@ static inline void fw_cfg_read_blob(u16 key,
/* Should never get here */
WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
memset(buf, 0, count);
-   return;
+   return -EINVAL;
}
 
mutex_lock(&fw_cfg_dev_lock);
-   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
-   while (pos-- > 0)
-   ioread8(fw_cfg_reg_data);
-   ioread8_rep(fw_cfg_reg_data, buf, count);
+   if (dma && fw_cfg_dma_enabled()) {
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(buf, count, key << 16
+ | FW_CFG_DMA_CTL_SELECT
+ | FW_CFG_DMA_CTL_READ);
+   } else {
+   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+   ret = fw_cfg_dma_transfer(NULL, pos, 
FW_CFG_DMA_CTL_SKIP);
+   if (ret < 0)
+   goto end;
+   ret = fw_cfg_dma_transfer(buf, count,
+ FW_CFG_DMA_CTL_READ);
+   }
+   } else {
+   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+   while (pos-- > 0)
+   ioread8(fw_cfg_reg_data);
+   ioread8_rep(fw_cfg_reg_data, buf, count);
+   }
+
+end:
mutex_unlock(&fw_cfg_dev_lock);
 
acpi_release_global_lock(glk);
+
+   return ret;
 }
 
 #ifdef CONFIG_CRASH_CORE
@@ -307,7 +328,7 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
 #endif
 
/* verify fw_cfg device signature */
-   fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
+   fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE, false);
if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
fw_cfg_io_cleanup();
return -ENODEV;
@@ -494,8 +515,8 @@ static ssize_t fw_cfg_sys

[PATCH v13 1/4] crash: export paddr_vmcoreinfo_note()

2018-02-06 Thread Marc-André Lureau
The following patch is going to use the symbol from the fw_cfg module,
to call the function and write the note location details in the
vmcoreinfo entry, so qemu can produce dumps with the vmcoreinfo note.

CC: Andrew Morton 
CC: Baoquan He 
CC: Dave Young 
CC: Dave Young 
CC: Hari Bathini 
CC: Tony Luck 
CC: Vivek Goyal 
Signed-off-by: Marc-André Lureau 
Acked-by: Gabriel Somlo 
---
 kernel/crash_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 4f63597c824d..a93590cdd9e1 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -376,6 +376,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
 {
return __pa(vmcoreinfo_note);
 }
+EXPORT_SYMBOL(paddr_vmcoreinfo_note);
 
 static int __init crash_save_vmcoreinfo_init(void)
 {
-- 
2.16.1.73.g5832b7e9f2



[PATCH v13 2/4] fw_cfg: add DMA register

2018-02-06 Thread Marc-André Lureau
Add an optional <dma_off> kernel module (or command line) parameter
using the following syntax:

  [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
 or
  [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]

and initializes the register address using given or default offset.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 53 --
 1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index deb483064f53..740df0df2260 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -10,20 +10,21 @@
  * and select subsets of aarch64), a Device Tree node (on arm), or using
  * a kernel module (or command line) parameter with the following syntax:
  *
- *  [qemu_fw_cfg.]ioport=@[::]
+ *  [qemu_fw_cfg.]ioport=@[::[:]]
  * or
- *  [qemu_fw_cfg.]mmio=@[::]
+ *  [qemu_fw_cfg.]mmio=@[::[:]]
  *
  * where:
  *   := size of ioport or mmio range
  *   := physical base address of ioport or mmio range
  *   := (optional) offset of control register
  *   := (optional) offset of data register
+ *   := (optional) offset of dma register
  *
  * e.g.:
- *  qemu_fw_cfg.ioport=2@0x510:0:1 (the default on x86)
+ *  qemu_fw_cfg.ioport=12@0x510:0:1:4  (the default on x86)
  * or
- *  qemu_fw_cfg.mmio=0xA@0x902:8:0 (the default on arm)
+ *  qemu_fw_cfg.mmio=16@0x902:8:0:16   (the default on arm)
  */
 
 #include 
@@ -63,6 +64,7 @@ static resource_size_t fw_cfg_p_size;
 static void __iomem *fw_cfg_dev_base;
 static void __iomem *fw_cfg_reg_ctrl;
 static void __iomem *fw_cfg_reg_data;
+static void __iomem *fw_cfg_reg_dma;
 
 /* atomic access to fw_cfg device (potentially slow i/o, so using mutex) */
 static DEFINE_MUTEX(fw_cfg_dev_lock);
@@ -118,12 +120,14 @@ static void fw_cfg_io_cleanup(void)
 # if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
 #  define FW_CFG_CTRL_OFF 0x08
 #  define FW_CFG_DATA_OFF 0x00
+#  define FW_CFG_DMA_OFF 0x10
 # elif (defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC32)) /* ppc/mac,sun4m 
*/
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x02
 # elif (defined(CONFIG_X86) || defined(CONFIG_SPARC64)) /* x86, sun4u */
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x01
+#  define FW_CFG_DMA_OFF 0x04
 # else
 #  error "QEMU FW_CFG not available on this architecture!"
 # endif
@@ -133,7 +137,7 @@ static void fw_cfg_io_cleanup(void)
 static int fw_cfg_do_platform_probe(struct platform_device *pdev)
 {
char sig[FW_CFG_SIG_SIZE];
-   struct resource *range, *ctrl, *data;
+   struct resource *range, *ctrl, *data, *dma;
 
/* acquire i/o range details */
fw_cfg_is_mmio = false;
@@ -170,6 +174,7 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
/* were custom register offsets provided (e.g. on the command line)? */
ctrl = platform_get_resource_byname(pdev, IORESOURCE_REG, "ctrl");
data = platform_get_resource_byname(pdev, IORESOURCE_REG, "data");
+   dma = platform_get_resource_byname(pdev, IORESOURCE_REG, "dma");
if (ctrl && data) {
fw_cfg_reg_ctrl = fw_cfg_dev_base + ctrl->start;
fw_cfg_reg_data = fw_cfg_dev_base + data->start;
@@ -179,6 +184,13 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
fw_cfg_reg_data = fw_cfg_dev_base + FW_CFG_DATA_OFF;
}
 
+   if (dma)
+   fw_cfg_reg_dma = fw_cfg_dev_base + dma->start;
+#ifdef FW_CFG_DMA_OFF
+   else
+   fw_cfg_reg_dma = fw_cfg_dev_base + FW_CFG_DMA_OFF;
+#endif
+
/* verify fw_cfg device signature */
fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
@@ -629,6 +641,7 @@ static struct platform_device *fw_cfg_cmdline_dev;
 /* use special scanf/printf modifier for phys_addr_t, resource_size_t */
 #define PH_ADDR_SCAN_FMT "@%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i" \
+":%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i%n"
 
 #define PH_ADDR_PR_1_FMT "0x%" __PHYS_ADDR_PREFIX "x@" \
@@ -638,12 +651,15 @@ static struct platform_device *fw_cfg_cmdline_dev;
 ":%" __PHYS_ADDR_PREFIX "u" \
 ":%" __PHYS_ADDR_PREFIX "u"
 
+#define PH_ADDR_PR_4_FMT PH_ADDR_PR_3_FMT \
+":%" __PHYS_ADDR_PREFIX "u"
+
 static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
 {
-   struct resource res[3] = {};
+   struct resource res[4] = {};
char *str;
p

[PATCH v12 4/4] fw_cfg: write vmcoreinfo details

2018-02-05 Thread Marc-André Lureau
If the "etc/vmcoreinfo" fw_cfg file is present and we are not running
the kdump kernel, write the addr/size of the vmcoreinfo ELF note.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 84 ++
 1 file changed, 84 insertions(+)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 19ee776a1e92..08309939cd94 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -34,6 +34,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -57,6 +59,8 @@ MODULE_LICENSE("GPL");
 /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
 #define FW_CFG_MAX_FILE_PATH 56
 
+#define VMCOREINFO_FORMAT_ELF 0x1
+
 /* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
 static u32 fw_cfg_rev;
 
@@ -195,6 +199,47 @@ static ssize_t fw_cfg_read_blob(struct device *dev, u16 
key,
return ret;
 }
 
+#ifdef CONFIG_CRASH_CORE
+/* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
+static ssize_t fw_cfg_write_blob(struct device *dev, u16 key,
+void *buf, loff_t pos, size_t count)
+{
+   u32 glk = -1U;
+   acpi_status status;
+   ssize_t ret = count;
+
+   /* If we have ACPI, ensure mutual exclusion against any potential
+* device access by the firmware, e.g. via AML methods:
+*/
+   status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
+   if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
+   /* Should never get here */
+   WARN(1, "%s: Failed to lock ACPI!\n", __func__);
+   return -EINVAL;
+   }
+
+   mutex_lock(&fw_cfg_dev_lock);
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(dev, buf, count, key << 16
+ | FW_CFG_DMA_CTL_SELECT
+ | FW_CFG_DMA_CTL_WRITE);
+   } else {
+   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+   ret = fw_cfg_dma_transfer(dev, NULL, pos, FW_CFG_DMA_CTL_SKIP);
+   if (ret < 0)
+   goto end;
+   ret = fw_cfg_dma_transfer(dev, buf, count, 
FW_CFG_DMA_CTL_WRITE);
+   }
+
+end:
+   mutex_unlock(&fw_cfg_dev_lock);
+
+   acpi_release_global_lock(glk);
+
+   return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
 /* clean up fw_cfg device i/o */
 static void fw_cfg_io_cleanup(void)
 {
@@ -314,6 +359,37 @@ struct fw_cfg_sysfs_entry {
struct device *dev;
 };
 
+#ifdef CONFIG_CRASH_CORE
+static ssize_t write_vmcoreinfo(struct device *dev, const struct fw_cfg_file 
*f)
+{
+   struct vmci {
+   __le16 host_format;
+   __le16 guest_format;
+   __le32 size;
+   __le64 paddr;
+   } __packed;
+   static struct vmci *data;
+   ssize_t ret;
+
+   data = kmalloc(sizeof(struct vmci), GFP_KERNEL);
+   if (!data)
+   return -ENOMEM;
+
+   *data = (struct vmci) {
+   .guest_format = cpu_to_le16(VMCOREINFO_FORMAT_ELF),
+   .size = cpu_to_le32(VMCOREINFO_NOTE_SIZE),
+   .paddr = cpu_to_le64(paddr_vmcoreinfo_note())
+   };
+   /* spare ourself reading host format support for now since we
+* don't know what else to format - host may ignore ours
+*/
+   ret = fw_cfg_write_blob(dev, f->select, data, 0, sizeof(struct vmci));
+
+   kfree(data);
+   return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
 /* get fw_cfg_sysfs_entry from kobject member */
 static inline struct fw_cfg_sysfs_entry *to_entry(struct kobject *kobj)
 {
@@ -555,6 +631,14 @@ static int fw_cfg_register_file(struct device *dev, const 
struct fw_cfg_file *f)
int err;
struct fw_cfg_sysfs_entry *entry;
 
+#ifdef CONFIG_CRASH_CORE
+   if (fw_cfg_dma_enabled() &&
+   strcmp(f->name, "etc/vmcoreinfo") == 0 && !is_kdump_kernel()) {
+   if (write_vmcoreinfo(dev, f) < 0)
+   pr_warn("fw_cfg: failed to write vmcoreinfo");
+   }
+#endif
+
/* allocate new entry */
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
-- 
2.16.0.rc1.1.gef27df75a1



[PATCH v12 3/4] crash: export paddr_vmcoreinfo_note()

2018-02-05 Thread Marc-André Lureau
The following patch is going to use the symbol from the fw_cfg module,
to call the function and write the note location details in the
vmcoreinfo entry, so qemu can produce dumps with the vmcoreinfo note.

CC: Andrew Morton 
CC: Baoquan He 
CC: Dave Young 
CC: Dave Young 
CC: Hari Bathini 
CC: Tony Luck 
CC: Vivek Goyal 
Signed-off-by: Marc-André Lureau 
Acked-by: Gabriel Somlo 
---
 kernel/crash_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 4f63597c824d..a93590cdd9e1 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -376,6 +376,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
 {
return __pa(vmcoreinfo_note);
 }
+EXPORT_SYMBOL(paddr_vmcoreinfo_note);
 
 static int __init crash_save_vmcoreinfo_init(void)
 {
-- 
2.16.0.rc1.1.gef27df75a1



[PATCH v12 2/4] fw_cfg: do DMA read operation

2018-02-05 Thread Marc-André Lureau
Modify fw_cfg_read_blob() to use DMA if the device supports it.
Return errors, because the operation may fail.

The DMA operation is expected to run synchronously with today's qemu,
but the specification states that it may become async, so we check the
"control" field in a loop to cope with such a future change.

So far, only one call in fw_cfg_register_dir_entries() is using a
kmalloc'ed buf and is thus clearly eligible for DMA read. But the
fw_cfg_dma_transfer() code is common to the write operation added in a
later patch.

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 132 ++---
 1 file changed, 112 insertions(+), 20 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 740df0df2260..19ee776a1e92 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -43,12 +44,22 @@ MODULE_LICENSE("GPL");
 #define FW_CFG_ID 0x01
 #define FW_CFG_FILE_DIR   0x19
 
+#define FW_CFG_VERSION_DMA 0x02
+#define FW_CFG_DMA_CTL_ERROR   0x01
+#define FW_CFG_DMA_CTL_READ0x02
+#define FW_CFG_DMA_CTL_SKIP0x04
+#define FW_CFG_DMA_CTL_SELECT  0x08
+#define FW_CFG_DMA_CTL_WRITE   0x10
+
 /* size in bytes of fw_cfg signature */
 #define FW_CFG_SIG_SIZE 4
 
 /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
 #define FW_CFG_MAX_FILE_PATH 56
 
+/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
+static u32 fw_cfg_rev;
+
 /* fw_cfg file directory entry type */
 struct fw_cfg_file {
u32 size;
@@ -57,6 +68,12 @@ struct fw_cfg_file {
char name[FW_CFG_MAX_FILE_PATH];
 };
 
+struct fw_cfg_dma {
+   u32 control;
+   u32 length;
+   u64 address;
+} __packed;
+
 /* fw_cfg device i/o register addresses */
 static bool fw_cfg_is_mmio;
 static phys_addr_t fw_cfg_p_base;
@@ -75,12 +92,68 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
 }
 
+static inline bool fw_cfg_dma_enabled(void)
+{
+   return fw_cfg_rev & FW_CFG_VERSION_DMA && fw_cfg_reg_dma;
+}
+
+/* qemu fw_cfg device is sync today, but spec says it may become async */
+static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
+{
+   do {
+   u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
+
+   if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
+   return;
+
+   usleep_range(50, 100);
+   } while (true);
+}
+
+static ssize_t fw_cfg_dma_transfer(struct device *dev,
+   void *address, u32 length, u32 control)
+{
+   phys_addr_t dma;
+   struct fw_cfg_dma *d = NULL;
+   ssize_t ret = length;
+
+   d = kmalloc(sizeof(*d), GFP_KERNEL);
+   if (!d) {
+   ret = -ENOMEM;
+   goto end;
+   }
+
+   *d = (struct fw_cfg_dma) {
+   .address = address ? cpu_to_be64(virt_to_phys(address)) : 0,
+   .length = cpu_to_be32(length),
+   .control = cpu_to_be32(control)
+   };
+
+   dma = virt_to_phys(d);
+
+   iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
+   iowrite32be(dma, fw_cfg_reg_dma + 4);
+
+   fw_cfg_wait_for_control(d);
+
+   if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
+   ret = -EIO;
+   }
+
+end:
+   kfree(d);
+
+   return ret;
+}
+
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
-static inline void fw_cfg_read_blob(u16 key,
-   void *buf, loff_t pos, size_t count)
+static ssize_t fw_cfg_read_blob(struct device *dev, u16 key,
+   void *buf, loff_t pos, size_t count,
+   bool dma)
 {
u32 glk = -1U;
acpi_status status;
+   ssize_t ret = count;
 
/* If we have ACPI, ensure mutual exclusion against any potential
 * device access by the firmware, e.g. via AML methods:
@@ -90,17 +163,36 @@ static inline void fw_cfg_read_blob(u16 key,
/* Should never get here */
WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
memset(buf, 0, count);
-   return;
+   return -EINVAL;
}
 
mutex_lock(&fw_cfg_dev_lock);
-   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
-   while (pos-- > 0)
-   ioread8(fw_cfg_reg_data);
-   ioread8_rep(fw_cfg_reg_data, buf, count);
+   if (dma && fw_cfg_dma_enabled()) {
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(dev, buf, count, key << 16
+ | 

[PATCH v12 1/4] fw_cfg: add DMA register

2018-02-05 Thread Marc-André Lureau
Add an optional <dma_off> kernel module (or command line) parameter
using the following syntax:

  [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
 or
  [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]

and initializes the register address using given or default offset.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 53 --
 1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index deb483064f53..740df0df2260 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -10,20 +10,21 @@
  * and select subsets of aarch64), a Device Tree node (on arm), or using
  * a kernel module (or command line) parameter with the following syntax:
  *
- *  [qemu_fw_cfg.]ioport=@[::]
+ *  [qemu_fw_cfg.]ioport=@[::[:]]
  * or
- *  [qemu_fw_cfg.]mmio=@[::]
+ *  [qemu_fw_cfg.]mmio=@[::[:]]
  *
  * where:
  *   := size of ioport or mmio range
  *   := physical base address of ioport or mmio range
  *   := (optional) offset of control register
  *   := (optional) offset of data register
+ *   := (optional) offset of dma register
  *
  * e.g.:
- *  qemu_fw_cfg.ioport=2@0x510:0:1 (the default on x86)
+ *  qemu_fw_cfg.ioport=12@0x510:0:1:4  (the default on x86)
  * or
- *  qemu_fw_cfg.mmio=0xA@0x902:8:0 (the default on arm)
+ *  qemu_fw_cfg.mmio=16@0x902:8:0:16   (the default on arm)
  */
 
 #include 
@@ -63,6 +64,7 @@ static resource_size_t fw_cfg_p_size;
 static void __iomem *fw_cfg_dev_base;
 static void __iomem *fw_cfg_reg_ctrl;
 static void __iomem *fw_cfg_reg_data;
+static void __iomem *fw_cfg_reg_dma;
 
 /* atomic access to fw_cfg device (potentially slow i/o, so using mutex) */
 static DEFINE_MUTEX(fw_cfg_dev_lock);
@@ -118,12 +120,14 @@ static void fw_cfg_io_cleanup(void)
 # if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
 #  define FW_CFG_CTRL_OFF 0x08
 #  define FW_CFG_DATA_OFF 0x00
+#  define FW_CFG_DMA_OFF 0x10
 # elif (defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC32)) /* ppc/mac,sun4m 
*/
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x02
 # elif (defined(CONFIG_X86) || defined(CONFIG_SPARC64)) /* x86, sun4u */
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x01
+#  define FW_CFG_DMA_OFF 0x04
 # else
 #  error "QEMU FW_CFG not available on this architecture!"
 # endif
@@ -133,7 +137,7 @@ static void fw_cfg_io_cleanup(void)
 static int fw_cfg_do_platform_probe(struct platform_device *pdev)
 {
char sig[FW_CFG_SIG_SIZE];
-   struct resource *range, *ctrl, *data;
+   struct resource *range, *ctrl, *data, *dma;
 
/* acquire i/o range details */
fw_cfg_is_mmio = false;
@@ -170,6 +174,7 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
/* were custom register offsets provided (e.g. on the command line)? */
ctrl = platform_get_resource_byname(pdev, IORESOURCE_REG, "ctrl");
data = platform_get_resource_byname(pdev, IORESOURCE_REG, "data");
+   dma = platform_get_resource_byname(pdev, IORESOURCE_REG, "dma");
if (ctrl && data) {
fw_cfg_reg_ctrl = fw_cfg_dev_base + ctrl->start;
fw_cfg_reg_data = fw_cfg_dev_base + data->start;
@@ -179,6 +184,13 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
fw_cfg_reg_data = fw_cfg_dev_base + FW_CFG_DATA_OFF;
}
 
+   if (dma)
+   fw_cfg_reg_dma = fw_cfg_dev_base + dma->start;
+#ifdef FW_CFG_DMA_OFF
+   else
+   fw_cfg_reg_dma = fw_cfg_dev_base + FW_CFG_DMA_OFF;
+#endif
+
/* verify fw_cfg device signature */
fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
@@ -629,6 +641,7 @@ static struct platform_device *fw_cfg_cmdline_dev;
 /* use special scanf/printf modifier for phys_addr_t, resource_size_t */
 #define PH_ADDR_SCAN_FMT "@%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i" \
+":%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i%n"
 
 #define PH_ADDR_PR_1_FMT "0x%" __PHYS_ADDR_PREFIX "x@" \
@@ -638,12 +651,15 @@ static struct platform_device *fw_cfg_cmdline_dev;
 ":%" __PHYS_ADDR_PREFIX "u" \
 ":%" __PHYS_ADDR_PREFIX "u"
 
+#define PH_ADDR_PR_4_FMT PH_ADDR_PR_3_FMT \
+":%" __PHYS_ADDR_PREFIX "u"
+
 static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
 {
-   struct resource res[3] = {};
+   struct resource res[4] = {};
char *str;
p

[PATCH v12 0/4] fw_cfg: add DMA operations & etc/vmcoreinfo support

2018-02-05 Thread Marc-André Lureau
Hi,

This series adds DMA operations support to the qemu fw_cfg kernel
module and populates "etc/vmcoreinfo" with vmcoreinfo location
details (entry added since qemu 2.11 with -device vmcoreinfo).

v12:
- fix virt_to_phys(NULL) panic with CONFIG_DEBUG_VIRTUAL=y
- do not use DMA read, except for kmalloc() memory we allocated
  ourselves (only fw_cfg_register_dir_entries() so far)

v11:
- add #include <linux/crash_core.h> in last patch,
  fixing kbuild .config test

Marc-André Lureau (4):
  fw_cfg: add DMA register
  fw_cfg: do DMA read operation
  crash: export paddr_vmcoreinfo_note()
  fw_cfg: write vmcoreinfo details

 drivers/firmware/qemu_fw_cfg.c | 269 -
 kernel/crash_core.c|   1 +
 2 files changed, 238 insertions(+), 32 deletions(-)

-- 
2.16.0.rc1.1.gef27df75a1



[PATCH v11 4/4] fw_cfg: write vmcoreinfo details

2018-02-01 Thread Marc-André Lureau
If the "etc/vmcoreinfo" fw_cfg file is present and we are not running
the kdump kernel, write the addr/size of the vmcoreinfo ELF note.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 84 ++
 1 file changed, 84 insertions(+)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 686f0e839858..3b3cf6222c97 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -34,6 +34,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -57,6 +59,8 @@ MODULE_LICENSE("GPL");
 /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
 #define FW_CFG_MAX_FILE_PATH 56
 
+#define VMCOREINFO_FORMAT_ELF 0x1
+
 /* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
 static u32 fw_cfg_rev;
 
@@ -195,6 +199,47 @@ static ssize_t fw_cfg_read_blob(struct device *dev, u16 
key,
return ret;
 }
 
+#ifdef CONFIG_CRASH_CORE
+/* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
+static ssize_t fw_cfg_write_blob(struct device *dev, u16 key,
+void *buf, loff_t pos, size_t count)
+{
+   u32 glk = -1U;
+   acpi_status status;
+   ssize_t ret = count;
+
+   /* If we have ACPI, ensure mutual exclusion against any potential
+* device access by the firmware, e.g. via AML methods:
+*/
+   status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
+   if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
+   /* Should never get here */
+   WARN(1, "%s: Failed to lock ACPI!\n", __func__);
+   return -EINVAL;
+   }
+
+   mutex_lock(&fw_cfg_dev_lock);
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(dev, buf, count, key << 16
+ | FW_CFG_DMA_CTL_SELECT
+ | FW_CFG_DMA_CTL_WRITE);
+   } else {
+   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+   ret = fw_cfg_dma_transfer(dev, NULL, pos, FW_CFG_DMA_CTL_SKIP);
+   if (ret < 0)
+   goto end;
+   ret = fw_cfg_dma_transfer(dev, buf, count, 
FW_CFG_DMA_CTL_WRITE);
+   }
+
+end:
+   mutex_unlock(&fw_cfg_dev_lock);
+
+   acpi_release_global_lock(glk);
+
+   return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
 /* clean up fw_cfg device i/o */
 static void fw_cfg_io_cleanup(void)
 {
@@ -314,6 +359,37 @@ struct fw_cfg_sysfs_entry {
struct device *dev;
 };
 
+#ifdef CONFIG_CRASH_CORE
+static ssize_t write_vmcoreinfo(struct device *dev, const struct fw_cfg_file 
*f)
+{
+   struct vmci {
+   __le16 host_format;
+   __le16 guest_format;
+   __le32 size;
+   __le64 paddr;
+   } __packed;
+   static struct vmci *data;
+   ssize_t ret;
+
+   data = kmalloc(sizeof(struct vmci), GFP_KERNEL);
+   if (!data)
+   return -ENOMEM;
+
+   *data = (struct vmci) {
+   .guest_format = cpu_to_le16(VMCOREINFO_FORMAT_ELF),
+   .size = cpu_to_le32(VMCOREINFO_NOTE_SIZE),
+   .paddr = cpu_to_le64(paddr_vmcoreinfo_note())
+   };
+   /* spare ourself reading host format support for now since we
+* don't know what else to format - host may ignore ours
+*/
+   ret = fw_cfg_write_blob(dev, f->select, data, 0, sizeof(struct vmci));
+
+   kfree(data);
+   return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
 /* get fw_cfg_sysfs_entry from kobject member */
 static inline struct fw_cfg_sysfs_entry *to_entry(struct kobject *kobj)
 {
@@ -554,6 +630,14 @@ static int fw_cfg_register_file(struct device *dev, const 
struct fw_cfg_file *f)
int err;
struct fw_cfg_sysfs_entry *entry;
 
+#ifdef CONFIG_CRASH_CORE
+   if (fw_cfg_dma_enabled() &&
+   strcmp(f->name, "etc/vmcoreinfo") == 0 && !is_kdump_kernel()) {
+   if (write_vmcoreinfo(dev, f) < 0)
+   pr_warn("fw_cfg: failed to write vmcoreinfo");
+   }
+#endif
+
/* allocate new entry */
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
-- 
2.16.0.rc1.1.gef27df75a1



[PATCH v11 3/4] crash: export paddr_vmcoreinfo_note()

2018-02-01 Thread Marc-André Lureau
The following patch is going to use the symbol from the fw_cfg module,
to call the function and write the note location details in the
vmcoreinfo entry, so qemu can produce dumps with the vmcoreinfo note.

CC: Andrew Morton 
CC: Baoquan He 
CC: Dave Young 
CC: Dave Young 
CC: Hari Bathini 
CC: Tony Luck 
CC: Vivek Goyal 
Signed-off-by: Marc-André Lureau 
Acked-by: Gabriel Somlo 
---
 kernel/crash_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 4f63597c824d..a93590cdd9e1 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -376,6 +376,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
 {
return __pa(vmcoreinfo_note);
 }
+EXPORT_SYMBOL(paddr_vmcoreinfo_note);
 
 static int __init crash_save_vmcoreinfo_init(void)
 {
-- 
2.16.0.rc1.1.gef27df75a1



[PATCH v11 2/4] fw_cfg: do DMA read operation

2018-02-01 Thread Marc-André Lureau
Modify fw_cfg_read_blob() to use DMA if the device supports it.
Return errors, because the operation may fail.

The DMA operation is expected to run synchronously with today's qemu,
but the specification states that it may become async, so we run the
"control" field check in a loop to cope with that possible change.

We may want to switch all the *buf addresses to use only kmalloc'ed
buffers (instead of using stack/image addresses with dma=false).

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 131 ++---
 1 file changed, 111 insertions(+), 20 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 740df0df2260..686f0e839858 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -43,12 +44,22 @@ MODULE_LICENSE("GPL");
 #define FW_CFG_ID 0x01
 #define FW_CFG_FILE_DIR   0x19
 
+#define FW_CFG_VERSION_DMA 0x02
+#define FW_CFG_DMA_CTL_ERROR   0x01
+#define FW_CFG_DMA_CTL_READ0x02
+#define FW_CFG_DMA_CTL_SKIP0x04
+#define FW_CFG_DMA_CTL_SELECT  0x08
+#define FW_CFG_DMA_CTL_WRITE   0x10
+
 /* size in bytes of fw_cfg signature */
 #define FW_CFG_SIG_SIZE 4
 
 /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
 #define FW_CFG_MAX_FILE_PATH 56
 
+/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
+static u32 fw_cfg_rev;
+
 /* fw_cfg file directory entry type */
 struct fw_cfg_file {
u32 size;
@@ -57,6 +68,12 @@ struct fw_cfg_file {
char name[FW_CFG_MAX_FILE_PATH];
 };
 
+struct fw_cfg_dma {
+   u32 control;
+   u32 length;
+   u64 address;
+} __packed;
+
 /* fw_cfg device i/o register addresses */
 static bool fw_cfg_is_mmio;
 static phys_addr_t fw_cfg_p_base;
@@ -75,12 +92,68 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
 }
 
+static inline bool fw_cfg_dma_enabled(void)
+{
+   return fw_cfg_rev & FW_CFG_VERSION_DMA && fw_cfg_reg_dma;
+}
+
+/* qemu fw_cfg device is sync today, but spec says it may become async */
+static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
+{
+   do {
+   u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
+
+   if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
+   return;
+
+   usleep_range(50, 100);
+   } while (true);
+}
+
+static ssize_t fw_cfg_dma_transfer(struct device *dev,
+   void *address, u32 length, u32 control)
+{
+   phys_addr_t dma;
+   struct fw_cfg_dma *d = NULL;
+   ssize_t ret = length;
+
+   d = kmalloc(sizeof(*d), GFP_KERNEL);
+   if (!d) {
+   ret = -ENOMEM;
+   goto end;
+   }
+
+   *d = (struct fw_cfg_dma) {
+   .address = cpu_to_be64(virt_to_phys(address)),
+   .length = cpu_to_be32(length),
+   .control = cpu_to_be32(control)
+   };
+
+   dma = virt_to_phys(d);
+
+   iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
+   iowrite32be(dma, fw_cfg_reg_dma + 4);
+
+   fw_cfg_wait_for_control(d);
+
+   if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
+   ret = -EIO;
+   }
+
+end:
+   kfree(d);
+
+   return ret;
+}
+
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
-static inline void fw_cfg_read_blob(u16 key,
-   void *buf, loff_t pos, size_t count)
+static ssize_t fw_cfg_read_blob(struct device *dev, u16 key,
+   void *buf, loff_t pos, size_t count,
+   bool dma)
 {
u32 glk = -1U;
acpi_status status;
+   ssize_t ret = count;
 
/* If we have ACPI, ensure mutual exclusion against any potential
 * device access by the firmware, e.g. via AML methods:
@@ -90,17 +163,36 @@ static inline void fw_cfg_read_blob(u16 key,
/* Should never get here */
WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
memset(buf, 0, count);
-   return;
+   return -EINVAL;
}
 
mutex_lock(&fw_cfg_dev_lock);
-   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
-   while (pos-- > 0)
-   ioread8(fw_cfg_reg_data);
-   ioread8_rep(fw_cfg_reg_data, buf, count);
+   if (dma && fw_cfg_dma_enabled()) {
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(dev, buf, count, key << 16
+ | FW_CFG_DMA_CTL_SELECT
+ | FW_CFG

[PATCH v11 1/4] fw_cfg: add DMA register

2018-02-01 Thread Marc-André Lureau
Add an optional <dma_off> kernel module (or command line) parameter
using the following syntax:

  [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
 or
  [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]

and initializes the register address using given or default offset.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 53 --
 1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index deb483064f53..740df0df2260 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -10,20 +10,21 @@
  * and select subsets of aarch64), a Device Tree node (on arm), or using
  * a kernel module (or command line) parameter with the following syntax:
  *
- *  [qemu_fw_cfg.]ioport=@[::]
+ *  [qemu_fw_cfg.]ioport=@[::[:]]
  * or
- *  [qemu_fw_cfg.]mmio=@[::]
+ *  [qemu_fw_cfg.]mmio=@[::[:]]
  *
  * where:
  *   := size of ioport or mmio range
  *   := physical base address of ioport or mmio range
  *   := (optional) offset of control register
  *   := (optional) offset of data register
+ *   := (optional) offset of dma register
  *
  * e.g.:
- *  qemu_fw_cfg.ioport=2@0x510:0:1 (the default on x86)
+ *  qemu_fw_cfg.ioport=12@0x510:0:1:4  (the default on x86)
  * or
- *  qemu_fw_cfg.mmio=0xA@0x902:8:0 (the default on arm)
+ *  qemu_fw_cfg.mmio=16@0x902:8:0:16   (the default on arm)
  */
 
 #include 
@@ -63,6 +64,7 @@ static resource_size_t fw_cfg_p_size;
 static void __iomem *fw_cfg_dev_base;
 static void __iomem *fw_cfg_reg_ctrl;
 static void __iomem *fw_cfg_reg_data;
+static void __iomem *fw_cfg_reg_dma;
 
 /* atomic access to fw_cfg device (potentially slow i/o, so using mutex) */
 static DEFINE_MUTEX(fw_cfg_dev_lock);
@@ -118,12 +120,14 @@ static void fw_cfg_io_cleanup(void)
 # if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
 #  define FW_CFG_CTRL_OFF 0x08
 #  define FW_CFG_DATA_OFF 0x00
+#  define FW_CFG_DMA_OFF 0x10
 # elif (defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC32)) /* ppc/mac,sun4m 
*/
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x02
 # elif (defined(CONFIG_X86) || defined(CONFIG_SPARC64)) /* x86, sun4u */
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x01
+#  define FW_CFG_DMA_OFF 0x04
 # else
 #  error "QEMU FW_CFG not available on this architecture!"
 # endif
@@ -133,7 +137,7 @@ static void fw_cfg_io_cleanup(void)
 static int fw_cfg_do_platform_probe(struct platform_device *pdev)
 {
char sig[FW_CFG_SIG_SIZE];
-   struct resource *range, *ctrl, *data;
+   struct resource *range, *ctrl, *data, *dma;
 
/* acquire i/o range details */
fw_cfg_is_mmio = false;
@@ -170,6 +174,7 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
/* were custom register offsets provided (e.g. on the command line)? */
ctrl = platform_get_resource_byname(pdev, IORESOURCE_REG, "ctrl");
data = platform_get_resource_byname(pdev, IORESOURCE_REG, "data");
+   dma = platform_get_resource_byname(pdev, IORESOURCE_REG, "dma");
if (ctrl && data) {
fw_cfg_reg_ctrl = fw_cfg_dev_base + ctrl->start;
fw_cfg_reg_data = fw_cfg_dev_base + data->start;
@@ -179,6 +184,13 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
fw_cfg_reg_data = fw_cfg_dev_base + FW_CFG_DATA_OFF;
}
 
+   if (dma)
+   fw_cfg_reg_dma = fw_cfg_dev_base + dma->start;
+#ifdef FW_CFG_DMA_OFF
+   else
+   fw_cfg_reg_dma = fw_cfg_dev_base + FW_CFG_DMA_OFF;
+#endif
+
/* verify fw_cfg device signature */
fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
@@ -629,6 +641,7 @@ static struct platform_device *fw_cfg_cmdline_dev;
 /* use special scanf/printf modifier for phys_addr_t, resource_size_t */
 #define PH_ADDR_SCAN_FMT "@%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i" \
+":%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i%n"
 
 #define PH_ADDR_PR_1_FMT "0x%" __PHYS_ADDR_PREFIX "x@" \
@@ -638,12 +651,15 @@ static struct platform_device *fw_cfg_cmdline_dev;
 ":%" __PHYS_ADDR_PREFIX "u" \
 ":%" __PHYS_ADDR_PREFIX "u"
 
+#define PH_ADDR_PR_4_FMT PH_ADDR_PR_3_FMT \
+":%" __PHYS_ADDR_PREFIX "u"
+
 static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
 {
-   struct resource res[3] = {};
+   struct resource res[4] = {};
char *str;
p

[PATCH v11 0/4] fw_cfg: add DMA operations & etc/vmcoreinfo support

2018-02-01 Thread Marc-André Lureau
Hi,

This series adds DMA operations support to the qemu fw_cfg kernel
module and populates "etc/vmcoreinfo" with vmcoreinfo location
details (since qemu 2.11).

v11:
- add #include <linux/crash_core.h> in last patch,
  fixing kbuild .config test

Marc-André Lureau (4):
  fw_cfg: add DMA register
  fw_cfg: do DMA read operation
  crash: export paddr_vmcoreinfo_note()
  fw_cfg: write vmcoreinfo details

 drivers/firmware/qemu_fw_cfg.c | 268 -
 kernel/crash_core.c|   1 +
 2 files changed, 237 insertions(+), 32 deletions(-)

-- 
2.16.0.rc1.1.gef27df75a1



[PATCH v10 4/4] fw_cfg: write vmcoreinfo details

2018-01-23 Thread Marc-André Lureau
If the "etc/vmcoreinfo" fw_cfg file is present and we are not running
the kdump kernel, write the addr/size of the vmcoreinfo ELF note.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 83 ++
 1 file changed, 83 insertions(+)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 686f0e839858..b4904649f316 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -57,6 +58,8 @@ MODULE_LICENSE("GPL");
 /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
 #define FW_CFG_MAX_FILE_PATH 56
 
+#define VMCOREINFO_FORMAT_ELF 0x1
+
 /* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
 static u32 fw_cfg_rev;
 
@@ -195,6 +198,47 @@ static ssize_t fw_cfg_read_blob(struct device *dev, u16 
key,
return ret;
 }
 
+#ifdef CONFIG_CRASH_CORE
+/* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
+static ssize_t fw_cfg_write_blob(struct device *dev, u16 key,
+void *buf, loff_t pos, size_t count)
+{
+   u32 glk = -1U;
+   acpi_status status;
+   ssize_t ret = count;
+
+   /* If we have ACPI, ensure mutual exclusion against any potential
+* device access by the firmware, e.g. via AML methods:
+*/
+   status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
+   if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
+   /* Should never get here */
+   WARN(1, "%s: Failed to lock ACPI!\n", __func__);
+   return -EINVAL;
+   }
+
+   mutex_lock(&fw_cfg_dev_lock);
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(dev, buf, count, key << 16
+ | FW_CFG_DMA_CTL_SELECT
+ | FW_CFG_DMA_CTL_WRITE);
+   } else {
+   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+   ret = fw_cfg_dma_transfer(dev, NULL, pos, FW_CFG_DMA_CTL_SKIP);
+   if (ret < 0)
+   goto end;
+   ret = fw_cfg_dma_transfer(dev, buf, count, 
FW_CFG_DMA_CTL_WRITE);
+   }
+
+end:
+   mutex_unlock(&fw_cfg_dev_lock);
+
+   acpi_release_global_lock(glk);
+
+   return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
 /* clean up fw_cfg device i/o */
 static void fw_cfg_io_cleanup(void)
 {
@@ -314,6 +358,37 @@ struct fw_cfg_sysfs_entry {
struct device *dev;
 };
 
+#ifdef CONFIG_CRASH_CORE
+static ssize_t write_vmcoreinfo(struct device *dev, const struct fw_cfg_file 
*f)
+{
+   struct vmci {
+   __le16 host_format;
+   __le16 guest_format;
+   __le32 size;
+   __le64 paddr;
+   } __packed;
+   static struct vmci *data;
+   ssize_t ret;
+
+   data = kmalloc(sizeof(struct vmci), GFP_KERNEL);
+   if (!data)
+   return -ENOMEM;
+
+   *data = (struct vmci) {
+   .guest_format = cpu_to_le16(VMCOREINFO_FORMAT_ELF),
+   .size = cpu_to_le32(VMCOREINFO_NOTE_SIZE),
+   .paddr = cpu_to_le64(paddr_vmcoreinfo_note())
+   };
+   /* spare ourself reading host format support for now since we
+* don't know what else to format - host may ignore ours
+*/
+   ret = fw_cfg_write_blob(dev, f->select, data, 0, sizeof(struct vmci));
+
+   kfree(data);
+   return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
 /* get fw_cfg_sysfs_entry from kobject member */
 static inline struct fw_cfg_sysfs_entry *to_entry(struct kobject *kobj)
 {
@@ -554,6 +629,14 @@ static int fw_cfg_register_file(struct device *dev, const 
struct fw_cfg_file *f)
int err;
struct fw_cfg_sysfs_entry *entry;
 
+#ifdef CONFIG_CRASH_CORE
+   if (fw_cfg_dma_enabled() &&
+   strcmp(f->name, "etc/vmcoreinfo") == 0 && !is_kdump_kernel()) {
+   if (write_vmcoreinfo(dev, f) < 0)
+   pr_warn("fw_cfg: failed to write vmcoreinfo");
+   }
+#endif
+
/* allocate new entry */
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
-- 
2.16.0.rc1.1.gef27df75a1



[PATCH v10 3/4] crash: export paddr_vmcoreinfo_note()

2018-01-23 Thread Marc-André Lureau
The following patch is going to use the symbol from the fw_cfg module,
to call the function and write the note location details in the
vmcoreinfo entry, so qemu can produce dumps with the vmcoreinfo note.

CC: Andrew Morton 
CC: Baoquan He 
CC: Dave Young 
CC: Dave Young 
CC: Hari Bathini 
CC: Tony Luck 
CC: Vivek Goyal 
Signed-off-by: Marc-André Lureau 
Acked-by: Gabriel Somlo 
---
 kernel/crash_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 4f63597c824d..a93590cdd9e1 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -376,6 +376,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
 {
return __pa(vmcoreinfo_note);
 }
+EXPORT_SYMBOL(paddr_vmcoreinfo_note);
 
 static int __init crash_save_vmcoreinfo_init(void)
 {
-- 
2.16.0.rc1.1.gef27df75a1



[PATCH v10 2/4] fw_cfg: do DMA read operation

2018-01-23 Thread Marc-André Lureau
Modify fw_cfg_read_blob() to use DMA if the device supports it.
Return errors, because the operation may fail.

The DMA operation is expected to run synchronously with today's qemu,
but the specification states that it may become async, so we run the
"control" field check in a loop to cope with that possible change.

We may want to switch all the *buf addresses to use only kmalloc'ed
buffers (instead of using stack/image addresses with dma=false).

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 131 ++---
 1 file changed, 111 insertions(+), 20 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 740df0df2260..686f0e839858 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -43,12 +44,22 @@ MODULE_LICENSE("GPL");
 #define FW_CFG_ID 0x01
 #define FW_CFG_FILE_DIR   0x19
 
+#define FW_CFG_VERSION_DMA 0x02
+#define FW_CFG_DMA_CTL_ERROR   0x01
+#define FW_CFG_DMA_CTL_READ0x02
+#define FW_CFG_DMA_CTL_SKIP0x04
+#define FW_CFG_DMA_CTL_SELECT  0x08
+#define FW_CFG_DMA_CTL_WRITE   0x10
+
 /* size in bytes of fw_cfg signature */
 #define FW_CFG_SIG_SIZE 4
 
 /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
 #define FW_CFG_MAX_FILE_PATH 56
 
+/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
+static u32 fw_cfg_rev;
+
 /* fw_cfg file directory entry type */
 struct fw_cfg_file {
u32 size;
@@ -57,6 +68,12 @@ struct fw_cfg_file {
char name[FW_CFG_MAX_FILE_PATH];
 };
 
+struct fw_cfg_dma {
+   u32 control;
+   u32 length;
+   u64 address;
+} __packed;
+
 /* fw_cfg device i/o register addresses */
 static bool fw_cfg_is_mmio;
 static phys_addr_t fw_cfg_p_base;
@@ -75,12 +92,68 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
 }
 
+static inline bool fw_cfg_dma_enabled(void)
+{
+   return fw_cfg_rev & FW_CFG_VERSION_DMA && fw_cfg_reg_dma;
+}
+
+/* qemu fw_cfg device is sync today, but spec says it may become async */
+static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
+{
+   do {
+   u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
+
+   if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
+   return;
+
+   usleep_range(50, 100);
+   } while (true);
+}
+
+static ssize_t fw_cfg_dma_transfer(struct device *dev,
+   void *address, u32 length, u32 control)
+{
+   phys_addr_t dma;
+   struct fw_cfg_dma *d = NULL;
+   ssize_t ret = length;
+
+   d = kmalloc(sizeof(*d), GFP_KERNEL);
+   if (!d) {
+   ret = -ENOMEM;
+   goto end;
+   }
+
+   *d = (struct fw_cfg_dma) {
+   .address = cpu_to_be64(virt_to_phys(address)),
+   .length = cpu_to_be32(length),
+   .control = cpu_to_be32(control)
+   };
+
+   dma = virt_to_phys(d);
+
+   iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
+   iowrite32be(dma, fw_cfg_reg_dma + 4);
+
+   fw_cfg_wait_for_control(d);
+
+   if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
+   ret = -EIO;
+   }
+
+end:
+   kfree(d);
+
+   return ret;
+}
+
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
-static inline void fw_cfg_read_blob(u16 key,
-   void *buf, loff_t pos, size_t count)
+static ssize_t fw_cfg_read_blob(struct device *dev, u16 key,
+   void *buf, loff_t pos, size_t count,
+   bool dma)
 {
u32 glk = -1U;
acpi_status status;
+   ssize_t ret = count;
 
/* If we have ACPI, ensure mutual exclusion against any potential
 * device access by the firmware, e.g. via AML methods:
@@ -90,17 +163,36 @@ static inline void fw_cfg_read_blob(u16 key,
/* Should never get here */
WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
memset(buf, 0, count);
-   return;
+   return -EINVAL;
}
 
mutex_lock(&fw_cfg_dev_lock);
-   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
-   while (pos-- > 0)
-   ioread8(fw_cfg_reg_data);
-   ioread8_rep(fw_cfg_reg_data, buf, count);
+   if (dma && fw_cfg_dma_enabled()) {
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(dev, buf, count, key << 16
+ | FW_CFG_DMA_CTL_SELECT
+ | FW_CFG

[PATCH v10 1/4] fw_cfg: add DMA register

2018-01-23 Thread Marc-André Lureau
Add an optional <dma_off> kernel module (or command line) parameter
using the following syntax:

  [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
 or
  [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]

and initializes the register address using given or default offset.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 53 --
 1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index deb483064f53..740df0df2260 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -10,20 +10,21 @@
  * and select subsets of aarch64), a Device Tree node (on arm), or using
  * a kernel module (or command line) parameter with the following syntax:
  *
- *  [qemu_fw_cfg.]ioport=@[::]
+ *  [qemu_fw_cfg.]ioport=@[::[:]]
  * or
- *  [qemu_fw_cfg.]mmio=@[::]
+ *  [qemu_fw_cfg.]mmio=@[::[:]]
  *
  * where:
  *   := size of ioport or mmio range
  *   := physical base address of ioport or mmio range
  *   := (optional) offset of control register
  *   := (optional) offset of data register
+ *   := (optional) offset of dma register
  *
  * e.g.:
- *  qemu_fw_cfg.ioport=2@0x510:0:1 (the default on x86)
+ *  qemu_fw_cfg.ioport=12@0x510:0:1:4  (the default on x86)
  * or
- *  qemu_fw_cfg.mmio=0xA@0x902:8:0 (the default on arm)
+ *  qemu_fw_cfg.mmio=16@0x902:8:0:16   (the default on arm)
  */
 
 #include 
@@ -63,6 +64,7 @@ static resource_size_t fw_cfg_p_size;
 static void __iomem *fw_cfg_dev_base;
 static void __iomem *fw_cfg_reg_ctrl;
 static void __iomem *fw_cfg_reg_data;
+static void __iomem *fw_cfg_reg_dma;
 
 /* atomic access to fw_cfg device (potentially slow i/o, so using mutex) */
 static DEFINE_MUTEX(fw_cfg_dev_lock);
@@ -118,12 +120,14 @@ static void fw_cfg_io_cleanup(void)
 # if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
 #  define FW_CFG_CTRL_OFF 0x08
 #  define FW_CFG_DATA_OFF 0x00
+#  define FW_CFG_DMA_OFF 0x10
 # elif (defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC32)) /* ppc/mac,sun4m 
*/
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x02
 # elif (defined(CONFIG_X86) || defined(CONFIG_SPARC64)) /* x86, sun4u */
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x01
+#  define FW_CFG_DMA_OFF 0x04
 # else
 #  error "QEMU FW_CFG not available on this architecture!"
 # endif
@@ -133,7 +137,7 @@ static void fw_cfg_io_cleanup(void)
 static int fw_cfg_do_platform_probe(struct platform_device *pdev)
 {
char sig[FW_CFG_SIG_SIZE];
-   struct resource *range, *ctrl, *data;
+   struct resource *range, *ctrl, *data, *dma;
 
/* acquire i/o range details */
fw_cfg_is_mmio = false;
@@ -170,6 +174,7 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
/* were custom register offsets provided (e.g. on the command line)? */
ctrl = platform_get_resource_byname(pdev, IORESOURCE_REG, "ctrl");
data = platform_get_resource_byname(pdev, IORESOURCE_REG, "data");
+   dma = platform_get_resource_byname(pdev, IORESOURCE_REG, "dma");
if (ctrl && data) {
fw_cfg_reg_ctrl = fw_cfg_dev_base + ctrl->start;
fw_cfg_reg_data = fw_cfg_dev_base + data->start;
@@ -179,6 +184,13 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
fw_cfg_reg_data = fw_cfg_dev_base + FW_CFG_DATA_OFF;
}
 
+   if (dma)
+   fw_cfg_reg_dma = fw_cfg_dev_base + dma->start;
+#ifdef FW_CFG_DMA_OFF
+   else
+   fw_cfg_reg_dma = fw_cfg_dev_base + FW_CFG_DMA_OFF;
+#endif
+
/* verify fw_cfg device signature */
fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
@@ -629,6 +641,7 @@ static struct platform_device *fw_cfg_cmdline_dev;
 /* use special scanf/printf modifier for phys_addr_t, resource_size_t */
 #define PH_ADDR_SCAN_FMT "@%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i" \
+":%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i%n"
 
 #define PH_ADDR_PR_1_FMT "0x%" __PHYS_ADDR_PREFIX "x@" \
@@ -638,12 +651,15 @@ static struct platform_device *fw_cfg_cmdline_dev;
 ":%" __PHYS_ADDR_PREFIX "u" \
 ":%" __PHYS_ADDR_PREFIX "u"
 
+#define PH_ADDR_PR_4_FMT PH_ADDR_PR_3_FMT \
+":%" __PHYS_ADDR_PREFIX "u"
+
 static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
 {
-   struct resource res[3] = {};
+   struct resource res[4] = {};
char *str;
p

[PATCH v10 0/4] fw_cfg: add DMA operations & etc/vmcoreinfo support

2018-01-23 Thread Marc-André Lureau
Hi,

This series adds DMA operations support to the qemu fw_cfg kernel
module and populates "etc/vmcoreinfo" with vmcoreinfo location
details (since qemu 2.11).

v10:
- merged Peter Xu fix "[PATCH] fw_cfg: don't use DMA mapping for fw_cfg device"
  (fixing fw-cfg+viommu configuration)
- switch back to kmalloc'd struct vmci, to fix DMA garbage operations
- do not attempt to write vmcoreinfo if the entry exists but DMA isn't supported
- removed a copy-pasta memset() on write operation error

Marc-André Lureau (4):
  fw_cfg: add DMA register
  fw_cfg: do DMA read operation
  crash: export paddr_vmcoreinfo_note()
  fw_cfg: write vmcoreinfo details

 drivers/firmware/qemu_fw_cfg.c | 267 -
 kernel/crash_core.c|   1 +
 2 files changed, 236 insertions(+), 32 deletions(-)

-- 
2.16.0.rc1.1.gef27df75a1



Re: [PATCH v3 0/9] memfd: add sealing to hugetlb-backed memory

2017-12-22 Thread Marc-André Lureau
Hi Mike

On Thu, Dec 21, 2017 at 1:40 AM, Mike Kravetz  wrote:
> On 12/20/2017 04:26 PM, Andrew Morton wrote:
>> On Wed, 20 Dec 2017 16:10:51 +0100 Michal Hocko  wrote:
>>
>>> On Wed 20-12-17 15:15:50, Marc-André Lureau wrote:
>>>> Hi
>>>>
>>>> On Wed, Nov 15, 2017 at 4:13 AM, Mike Kravetz  
>>>> wrote:
>>>>> +Cc: Andrew, Michal, David
>>>>>
>>>>> Are there any other comments on this patch series from Marc-André?  Is 
>>>>> anything
>>>>> else needed to move forward?
>>>>>
>>>>> I have reviewed the patches in the series.  David Herrmann (the original
>>>>> memfd_create/file sealing author) has also taken a look at the patches.
>>>>>
>>>>> One outstanding issue is sorting out the config option dependencies.  
>>>>> Although,
>>>>> IMO this is not a strict requirement for this series.  I have addressed 
>>>>> this
>>>>> issue in a follow on series:
>>>>> http://lkml.kernel.org/r/20171109014109.21077-1-mike.krav...@oracle.com
>>>>
>>>> Are we good for the next merge window? Is Hugh Dickins the maintainer
>>>> with the final word, and doing the pull request? (sorry, I am not very
>>>> familiar with kernel development)
>>>
>>> Andrew will pick it up, I assume. I will try to get and review this but
>>> there is way too much going on before holiday.
>>
>> Yup, things are quiet at present.
>>
>> I'll suck these up for a bit of testing - please let me know if you'd
>> prefer them to be held back for a cycle (ie: for 4.17-rc1)
>
> Thanks Andrew,
>
> As mentioned above there is one issue related to this series that we may
> want to address.  It is described in the series at:
> http://lkml.kernel.org/r/20171109014109.21077-1-mike.krav...@oracle.com
>
> I did not get many comments on this series/issue.  If we want to do
> something like this, now might be a good time.

I am not the best person to say, but I think that series makes a lot
of sense (and looks good to me). However, I don't think we need to
wait for it to get the sealing support added (furthermore, your rfc
series is on top).

Thanks!


-- 
Marc-André Lureau


Re: [PATCH v3 0/9] memfd: add sealing to hugetlb-backed memory

2017-12-20 Thread Marc-André Lureau
Hi

On Wed, Nov 15, 2017 at 4:13 AM, Mike Kravetz  wrote:
> +Cc: Andrew, Michal, David
>
> Are there any other comments on this patch series from Marc-André?  Is 
> anything
> else needed to move forward?
>
> I have reviewed the patches in the series.  David Herrmann (the original
> memfd_create/file sealing author) has also taken a look at the patches.
>
> One outstanding issue is sorting out the config option dependencies.  
> Although,
> IMO this is not a strict requirement for this series.  I have addressed this
> issue in a follow on series:
> http://lkml.kernel.org/r/20171109014109.21077-1-mike.krav...@oracle.com

Are we good for the next merge window? Is Hugh Dickins the maintainer
with the final word, and doing the pull request? (sorry, I am not very
familiar with kernel development)

thanks!

>> Hi,
>>
>> Recently, Mike Kravetz added hugetlbfs support to memfd. However, he
>> didn't add sealing support. One of the reasons to use memfd is to have
>> shared memory sealing when doing IPC or sharing memory with another
>> process with some extra safety. qemu uses shared memory & hugetables
>> with vhost-user (used by dpdk), so it is reasonable to use memfd
>> now instead for convenience and security reasons.
>>
>> Thanks!
>>
>> v3:
>> - do remaining MFD_DEF_SIZE/mfd_def_size substitutions
>> - fix missing unistd.h include in common.c
>> - tweaked a bit commit message prefixes
>> - added reviewed-by tags
>>
>> v2:
>> - add "memfd-hugetlb:" prefix in memfd-test
>> - run fuse test on hugetlb backend memory
>> - rename function memfd_file_get_seals() -> memfd_file_seals_ptr()
>> - update commit messages
>> - added reviewed-by tags
>>
>> RFC->v1:
>> - split rfc patch, after early review feedback
>> - added patch for memfd-test changes
>> - fix build with hugetlbfs disabled
>> - small code and commit messages improvements
>>
>> Marc-André Lureau (9):
>>   shmem: unexport shmem_add_seals()/shmem_get_seals()
>>   shmem: rename functions that are memfd-related
>>   hugetlb: expose hugetlbfs_inode_info in header
>>   hugetlb: implement memfd sealing
>>   shmem: add sealing support to hugetlb-backed memfd
>>   memfd-test: test hugetlbfs sealing
>>   memfd-test: add 'memfd-hugetlb:' prefix when testing hugetlbfs
>>   memfd-test: move common code to a shared unit
>>   memfd-test: run fuse test on hugetlb backend memory
>>
>>  fs/fcntl.c |   2 +-
>>  fs/hugetlbfs/inode.c   |  39 +++--
>>  include/linux/hugetlb.h|  11 ++
>>  include/linux/shmem_fs.h   |   6 +-
>>  mm/shmem.c |  59 ---
>>  tools/testing/selftests/memfd/Makefile |   5 +
>>  tools/testing/selftests/memfd/common.c |  46 ++
>>  tools/testing/selftests/memfd/common.h |   9 ++
>>  tools/testing/selftests/memfd/fuse_test.c  |  44 +++--
>>  tools/testing/selftests/memfd/memfd_test.c | 212 
>> -
>>  tools/testing/selftests/memfd/run_fuse_test.sh |   2 +-
>>  tools/testing/selftests/memfd/run_tests.sh |   1 +
>>  12 files changed, 200 insertions(+), 236 deletions(-)
>>  create mode 100644 tools/testing/selftests/memfd/common.c
>>  create mode 100644 tools/testing/selftests/memfd/common.h
>>



-- 
Marc-André Lureau


[PATCH v9 1/4] fw_cfg: add DMA register

2017-12-05 Thread Marc-André Lureau
Add an optional <dma_off> kernel module (or command line) parameter
using the following syntax:

  [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
 or
  [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]

and initializes the register address using given or default offset.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 53 --
 1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index deb483064f53..740df0df2260 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -10,20 +10,21 @@
  * and select subsets of aarch64), a Device Tree node (on arm), or using
  * a kernel module (or command line) parameter with the following syntax:
  *
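- *  [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>]
+ *  [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
  * or
- *  [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>]
+ *  [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
  *
  * where:
  *   <size> := size of ioport or mmio range
  *   <base> := physical base address of ioport or mmio range
  *   <ctrl_off> := (optional) offset of control register
  *   <data_off> := (optional) offset of data register
+ *   <dma_off> := (optional) offset of dma register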
  *
  * e.g.:
- *  qemu_fw_cfg.ioport=2@0x510:0:1 (the default on x86)
+ *  qemu_fw_cfg.ioport=12@0x510:0:1:4  (the default on x86)
  * or
- *  qemu_fw_cfg.mmio=0xA@0x902:8:0 (the default on arm)
+ *  qemu_fw_cfg.mmio=16@0x902:8:0:16   (the default on arm)
  */
 
 #include 
@@ -63,6 +64,7 @@ static resource_size_t fw_cfg_p_size;
 static void __iomem *fw_cfg_dev_base;
 static void __iomem *fw_cfg_reg_ctrl;
 static void __iomem *fw_cfg_reg_data;
+static void __iomem *fw_cfg_reg_dma;
 
 /* atomic access to fw_cfg device (potentially slow i/o, so using mutex) */
 static DEFINE_MUTEX(fw_cfg_dev_lock);
@@ -118,12 +120,14 @@ static void fw_cfg_io_cleanup(void)
 # if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
 #  define FW_CFG_CTRL_OFF 0x08
 #  define FW_CFG_DATA_OFF 0x00
+#  define FW_CFG_DMA_OFF 0x10
 # elif (defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC32)) /* ppc/mac,sun4m 
*/
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x02
 # elif (defined(CONFIG_X86) || defined(CONFIG_SPARC64)) /* x86, sun4u */
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x01
+#  define FW_CFG_DMA_OFF 0x04
 # else
 #  error "QEMU FW_CFG not available on this architecture!"
 # endif
@@ -133,7 +137,7 @@ static void fw_cfg_io_cleanup(void)
 static int fw_cfg_do_platform_probe(struct platform_device *pdev)
 {
char sig[FW_CFG_SIG_SIZE];
-   struct resource *range, *ctrl, *data;
+   struct resource *range, *ctrl, *data, *dma;
 
/* acquire i/o range details */
fw_cfg_is_mmio = false;
@@ -170,6 +174,7 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
/* were custom register offsets provided (e.g. on the command line)? */
ctrl = platform_get_resource_byname(pdev, IORESOURCE_REG, "ctrl");
data = platform_get_resource_byname(pdev, IORESOURCE_REG, "data");
+   dma = platform_get_resource_byname(pdev, IORESOURCE_REG, "dma");
if (ctrl && data) {
fw_cfg_reg_ctrl = fw_cfg_dev_base + ctrl->start;
fw_cfg_reg_data = fw_cfg_dev_base + data->start;
@@ -179,6 +184,13 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
fw_cfg_reg_data = fw_cfg_dev_base + FW_CFG_DATA_OFF;
}
 
+   if (dma)
+   fw_cfg_reg_dma = fw_cfg_dev_base + dma->start;
+#ifdef FW_CFG_DMA_OFF
+   else
+   fw_cfg_reg_dma = fw_cfg_dev_base + FW_CFG_DMA_OFF;
+#endif
+
/* verify fw_cfg device signature */
fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
@@ -629,6 +641,7 @@ static struct platform_device *fw_cfg_cmdline_dev;
 /* use special scanf/printf modifier for phys_addr_t, resource_size_t */
 #define PH_ADDR_SCAN_FMT "@%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i" \
+":%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i%n"
 
 #define PH_ADDR_PR_1_FMT "0x%" __PHYS_ADDR_PREFIX "x@" \
@@ -638,12 +651,15 @@ static struct platform_device *fw_cfg_cmdline_dev;
 ":%" __PHYS_ADDR_PREFIX "u" \
 ":%" __PHYS_ADDR_PREFIX "u"
 
+#define PH_ADDR_PR_4_FMT PH_ADDR_PR_3_FMT \
+":%" __PHYS_ADDR_PREFIX "u"
+
 static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
 {
-   struct resource res[3] = {};
+   struct resource res[4] = {};
char *str;
p

[PATCH v9 3/4] crash: export paddr_vmcoreinfo_note()

2017-12-05 Thread Marc-André Lureau
The following patch is going to use the symbol from the fw_cfg module,
to call the function and write the note location details in the
vmcoreinfo entry, so qemu can produce dumps with the vmcoreinfo note.

CC: Andrew Morton 
CC: Baoquan He 
CC: Dave Young 
CC: Dave Young 
CC: Hari Bathini 
CC: Tony Luck 
CC: Vivek Goyal 
Signed-off-by: Marc-André Lureau 
Acked-by: Gabriel Somlo 
---
 kernel/crash_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index b3663896278e..2394f0501c65 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -376,6 +376,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
 {
return __pa(vmcoreinfo_note);
 }
+EXPORT_SYMBOL(paddr_vmcoreinfo_note);
 
 static int __init crash_save_vmcoreinfo_init(void)
 {
-- 
2.15.0.277.ga3d2ad2c43



[PATCH v9 4/4] fw_cfg: write vmcoreinfo details

2017-12-05 Thread Marc-André Lureau
If the "etc/vmcoreinfo" fw_cfg file is present and we are not running
the kdump kernel, write the addr/size of the vmcoreinfo ELF note.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 82 +-
 1 file changed, 81 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index e261d9254b29..61af1c0a640d 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -35,6 +35,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -58,6 +60,8 @@ MODULE_LICENSE("GPL");
 /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
 #define FW_CFG_MAX_FILE_PATH 56
 
+#define VMCOREINFO_FORMAT_ELF 0x1
+
 /* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
 static u32 fw_cfg_rev;
 
@@ -119,7 +123,8 @@ static ssize_t fw_cfg_dma_transfer(struct device *dev,
dma_addr_t dma;
ssize_t ret = length;
enum dma_data_direction dir =
-   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0);
+   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0) |
+   (control & FW_CFG_DMA_CTL_WRITE ? DMA_TO_DEVICE : 0);
 
if (address && length) {
dma_addr = dma_map_single(dev, address, length, dir);
@@ -216,6 +221,48 @@ static ssize_t fw_cfg_read_blob(struct device *dev, u16 
key,
return ret;
 }
 
+#ifdef CONFIG_CRASH_CORE
+/* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
+static ssize_t fw_cfg_write_blob(struct device *dev, u16 key,
+void *buf, loff_t pos, size_t count)
+{
+   u32 glk = -1U;
+   acpi_status status;
+   ssize_t ret = count;
+
+   /* If we have ACPI, ensure mutual exclusion against any potential
+* device access by the firmware, e.g. via AML methods:
+*/
+   status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
+   if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
+   /* Should never get here */
+   WARN(1, "%s: Failed to lock ACPI!\n", __func__);
+   memset(buf, 0, count);
+   return -EINVAL;
+   }
+
+   mutex_lock(&fw_cfg_dev_lock);
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(dev, buf, count, key << 16
+ | FW_CFG_DMA_CTL_SELECT
+ | FW_CFG_DMA_CTL_WRITE);
+   } else {
+   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+   ret = fw_cfg_dma_transfer(dev, NULL, pos, FW_CFG_DMA_CTL_SKIP);
+   if (ret < 0)
+   goto end;
+   ret = fw_cfg_dma_transfer(dev, buf, count, 
FW_CFG_DMA_CTL_WRITE);
+   }
+
+end:
+   mutex_unlock(&fw_cfg_dev_lock);
+
+   acpi_release_global_lock(glk);
+
+   return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
 /* clean up fw_cfg device i/o */
 static void fw_cfg_io_cleanup(void)
 {
@@ -335,6 +382,32 @@ struct fw_cfg_sysfs_entry {
struct device *dev;
 };
 
+#ifdef CONFIG_CRASH_CORE
+static ssize_t write_vmcoreinfo(struct device *dev, const struct fw_cfg_file 
*f)
+{
+   struct vmci {
+   __le16 host_format;
+   __le16 guest_format;
+   __le32 size;
+   __le64 paddr;
+   } __packed;
+   static struct vmci data;
+   ssize_t ret;
+
+   data = (struct vmci) {
+   .guest_format = cpu_to_le16(VMCOREINFO_FORMAT_ELF),
+   .size = cpu_to_le32(VMCOREINFO_NOTE_SIZE),
+   .paddr = cpu_to_le64(paddr_vmcoreinfo_note())
+   };
+   /* spare ourself reading host format support for now since we
+* don't know what else to format - host may ignore ours
+*/
+   ret = fw_cfg_write_blob(dev, f->select, &data, 0, sizeof(struct vmci));
+
+   return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
 /* get fw_cfg_sysfs_entry from kobject member */
 static inline struct fw_cfg_sysfs_entry *to_entry(struct kobject *kobj)
 {
@@ -575,6 +648,13 @@ static int fw_cfg_register_file(struct device *dev, const 
struct fw_cfg_file *f)
int err;
struct fw_cfg_sysfs_entry *entry;
 
+#ifdef CONFIG_CRASH_CORE
+   if (strcmp(f->name, "etc/vmcoreinfo") == 0 && !is_kdump_kernel()) {
+   if (write_vmcoreinfo(dev, f) < 0)
+   pr_warn("fw_cfg: failed to write vmcoreinfo");
+   }
+#endif
+
/* allocate new entry */
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
-- 
2.15.0.277.ga3d2ad2c43



[PATCH v9 0/4] fw_cfg: add DMA operations & etc/vmcoreinfo support

2017-12-05 Thread Marc-André Lureau
Hi,

This series adds DMA operations support to the qemu fw_cfg kernel
module and populates "etc/vmcoreinfo" with vmcoreinfo location
details.

Note: the support for this entry handling has been merged for upcoming
qemu release (2.11).

v9:
- do not rely on a global struct device * (Michael)
- fix dma_mapping_error() calls to take the dev* argument
- initialize DMA ops with dma_set_mask_and_coherent()
- rebased

v8:
- fix ltp again: bring back kmalloc() for DMA memory (see "What memory
  is DMA'able?" limitations in Documentation/DMA-API-HOWTO.txt)
  Interestingly, it didn't fail when the module was linked in..

v7:
- add a patch to fix driver remove()
- remove DMA operation timeout (qemu finishes sync today)
- synchronize the DMA transfer before reading from CPU
- removed kmalloc() use static allocation instead
- drop some r-b tags

v6:
- change acpi_acquire_global_lock() error to return EINVAL
  (instead of EBUSY)
- replace 0 as pointer argument for NULL
- add Gabriel r-b/a-b tags

v5:
- resent to CC kdump people on the paddr_vmcoreinfo_note() export patch

v4:
- export paddr_vmcoreinfo_note() to fix fw_cfg.ko build
- fix build with !CONFIG_CRASH_CORE
- replace the unbounded yield() loop with a usleep_range() loop and a
  200ms timeout
- do not write vmcoreinfo entry when running the kdump kernel (D. Hatayama)
- drop the experimental sysfs write support patch from this series

v3: (thanks kbuild)
- add "fw_cfg: fix the command line module name" patch
- fix build of "fw_cfg: add DMA register" with CONFIG_FW_CFG_SYSFS_CMDLINE=y
- fix 'Wshift-count-overflow'

v2:
- use platform device for dma mapping
- add etc/vmcoreinfo patch
- some code cleanups

Marc-André Lureau (4):
  fw_cfg: add DMA register
  fw_cfg: do DMA read operation
  crash: export paddr_vmcoreinfo_note()
  fw_cfg: write vmcoreinfo details

 drivers/firmware/qemu_fw_cfg.c | 294 -
 kernel/crash_core.c|   1 +
 2 files changed, 263 insertions(+), 32 deletions(-)

-- 
2.15.0.277.ga3d2ad2c43



[PATCH v9 2/4] fw_cfg: do DMA read operation

2017-12-05 Thread Marc-André Lureau
Modify fw_cfg_read_blob() to use DMA if the device supports it.
Return errors, because the operation may fail.

The DMA operation is expected to run synchronously with today's qemu,
but the specification states that it may become async, so we run
"control" field check in a loop for eventual changes.

We may want to switch all the *buf addresses to use only kmalloc'ed
buffers (instead of using stack/image addresses with dma=false).

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 161 -
 1 file changed, 141 insertions(+), 20 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 740df0df2260..e261d9254b29 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -33,6 +33,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -43,12 +45,22 @@ MODULE_LICENSE("GPL");
 #define FW_CFG_ID 0x01
 #define FW_CFG_FILE_DIR   0x19
 
+#define FW_CFG_VERSION_DMA 0x02
+#define FW_CFG_DMA_CTL_ERROR   0x01
+#define FW_CFG_DMA_CTL_READ0x02
+#define FW_CFG_DMA_CTL_SKIP0x04
+#define FW_CFG_DMA_CTL_SELECT  0x08
+#define FW_CFG_DMA_CTL_WRITE   0x10
+
 /* size in bytes of fw_cfg signature */
 #define FW_CFG_SIG_SIZE 4
 
 /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
 #define FW_CFG_MAX_FILE_PATH 56
 
+/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
+static u32 fw_cfg_rev;
+
 /* fw_cfg file directory entry type */
 struct fw_cfg_file {
u32 size;
@@ -57,6 +69,12 @@ struct fw_cfg_file {
char name[FW_CFG_MAX_FILE_PATH];
 };
 
+struct fw_cfg_dma {
+   u32 control;
+   u32 length;
+   u64 address;
+} __packed;
+
 /* fw_cfg device i/o register addresses */
 static bool fw_cfg_is_mmio;
 static phys_addr_t fw_cfg_p_base;
@@ -75,12 +93,88 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
 }
 
+static inline bool fw_cfg_dma_enabled(void)
+{
+   return fw_cfg_rev & FW_CFG_VERSION_DMA && fw_cfg_reg_dma;
+}
+
+/* qemu fw_cfg device is sync today, but spec says it may become async */
+static void fw_cfg_wait_for_control(struct device *dev,
+   struct fw_cfg_dma *d, dma_addr_t dma)
+{
+   do {
+   dma_sync_single_for_cpu(dev, dma, sizeof(*d), DMA_FROM_DEVICE);
+   if ((be32_to_cpu(d->control) & ~FW_CFG_DMA_CTL_ERROR) == 0)
+   return;
+
+   usleep_range(50, 100);
+   } while (true);
+}
+
+static ssize_t fw_cfg_dma_transfer(struct device *dev,
+   void *address, u32 length, u32 control)
+{
+   dma_addr_t dma_addr = 0;
+   struct fw_cfg_dma *d = NULL;
+   dma_addr_t dma;
+   ssize_t ret = length;
+   enum dma_data_direction dir =
+   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0);
+
+   if (address && length) {
+   dma_addr = dma_map_single(dev, address, length, dir);
+   if (dma_mapping_error(dev, dma_addr)) {
+   WARN(1, "%s: failed to map address\n", __func__);
+   return -EFAULT;
+   }
+   }
+
+   d = kmalloc(sizeof(*d), GFP_KERNEL);
+   if (!d) {
+   ret = -ENOMEM;
+   goto end;
+   }
+
+   *d = (struct fw_cfg_dma) {
+   .address = cpu_to_be64(dma_addr),
+   .length = cpu_to_be32(length),
+   .control = cpu_to_be32(control)
+   };
+
+   dma = dma_map_single(dev, d, sizeof(*d), DMA_BIDIRECTIONAL);
+   if (dma_mapping_error(dev, dma)) {
+   WARN(1, "%s: failed to map fw_cfg_dma\n", __func__);
+   ret = -EFAULT;
+   goto end;
+   }
+
+   iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
+   iowrite32be(dma, fw_cfg_reg_dma + 4);
+
+   fw_cfg_wait_for_control(dev, d, dma);
+
+   if (be32_to_cpu(d->control) & FW_CFG_DMA_CTL_ERROR) {
+   ret = -EIO;
+   }
+
+   dma_unmap_single(dev, dma, sizeof(*d), DMA_BIDIRECTIONAL);
+
+end:
+   kfree(d);
+   if (dma_addr)
+   dma_unmap_single(dev, dma_addr, length, dir);
+
+   return ret;
+}
+
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
-static inline void fw_cfg_read_blob(u16 key,
-   void *buf, loff_t pos, size_t count)
+static ssize_t fw_cfg_read_blob(struct device *dev, u16 key,
+   void *buf, loff_t pos, size_t count,
+   bool dma)
 {
u32 glk = -1U;
acpi_status status;
+   ssize_t ret = count;
 
/* If we have ACPI, ensure m

[PATCH v8 1/5] fw_cfg: fix driver remove

2017-11-23 Thread Marc-André Lureau
On driver remove(), all objects created during probe() should be
removed, but sysfs qemu_fw_cfg/rev file was left. Also reorder
functions to match probe() error cleanup code.

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 5cfe39f7a45f..deb483064f53 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -582,9 +582,10 @@ static int fw_cfg_sysfs_remove(struct platform_device 
*pdev)
 {
pr_debug("fw_cfg: unloading.\n");
fw_cfg_sysfs_cache_cleanup();
+   sysfs_remove_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
+   fw_cfg_io_cleanup();
fw_cfg_kset_unregister_recursive(fw_cfg_fname_kset);
fw_cfg_kobj_cleanup(fw_cfg_sel_ko);
-   fw_cfg_io_cleanup();
return 0;
 }
 
-- 
2.15.0.277.ga3d2ad2c43



[PATCH v8 4/5] crash: export paddr_vmcoreinfo_note()

2017-11-23 Thread Marc-André Lureau
The following patch is going to use the symbol from the fw_cfg module,
to call the function and write the note location details in the
vmcoreinfo entry, so qemu can produce dumps with the vmcoreinfo note.

CC: Andrew Morton 
CC: Baoquan He 
CC: Dave Young 
CC: Dave Young 
CC: Hari Bathini 
CC: Tony Luck 
CC: Vivek Goyal 
Signed-off-by: Marc-André Lureau 
Acked-by: Gabriel Somlo 
---
 kernel/crash_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index b3663896278e..2394f0501c65 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -376,6 +376,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
 {
return __pa(vmcoreinfo_note);
 }
+EXPORT_SYMBOL(paddr_vmcoreinfo_note);
 
 static int __init crash_save_vmcoreinfo_init(void)
 {
-- 
2.15.0.277.ga3d2ad2c43



[PATCH v8 5/5] fw_cfg: write vmcoreinfo details

2017-11-23 Thread Marc-André Lureau
If the "etc/vmcoreinfo" fw_cfg file is present and we are not running
the kdump kernel, write the addr/size of the vmcoreinfo ELF note.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 82 +-
 1 file changed, 81 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index b7b5c88a3422..e63fd0fddb61 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -35,6 +35,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -58,6 +60,8 @@ MODULE_LICENSE("GPL");
 /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
 #define FW_CFG_MAX_FILE_PATH 56
 
+#define VMCOREINFO_FORMAT_ELF 0x1
+
 /* platform device for dma mapping */
 static struct device *dev;
 
@@ -120,7 +124,8 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 
length, u32 control)
dma_addr_t dma;
ssize_t ret = length;
enum dma_data_direction dir =
-   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0);
+   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0) |
+   (control & FW_CFG_DMA_CTL_WRITE ? DMA_TO_DEVICE : 0);
 
if (address && length) {
dma_addr = dma_map_single(dev, address, length, dir);
@@ -217,6 +222,48 @@ static ssize_t fw_cfg_read_blob(u16 key,
return ret;
 }
 
+#ifdef CONFIG_CRASH_CORE
+/* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
+static ssize_t fw_cfg_write_blob(u16 key,
+void *buf, loff_t pos, size_t count)
+{
+   u32 glk = -1U;
+   acpi_status status;
+   ssize_t ret = count;
+
+   /* If we have ACPI, ensure mutual exclusion against any potential
+* device access by the firmware, e.g. via AML methods:
+*/
+   status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
+   if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
+   /* Should never get here */
+   WARN(1, "%s: Failed to lock ACPI!\n", __func__);
+   memset(buf, 0, count);
+   return -EINVAL;
+   }
+
+   mutex_lock(&fw_cfg_dev_lock);
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(buf, count, key << 16
+ | FW_CFG_DMA_CTL_SELECT
+ | FW_CFG_DMA_CTL_WRITE);
+   } else {
+   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+   ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
+   if (ret < 0)
+   goto end;
+   ret = fw_cfg_dma_transfer(buf, count, FW_CFG_DMA_CTL_WRITE);
+   }
+
+end:
+   mutex_unlock(&fw_cfg_dev_lock);
+
+   acpi_release_global_lock(glk);
+
+   return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
 /* clean up fw_cfg device i/o */
 static void fw_cfg_io_cleanup(void)
 {
@@ -335,6 +382,32 @@ struct fw_cfg_sysfs_entry {
struct list_head list;
 };
 
+#ifdef CONFIG_CRASH_CORE
+static ssize_t write_vmcoreinfo(const struct fw_cfg_file *f)
+{
+   struct vmci {
+   __le16 host_format;
+   __le16 guest_format;
+   __le32 size;
+   __le64 paddr;
+   } __packed;
+   static struct vmci data;
+   ssize_t ret;
+
+   data = (struct vmci) {
+   .guest_format = cpu_to_le16(VMCOREINFO_FORMAT_ELF),
+   .size = cpu_to_le32(VMCOREINFO_NOTE_SIZE),
+   .paddr = cpu_to_le64(paddr_vmcoreinfo_note())
+   };
+   /* spare ourself reading host format support for now since we
+* don't know what else to format - host may ignore ours
+*/
+   ret = fw_cfg_write_blob(f->select, &data, 0, sizeof(struct vmci));
+
+   return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
 /* get fw_cfg_sysfs_entry from kobject member */
 static inline struct fw_cfg_sysfs_entry *to_entry(struct kobject *kobj)
 {
@@ -574,6 +647,13 @@ static int fw_cfg_register_file(const struct fw_cfg_file 
*f)
int err;
struct fw_cfg_sysfs_entry *entry;
 
+#ifdef CONFIG_CRASH_CORE
+   if (strcmp(f->name, "etc/vmcoreinfo") == 0 && !is_kdump_kernel()) {
+   if (write_vmcoreinfo(f) < 0)
+   pr_warn("fw_cfg: failed to write vmcoreinfo");
+   }
+#endif
+
/* allocate new entry */
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
-- 
2.15.0.277.ga3d2ad2c43



[PATCH v8 2/5] fw_cfg: add DMA register

2017-11-23 Thread Marc-André Lureau
Add an optional <dma_off> kernel module (or command line) parameter
using the following syntax:

  [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
 or
  [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]

and initializes the register address using given or default offset.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 53 --
 1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index deb483064f53..740df0df2260 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -10,20 +10,21 @@
  * and select subsets of aarch64), a Device Tree node (on arm), or using
  * a kernel module (or command line) parameter with the following syntax:
  *
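- *  [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>]
+ *  [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
  * or
- *  [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>]
+ *  [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
  *
  * where:
  *   <size> := size of ioport or mmio range
  *   <base> := physical base address of ioport or mmio range
  *   <ctrl_off> := (optional) offset of control register
  *   <data_off> := (optional) offset of data register
+ *   <dma_off> := (optional) offset of dma register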
  *
  * e.g.:
- *  qemu_fw_cfg.ioport=2@0x510:0:1 (the default on x86)
+ *  qemu_fw_cfg.ioport=12@0x510:0:1:4  (the default on x86)
  * or
- *  qemu_fw_cfg.mmio=0xA@0x902:8:0 (the default on arm)
+ *  qemu_fw_cfg.mmio=16@0x902:8:0:16   (the default on arm)
  */
 
 #include 
@@ -63,6 +64,7 @@ static resource_size_t fw_cfg_p_size;
 static void __iomem *fw_cfg_dev_base;
 static void __iomem *fw_cfg_reg_ctrl;
 static void __iomem *fw_cfg_reg_data;
+static void __iomem *fw_cfg_reg_dma;
 
 /* atomic access to fw_cfg device (potentially slow i/o, so using mutex) */
 static DEFINE_MUTEX(fw_cfg_dev_lock);
@@ -118,12 +120,14 @@ static void fw_cfg_io_cleanup(void)
 # if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
 #  define FW_CFG_CTRL_OFF 0x08
 #  define FW_CFG_DATA_OFF 0x00
+#  define FW_CFG_DMA_OFF 0x10
 # elif (defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC32)) /* ppc/mac,sun4m 
*/
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x02
 # elif (defined(CONFIG_X86) || defined(CONFIG_SPARC64)) /* x86, sun4u */
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x01
+#  define FW_CFG_DMA_OFF 0x04
 # else
 #  error "QEMU FW_CFG not available on this architecture!"
 # endif
@@ -133,7 +137,7 @@ static void fw_cfg_io_cleanup(void)
 static int fw_cfg_do_platform_probe(struct platform_device *pdev)
 {
char sig[FW_CFG_SIG_SIZE];
-   struct resource *range, *ctrl, *data;
+   struct resource *range, *ctrl, *data, *dma;
 
/* acquire i/o range details */
fw_cfg_is_mmio = false;
@@ -170,6 +174,7 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
/* were custom register offsets provided (e.g. on the command line)? */
ctrl = platform_get_resource_byname(pdev, IORESOURCE_REG, "ctrl");
data = platform_get_resource_byname(pdev, IORESOURCE_REG, "data");
+   dma = platform_get_resource_byname(pdev, IORESOURCE_REG, "dma");
if (ctrl && data) {
fw_cfg_reg_ctrl = fw_cfg_dev_base + ctrl->start;
fw_cfg_reg_data = fw_cfg_dev_base + data->start;
@@ -179,6 +184,13 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
fw_cfg_reg_data = fw_cfg_dev_base + FW_CFG_DATA_OFF;
}
 
+   if (dma)
+   fw_cfg_reg_dma = fw_cfg_dev_base + dma->start;
+#ifdef FW_CFG_DMA_OFF
+   else
+   fw_cfg_reg_dma = fw_cfg_dev_base + FW_CFG_DMA_OFF;
+#endif
+
/* verify fw_cfg device signature */
fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
@@ -629,6 +641,7 @@ static struct platform_device *fw_cfg_cmdline_dev;
 /* use special scanf/printf modifier for phys_addr_t, resource_size_t */
 #define PH_ADDR_SCAN_FMT "@%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i" \
+":%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i%n"
 
 #define PH_ADDR_PR_1_FMT "0x%" __PHYS_ADDR_PREFIX "x@" \
@@ -638,12 +651,15 @@ static struct platform_device *fw_cfg_cmdline_dev;
 ":%" __PHYS_ADDR_PREFIX "u" \
 ":%" __PHYS_ADDR_PREFIX "u"
 
+#define PH_ADDR_PR_4_FMT PH_ADDR_PR_3_FMT \
+":%" __PHYS_ADDR_PREFIX "u"
+
 static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
 {
-   struct resource res[3] = {};
+   struct resource res[4] = {};
char *str;
p

[PATCH v8 3/5] fw_cfg: do DMA read operation

2017-11-23 Thread Marc-André Lureau
Modify fw_cfg_read_blob() to use DMA if the device supports it.
Return errors, because the operation may fail.

The DMA operation is expected to run synchronously with today's qemu,
but the specification states that it may become async, so we run
"control" field check in a loop for eventual changes.

We may want to switch all the *buf addresses to use only kmalloc'ed
buffers (instead of using stack/image addresses with dma=false).

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 147 -
 1 file changed, 130 insertions(+), 17 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 740df0df2260..b7b5c88a3422 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -33,6 +33,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -43,12 +45,25 @@ MODULE_LICENSE("GPL");
 #define FW_CFG_ID 0x01
 #define FW_CFG_FILE_DIR   0x19
 
+#define FW_CFG_VERSION_DMA 0x02
+#define FW_CFG_DMA_CTL_ERROR   0x01
+#define FW_CFG_DMA_CTL_READ0x02
+#define FW_CFG_DMA_CTL_SKIP0x04
+#define FW_CFG_DMA_CTL_SELECT  0x08
+#define FW_CFG_DMA_CTL_WRITE   0x10
+
 /* size in bytes of fw_cfg signature */
 #define FW_CFG_SIG_SIZE 4
 
 /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
 #define FW_CFG_MAX_FILE_PATH 56
 
+/* platform device for dma mapping */
+static struct device *dev;
+
+/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
+static u32 fw_cfg_rev;
+
 /* fw_cfg file directory entry type */
 struct fw_cfg_file {
u32 size;
@@ -57,6 +72,12 @@ struct fw_cfg_file {
char name[FW_CFG_MAX_FILE_PATH];
 };
 
+struct fw_cfg_dma {
+   u32 control;
+   u32 length;
+   u64 address;
+} __packed;
+
 /* fw_cfg device i/o register addresses */
 static bool fw_cfg_is_mmio;
 static phys_addr_t fw_cfg_p_base;
@@ -75,12 +96,86 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
 }
 
+static inline bool fw_cfg_dma_enabled(void)
+{
+   return fw_cfg_rev & FW_CFG_VERSION_DMA && fw_cfg_reg_dma;
+}
+
+/* qemu fw_cfg device is sync today, but spec says it may become async */
+static void fw_cfg_wait_for_control(struct fw_cfg_dma *d, dma_addr_t dma)
+{
+   do {
+   dma_sync_single_for_cpu(dev, dma, sizeof(*d), DMA_FROM_DEVICE);
+   if ((be32_to_cpu(d->control) & ~FW_CFG_DMA_CTL_ERROR) == 0)
+   return;
+
+   usleep_range(50, 100);
+   } while (true);
+}
+
+static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
+{
+   dma_addr_t dma_addr = 0;
+   struct fw_cfg_dma *d = NULL;
+   dma_addr_t dma;
+   ssize_t ret = length;
+   enum dma_data_direction dir =
+   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0);
+
+   if (address && length) {
+   dma_addr = dma_map_single(dev, address, length, dir);
+   if (dma_mapping_error(NULL, dma_addr)) {
+   WARN(1, "%s: failed to map address\n", __func__);
+   return -EFAULT;
+   }
+   }
+
+   d = kmalloc(sizeof(*d), GFP_KERNEL);
+   if (!d) {
+   ret = -ENOMEM;
+   goto end;
+   }
+
+   *d = (struct fw_cfg_dma) {
+   .address = cpu_to_be64(dma_addr),
+   .length = cpu_to_be32(length),
+   .control = cpu_to_be32(control)
+   };
+
+   dma = dma_map_single(dev, d, sizeof(*d), DMA_BIDIRECTIONAL);
+   if (dma_mapping_error(NULL, dma)) {
+   WARN(1, "%s: failed to map fw_cfg_dma\n", __func__);
+   ret = -EFAULT;
+   goto end;
+   }
+
+   iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
+   iowrite32be(dma, fw_cfg_reg_dma + 4);
+
+   fw_cfg_wait_for_control(d, dma);
+
+   if (be32_to_cpu(d->control) & FW_CFG_DMA_CTL_ERROR) {
+   ret = -EIO;
+   }
+
+   dma_unmap_single(dev, dma, sizeof(*d), DMA_BIDIRECTIONAL);
+
+end:
+   kfree(d);
+   if (dma_addr)
+   dma_unmap_single(dev, dma_addr, length, dir);
+
+   return ret;
+}
+
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
-static inline void fw_cfg_read_blob(u16 key,
-   void *buf, loff_t pos, size_t count)
+static ssize_t fw_cfg_read_blob(u16 key,
+   void *buf, loff_t pos, size_t count,
+   bool dma)
 {
u32 glk = -1U;
acpi_status status;
+   ssize_t ret = count;
 
/* If we have ACPI, ensure mutual exclusion against any potential
 

[PATCH v8 0/5] fw_cfg: add DMA operations & etc/vmcoreinfo support

2017-11-23 Thread Marc-André Lureau
Hi,

This series adds DMA operations support to the qemu fw_cfg kernel
module and populates "etc/vmcoreinfo" with vmcoreinfo location
details.

Note: the support for this entry handling has been merged for upcoming
qemu release (2.11).

v8:
- fix ltp again: bring back kmalloc() for DMA memory (see "What memory
  is DMA'able?" limitations in Documentation/DMA-API-HOWTO.txt)
  Interestingly, it didn't fail when the module was linked in..

v7:
- add a patch to fix driver remove()
- remove DMA operation timeout (qemu finishes sync today)
- synchronize the DMA transfer before reading from CPU
- removed kmalloc() use static allocation instead
- drop some r-b tags

v6:
- change acpi_acquire_global_lock() error to return EINVAL
  (instead of EBUSY)
- replace 0 as pointer argument for NULL
- add Gabriel r-b/a-b tags

v5:
- resent to CC kdump people on the paddr_vmcoreinfo_note() export patch

v4:
- export paddr_vmcoreinfo_note() to fix fw_cfg.ko build
- fix build with !CONFIG_CRASH_CORE
- replace the unbounded yield() loop with a usleep_range() loop and a
  200ms timeout
- do not write vmcoreinfo entry when running the kdump kernel (D. Hatayama)
- drop the experimental sysfs write support patch from this series

v3: (thanks kbuild)
- add "fw_cfg: fix the command line module name" patch
- fix build of "fw_cfg: add DMA register" with CONFIG_FW_CFG_SYSFS_CMDLINE=y
- fix 'Wshift-count-overflow'

v2:
- use platform device for dma mapping
- add etc/vmcoreinfo patch
- some code cleanups

Marc-André Lureau (5):
  fw_cfg: fix driver remove
  fw_cfg: add DMA register
  fw_cfg: do DMA read operation
  crash: export paddr_vmcoreinfo_note()
  fw_cfg: write vmcoreinfo details

 drivers/firmware/qemu_fw_cfg.c | 283 -
 kernel/crash_core.c|   1 +
 2 files changed, 254 insertions(+), 30 deletions(-)

-- 
2.15.0.277.ga3d2ad2c43



Re: [PATCH v7 3/5] fw_cfg: do DMA read operation

2017-11-21 Thread Marc-André Lureau
Hi

- Original Message -
> On Mon, Nov 20, 2017 at 10:55:17AM +0100, Marc-André Lureau wrote:
> > Modify fw_cfg_read_blob() to use DMA if the device supports it.
> > Return errors, because the operation may fail.
> > 
> > The DMA operation is expected to run synchronously with today's qemu,
> > but the specification states that it may become async, so we run
> > "control" field check in a loop for eventual changes.
> > 
> > We may want to switch all the *buf addresses to use only kmalloc'ed
> > buffers (instead of using stack/image addresses with dma=false).
> > 
> > Signed-off-by: Marc-André Lureau 
> > ---
> >  drivers/firmware/qemu_fw_cfg.c | 140
> >  -
> >  1 file changed, 123 insertions(+), 17 deletions(-)
> > 
> > diff --git a/drivers/firmware/qemu_fw_cfg.c
> > b/drivers/firmware/qemu_fw_cfg.c
> > index 740df0df2260..e0fe6ff037c3 100644
> > --- a/drivers/firmware/qemu_fw_cfg.c
> > +++ b/drivers/firmware/qemu_fw_cfg.c
> > @@ -33,6 +33,8 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> > +#include 
> >  
> >  MODULE_AUTHOR("Gabriel L. Somlo ");
> >  MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
> > @@ -43,12 +45,25 @@ MODULE_LICENSE("GPL");
> >  #define FW_CFG_ID 0x01
> >  #define FW_CFG_FILE_DIR   0x19
> >  
> > +#define FW_CFG_VERSION_DMA 0x02
> > +#define FW_CFG_DMA_CTL_ERROR   0x01
> > +#define FW_CFG_DMA_CTL_READ0x02
> > +#define FW_CFG_DMA_CTL_SKIP0x04
> > +#define FW_CFG_DMA_CTL_SELECT  0x08
> > +#define FW_CFG_DMA_CTL_WRITE   0x10
> > +
> >  /* size in bytes of fw_cfg signature */
> >  #define FW_CFG_SIG_SIZE 4
> >  
> >  /* fw_cfg "file name" is up to 56 characters (including terminating nul)
> >  */
> >  #define FW_CFG_MAX_FILE_PATH 56
> >  
> > +/* platform device for dma mapping */
> > +static struct device *dev;
> > +
> > +/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir.
> > */
> > +static u32 fw_cfg_rev;
> > +
> >  /* fw_cfg file directory entry type */
> >  struct fw_cfg_file {
> > u32 size;
> > @@ -57,6 +72,12 @@ struct fw_cfg_file {
> > char name[FW_CFG_MAX_FILE_PATH];
> >  };
> >  
> > +struct fw_cfg_dma {
> > +   u32 control;
> > +   u32 length;
> > +   u64 address;
> > +} __packed;
> > +
> >  /* fw_cfg device i/o register addresses */
> >  static bool fw_cfg_is_mmio;
> >  static phys_addr_t fw_cfg_p_base;
> 
> Drop __packed please. It causes many gcc versions to do insane things.
> Can be a patch on top.

Oh? I think __packed should translate to __attribute__((packed)) 
(include/linux/compiler-gcc.h); there would be serious problems if gcc did 
"insane things" with it.

> 
> 
> > @@ -75,12 +96,79 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
> > return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
> >  }
> >  
> > +static inline bool fw_cfg_dma_enabled(void)
> > +{
> > +   return fw_cfg_rev & FW_CFG_VERSION_DMA && fw_cfg_reg_dma;
> > +}
> > +
> > +/* qemu fw_cfg device is sync today, but spec says it may become async */
> > +static void fw_cfg_wait_for_control(struct fw_cfg_dma *d, dma_addr_t dma)
> > +{
> > +   do {
> > +   dma_sync_single_for_cpu(dev, dma, sizeof(*d), DMA_FROM_DEVICE);
> > +   if ((be32_to_cpu(d->control) & ~FW_CFG_DMA_CTL_ERROR) == 0)
> > +   return;
> > +
> > +   usleep_range(50, 100);
> 
> And since in practice we never get to this line,
> maybe we should just go back to yield here.

Or cond_resched() ?

> 
> > +   } while (true);
> > +}
> > +
> > +static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
> > +{
> > +   dma_addr_t dma_addr = 0;
> > +   static struct fw_cfg_dma d;
> > +   dma_addr_t dma;
> > +   ssize_t ret = length;
> > +   enum dma_data_direction dir =
> > +   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0);
> > +
> > +   if (address && length) {
> > +   dma_addr = dma_map_single(dev, address, length, dir);
> > +   if (dma_mapping_error(NULL, dma_addr)) {
> > +   WARN(1, "%s: failed to map address\n", __func__);
> > +   return -EFAULT;
> > +   }
> > +   }
> > +
> > +   d = (st

Re: [RFC PATCH 0/3] restructure memfd code

2017-11-20 Thread Marc-André Lureau
Hi

On Thu, Nov 9, 2017 at 2:41 AM, Mike Kravetz  wrote:
> With the addition of memfd hugetlbfs support, we now have the situation
> where memfd depends on TMPFS -or- HUGETLBFS.  Previously, memfd was only
> supported on tmpfs, so it made sense that the code resides in shmem.c.
>
> This patch series moves the memfd code to separate files (memfd.c and
> memfd.h).  It creates a new config option MEMFD_CREATE that is defined
> if either TMPFS or HUGETLBFS is defined.

That looks good to me

>
> In the current code, memfd is only functional if TMPFS is defined.  If
> HUGETLBFS is defined and TMPFS is not defined, then memfd functionality
> will not be available for hugetlbfs.  This does not cause BUGs, just a
> potential lack of desired functionality.
>

Indeed

> Another way to approach this issue would be to simply make HUGETLBFS
> depend on TMPFS.
>
> This patch series is built on top of the Marc-André Lureau v3 series
> "memfd: add sealing to hugetlb-backed memory":
> http://lkml.kernel.org/r/20171107122800.25517-1-marcandre.lur...@redhat.com

Are you waiting for this series to be merged before resending as non-rfc?

>
> Mike Kravetz (3):
>   mm: hugetlbfs: move HUGETLBFS_I outside #ifdef CONFIG_HUGETLBFS
>   mm: memfd: split out memfd for use by multiple filesystems
>   mm: memfd: remove memfd code from shmem files and use new memfd files
>
>  fs/Kconfig   |   3 +
>  fs/fcntl.c   |   2 +-
>  include/linux/hugetlb.h  |  27 ++--
>  include/linux/memfd.h|  16 +++
>  include/linux/shmem_fs.h |  13 --
>  mm/Makefile  |   1 +
>  mm/memfd.c   | 341 
> +++
>  mm/shmem.c   | 323 
>  8 files changed, 378 insertions(+), 348 deletions(-)
>  create mode 100644 include/linux/memfd.h
>  create mode 100644 mm/memfd.c
>
> --
> 2.13.6
>

Thanks

-- 
Marc-André Lureau


[PATCH v7 0/5] fw_cfg: add DMA operations & etc/vmcoreinfo support

2017-11-20 Thread Marc-André Lureau
Hi,

This series adds DMA operations support to the qemu fw_cfg kernel
module and populates "etc/vmcoreinfo" with vmcoreinfo location
details.

Note: the support for this entry handling has been merged for upcoming
qemu release (2.11).

v7:
- add a patch to fix driver remove()
- remove DMA operation timeout (qemu finishes sync today)
- synchronize the DMA transfer before reading from CPU
- removed kmalloc() use static allocation instead
- drop some r-b tags

v6:
- change acpi_acquire_global_lock() error to return EINVAL
  (instead of EBUSY)
- replace 0 as pointer argument for NULL
- add Gabriel r-b/a-b tags

v5:
- resent to CC kdump people on the paddr_vmcoreinfo_note() export patch

v4:
- export paddr_vmcoreinfo_note() to fix fw_cfg.ko build
- fix build with !CONFIG_CRASH_CORE
- replace the unbounded yield() loop with a usleep_range() loop and a
  200ms timeout
- do not write vmcoreinfo entry when running the kdump kernel (D. Hatayama)
- drop the experimental sysfs write support patch from this series

v3: (thanks kbuild)
- add "fw_cfg: fix the command line module name" patch
- fix build of "fw_cfg: add DMA register" with CONFIG_FW_CFG_SYSFS_CMDLINE=y
- fix 'Wshift-count-overflow'

v2:
- use platform device for dma mapping
- add etc/vmcoreinfo patch
- some code cleanups

Marc-André Lureau (5):
  fw_cfg: fix driver remove
  fw_cfg: add DMA register
  fw_cfg: do DMA read operation
  crash: export paddr_vmcoreinfo_note()
  fw_cfg: write vmcoreinfo details

 drivers/firmware/qemu_fw_cfg.c | 276 -
 kernel/crash_core.c|   1 +
 2 files changed, 247 insertions(+), 30 deletions(-)

-- 
2.15.0.277.ga3d2ad2c43



[PATCH v7 1/5] fw_cfg: fix driver remove

2017-11-20 Thread Marc-André Lureau
On driver remove(), all objects created during probe() should be
removed, but sysfs qemu_fw_cfg/rev file was left. Also reorder
functions to match probe() error cleanup code.

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 5cfe39f7a45f..deb483064f53 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -582,9 +582,10 @@ static int fw_cfg_sysfs_remove(struct platform_device 
*pdev)
 {
pr_debug("fw_cfg: unloading.\n");
fw_cfg_sysfs_cache_cleanup();
+   sysfs_remove_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
+   fw_cfg_io_cleanup();
fw_cfg_kset_unregister_recursive(fw_cfg_fname_kset);
fw_cfg_kobj_cleanup(fw_cfg_sel_ko);
-   fw_cfg_io_cleanup();
return 0;
 }
 
-- 
2.15.0.277.ga3d2ad2c43



[PATCH v7 5/5] fw_cfg: write vmcoreinfo details

2017-11-20 Thread Marc-André Lureau
If the "etc/vmcoreinfo" fw_cfg file is present and we are not running
the kdump kernel, write the addr/size of the vmcoreinfo ELF note.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 82 +-
 1 file changed, 81 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index e0fe6ff037c3..66d2fd6e9452 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -35,6 +35,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -58,6 +60,8 @@ MODULE_LICENSE("GPL");
 /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
 #define FW_CFG_MAX_FILE_PATH 56
 
+#define VMCOREINFO_FORMAT_ELF 0x1
+
 /* platform device for dma mapping */
 static struct device *dev;
 
@@ -120,7 +124,8 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 
length, u32 control)
dma_addr_t dma;
ssize_t ret = length;
enum dma_data_direction dir =
-   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0);
+   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0) |
+   (control & FW_CFG_DMA_CTL_WRITE ? DMA_TO_DEVICE : 0);
 
if (address && length) {
dma_addr = dma_map_single(dev, address, length, dir);
@@ -210,6 +215,48 @@ static ssize_t fw_cfg_read_blob(u16 key,
return ret;
 }
 
+#ifdef CONFIG_CRASH_CORE
+/* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
+static ssize_t fw_cfg_write_blob(u16 key,
+void *buf, loff_t pos, size_t count)
+{
+   u32 glk = -1U;
+   acpi_status status;
+   ssize_t ret = count;
+
+   /* If we have ACPI, ensure mutual exclusion against any potential
+* device access by the firmware, e.g. via AML methods:
+*/
+   status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
+   if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
+   /* Should never get here */
+   WARN(1, "%s: Failed to lock ACPI!\n", __func__);
+   memset(buf, 0, count);
+   return -EINVAL;
+   }
+
+   mutex_lock(&fw_cfg_dev_lock);
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(buf, count, key << 16
+ | FW_CFG_DMA_CTL_SELECT
+ | FW_CFG_DMA_CTL_WRITE);
+   } else {
+   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+   ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
+   if (ret < 0)
+   goto end;
+   ret = fw_cfg_dma_transfer(buf, count, FW_CFG_DMA_CTL_WRITE);
+   }
+
+end:
+   mutex_unlock(&fw_cfg_dev_lock);
+
+   acpi_release_global_lock(glk);
+
+   return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
 /* clean up fw_cfg device i/o */
 static void fw_cfg_io_cleanup(void)
 {
@@ -328,6 +375,32 @@ struct fw_cfg_sysfs_entry {
struct list_head list;
 };
 
+#ifdef CONFIG_CRASH_CORE
+static ssize_t write_vmcoreinfo(const struct fw_cfg_file *f)
+{
+   struct vmci {
+   __le16 host_format;
+   __le16 guest_format;
+   __le32 size;
+   __le64 paddr;
+   } __packed;
+   static struct vmci data;
+   ssize_t ret;
+
+   data = (struct vmci) {
+   .guest_format = cpu_to_le16(VMCOREINFO_FORMAT_ELF),
+   .size = cpu_to_le32(VMCOREINFO_NOTE_SIZE),
+   .paddr = cpu_to_le64(paddr_vmcoreinfo_note())
+   };
+   /* spare ourself reading host format support for now since we
+* don't know what else to format - host may ignore ours
+*/
+   ret = fw_cfg_write_blob(f->select, &data, 0, sizeof(struct vmci));
+
+   return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
 /* get fw_cfg_sysfs_entry from kobject member */
 static inline struct fw_cfg_sysfs_entry *to_entry(struct kobject *kobj)
 {
@@ -567,6 +640,13 @@ static int fw_cfg_register_file(const struct fw_cfg_file 
*f)
int err;
struct fw_cfg_sysfs_entry *entry;
 
+#ifdef CONFIG_CRASH_CORE
+   if (strcmp(f->name, "etc/vmcoreinfo") == 0 && !is_kdump_kernel()) {
+   if (write_vmcoreinfo(f) < 0)
+   pr_warn("fw_cfg: failed to write vmcoreinfo");
+   }
+#endif
+
/* allocate new entry */
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
-- 
2.15.0.277.ga3d2ad2c43



[PATCH v7 4/5] crash: export paddr_vmcoreinfo_note()

2017-11-20 Thread Marc-André Lureau
The following patch is going to use the symbol from the fw_cfg module,
to call the function and write the note location details in the
vmcoreinfo entry, so qemu can produce dumps with the vmcoreinfo note.

CC: Andrew Morton 
CC: Baoquan He 
CC: Dave Young 
CC: Dave Young 
CC: Hari Bathini 
CC: Tony Luck 
CC: Vivek Goyal 
Signed-off-by: Marc-André Lureau 
Acked-by: Gabriel Somlo 
---
 kernel/crash_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index b3663896278e..2394f0501c65 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -376,6 +376,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
 {
return __pa(vmcoreinfo_note);
 }
+EXPORT_SYMBOL(paddr_vmcoreinfo_note);
 
 static int __init crash_save_vmcoreinfo_init(void)
 {
-- 
2.15.0.277.ga3d2ad2c43



[PATCH v7 3/5] fw_cfg: do DMA read operation

2017-11-20 Thread Marc-André Lureau
Modify fw_cfg_read_blob() to use DMA if the device supports it.
Return errors, because the operation may fail.

The DMA operation is expected to run synchronously with today's qemu,
but the specification states that it may become async, so we run
"control" field check in a loop for eventual changes.

We may want to switch all the *buf addresses to use only kmalloc'ed
buffers (instead of using stack/image addresses with dma=false).

Signed-off-by: Marc-André Lureau 
---
 drivers/firmware/qemu_fw_cfg.c | 140 -
 1 file changed, 123 insertions(+), 17 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 740df0df2260..e0fe6ff037c3 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -33,6 +33,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -43,12 +45,25 @@ MODULE_LICENSE("GPL");
 #define FW_CFG_ID 0x01
 #define FW_CFG_FILE_DIR   0x19
 
+#define FW_CFG_VERSION_DMA 0x02
+#define FW_CFG_DMA_CTL_ERROR   0x01
+#define FW_CFG_DMA_CTL_READ0x02
+#define FW_CFG_DMA_CTL_SKIP0x04
+#define FW_CFG_DMA_CTL_SELECT  0x08
+#define FW_CFG_DMA_CTL_WRITE   0x10
+
 /* size in bytes of fw_cfg signature */
 #define FW_CFG_SIG_SIZE 4
 
 /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
 #define FW_CFG_MAX_FILE_PATH 56
 
+/* platform device for dma mapping */
+static struct device *dev;
+
+/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
+static u32 fw_cfg_rev;
+
 /* fw_cfg file directory entry type */
 struct fw_cfg_file {
u32 size;
@@ -57,6 +72,12 @@ struct fw_cfg_file {
char name[FW_CFG_MAX_FILE_PATH];
 };
 
+struct fw_cfg_dma {
+   u32 control;
+   u32 length;
+   u64 address;
+} __packed;
+
 /* fw_cfg device i/o register addresses */
 static bool fw_cfg_is_mmio;
 static phys_addr_t fw_cfg_p_base;
@@ -75,12 +96,79 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
 }
 
+static inline bool fw_cfg_dma_enabled(void)
+{
+   return fw_cfg_rev & FW_CFG_VERSION_DMA && fw_cfg_reg_dma;
+}
+
+/* qemu fw_cfg device is sync today, but spec says it may become async */
+static void fw_cfg_wait_for_control(struct fw_cfg_dma *d, dma_addr_t dma)
+{
+   do {
+   dma_sync_single_for_cpu(dev, dma, sizeof(*d), DMA_FROM_DEVICE);
+   if ((be32_to_cpu(d->control) & ~FW_CFG_DMA_CTL_ERROR) == 0)
+   return;
+
+   usleep_range(50, 100);
+   } while (true);
+}
+
+static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
+{
+   dma_addr_t dma_addr = 0;
+   static struct fw_cfg_dma d;
+   dma_addr_t dma;
+   ssize_t ret = length;
+   enum dma_data_direction dir =
+   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0);
+
+   if (address && length) {
+   dma_addr = dma_map_single(dev, address, length, dir);
+   if (dma_mapping_error(NULL, dma_addr)) {
+   WARN(1, "%s: failed to map address\n", __func__);
+   return -EFAULT;
+   }
+   }
+
+   d = (struct fw_cfg_dma) {
+   .address = cpu_to_be64(dma_addr),
+   .length = cpu_to_be32(length),
+   .control = cpu_to_be32(control)
+   };
+
+   dma = dma_map_single(dev, &d, sizeof(d), DMA_BIDIRECTIONAL);
+   if (dma_mapping_error(NULL, dma)) {
+   WARN(1, "%s: failed to map fw_cfg_dma\n", __func__);
+   ret = -EFAULT;
+   goto end;
+   }
+
+   iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
+   iowrite32be(dma, fw_cfg_reg_dma + 4);
+
+   fw_cfg_wait_for_control(&d, dma);
+
+   if (be32_to_cpu(d.control) & FW_CFG_DMA_CTL_ERROR) {
+   ret = -EIO;
+   }
+
+   dma_unmap_single(dev, dma, sizeof(d), DMA_BIDIRECTIONAL);
+
+end:
+   if (dma_addr)
+   dma_unmap_single(dev, dma_addr, length, dir);
+
+   return ret;
+}
+
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
-static inline void fw_cfg_read_blob(u16 key,
-   void *buf, loff_t pos, size_t count)
+static ssize_t fw_cfg_read_blob(u16 key,
+   void *buf, loff_t pos, size_t count,
+   bool dma)
 {
u32 glk = -1U;
acpi_status status;
+   ssize_t ret = count;
 
/* If we have ACPI, ensure mutual exclusion against any potential
 * device access by the firmware, e.g. via AML methods:
@@ -90,17 +178,36 @@ static inline void fw_cfg_read_blob(u16 key,
/* Should

[PATCH v7 2/5] fw_cfg: add DMA register

2017-11-20 Thread Marc-André Lureau
Add an optional <dma_off> kernel module (or command line) parameter
using the following syntax:

  [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
 or
  [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]

and initializes the register address using given or default offset.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 53 --
 1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index deb483064f53..740df0df2260 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -10,20 +10,21 @@
  * and select subsets of aarch64), a Device Tree node (on arm), or using
  * a kernel module (or command line) parameter with the following syntax:
  *
- *  [qemu_fw_cfg.]ioport=@[::]
+ *  [qemu_fw_cfg.]ioport=@[::[:]]
  * or
- *  [qemu_fw_cfg.]mmio=@[::]
+ *  [qemu_fw_cfg.]mmio=@[::[:]]
  *
  * where:
  *   := size of ioport or mmio range
  *   := physical base address of ioport or mmio range
  *   := (optional) offset of control register
  *   := (optional) offset of data register
+ *   := (optional) offset of dma register
  *
  * e.g.:
- *  qemu_fw_cfg.ioport=2@0x510:0:1 (the default on x86)
+ *  qemu_fw_cfg.ioport=12@0x510:0:1:4  (the default on x86)
  * or
- *  qemu_fw_cfg.mmio=0xA@0x902:8:0 (the default on arm)
+ *  qemu_fw_cfg.mmio=16@0x902:8:0:16   (the default on arm)
  */
 
 #include 
@@ -63,6 +64,7 @@ static resource_size_t fw_cfg_p_size;
 static void __iomem *fw_cfg_dev_base;
 static void __iomem *fw_cfg_reg_ctrl;
 static void __iomem *fw_cfg_reg_data;
+static void __iomem *fw_cfg_reg_dma;
 
 /* atomic access to fw_cfg device (potentially slow i/o, so using mutex) */
 static DEFINE_MUTEX(fw_cfg_dev_lock);
@@ -118,12 +120,14 @@ static void fw_cfg_io_cleanup(void)
 # if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
 #  define FW_CFG_CTRL_OFF 0x08
 #  define FW_CFG_DATA_OFF 0x00
+#  define FW_CFG_DMA_OFF 0x10
 # elif (defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC32)) /* ppc/mac,sun4m 
*/
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x02
 # elif (defined(CONFIG_X86) || defined(CONFIG_SPARC64)) /* x86, sun4u */
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x01
+#  define FW_CFG_DMA_OFF 0x04
 # else
 #  error "QEMU FW_CFG not available on this architecture!"
 # endif
@@ -133,7 +137,7 @@ static void fw_cfg_io_cleanup(void)
 static int fw_cfg_do_platform_probe(struct platform_device *pdev)
 {
char sig[FW_CFG_SIG_SIZE];
-   struct resource *range, *ctrl, *data;
+   struct resource *range, *ctrl, *data, *dma;
 
/* acquire i/o range details */
fw_cfg_is_mmio = false;
@@ -170,6 +174,7 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
/* were custom register offsets provided (e.g. on the command line)? */
ctrl = platform_get_resource_byname(pdev, IORESOURCE_REG, "ctrl");
data = platform_get_resource_byname(pdev, IORESOURCE_REG, "data");
+   dma = platform_get_resource_byname(pdev, IORESOURCE_REG, "dma");
if (ctrl && data) {
fw_cfg_reg_ctrl = fw_cfg_dev_base + ctrl->start;
fw_cfg_reg_data = fw_cfg_dev_base + data->start;
@@ -179,6 +184,13 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
fw_cfg_reg_data = fw_cfg_dev_base + FW_CFG_DATA_OFF;
}
 
+   if (dma)
+   fw_cfg_reg_dma = fw_cfg_dev_base + dma->start;
+#ifdef FW_CFG_DMA_OFF
+   else
+   fw_cfg_reg_dma = fw_cfg_dev_base + FW_CFG_DMA_OFF;
+#endif
+
/* verify fw_cfg device signature */
fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
@@ -629,6 +641,7 @@ static struct platform_device *fw_cfg_cmdline_dev;
 /* use special scanf/printf modifier for phys_addr_t, resource_size_t */
 #define PH_ADDR_SCAN_FMT "@%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i" \
+":%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i%n"
 
 #define PH_ADDR_PR_1_FMT "0x%" __PHYS_ADDR_PREFIX "x@" \
@@ -638,12 +651,15 @@ static struct platform_device *fw_cfg_cmdline_dev;
 ":%" __PHYS_ADDR_PREFIX "u" \
 ":%" __PHYS_ADDR_PREFIX "u"
 
+#define PH_ADDR_PR_4_FMT PH_ADDR_PR_3_FMT \
+":%" __PHYS_ADDR_PREFIX "u"
+
 static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
 {
-   struct resource res[3] = {};
+   struct resource res[4] = {};
char *str;
p

Re: [lkp-robot] [fw_cfg] 05b5d5161b: WARNING:at_drivers/firmware/qemu_fw_cfg.c:#fw_cfg_dma_transfer

2017-11-17 Thread Marc-André Lureau
Hi

- Original Message -
> On 11/16, Michael S. Tsirkin wrote:
> >On Thu, Nov 16, 2017 at 08:58:13AM +0800, kernel test robot wrote:
> >> 
> >> FYI, we noticed the following commit (built with gcc-6):
> >> 
> >> commit: 05b5d5161b9e6c72e1d06f36614edbdbfe192cc7 ("fw_cfg: do DMA read
> >> operation")
> >> https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master
> >> 
> >> in testcase: boot
> >> 
> >> on test machine: qemu-system-x86_64 -enable-kvm -cpu host -smp 2 -m 4G
> >> 
> >> caused below changes (please refer to attached dmesg/kmsg for entire
> >> log/backtrace):
> >
> >So this most likely indicates compatibility issues with the specific
> >qemu version. Can you please tell us which qemu version was used
> >for this testing?
> 
> root@lkp-nex04 ~# qemu-system-x86_64 --version
> QEMU emulator version 2.10.0(Debian 1:2.10.0+dfsg-1)
> Copyright (c) 2003-2017 Fabrice Bellard and the QEMU Project developers

Good news :-., I can reproduce the timeout with upstream kernel/qemu & lkp 
test, working on it.

thanks

> 
> Thanks,
> Xiaolong
> >
> 
> >Thanks!
> >
> >
> >> 
> >> ++++
> >> || 
> >> 102b01757e
> >> || |
> >> || 
> >> 05b5d5161b
> >> || |
> >> ++++
> >> | boot_successes | 0
> >> | | 0  |
> >> | boot_failures  | 12
> >> | | 12 |
> >> | genirq:Flags_mismatch_irq##(ttyS0)vs.#(sir_ir) | 12
> >> | | 12 |
> >> | WARNING:at_drivers/firmware/qemu_fw_cfg.c:#fw_cfg_dma_transfer | 0
> >> | | 8  |
> >> | RIP:fw_cfg_dma_transfer| 0
> >> | | 8  |
> >> | WARNING:at_fs/sysfs/dir.c:#sysfs_warn_dup  | 0
> >> | | 8  |
> >> | RIP:sysfs_warn_dup | 0
> >> | | 8  |
> >> | WARNING:at_lib/kobject.c:#kobject_add_internal | 0
> >> | | 8  |
> >> | RIP:kobject_add_internal   | 0
> >> | | 8  |
> >> ++++
> >> 
> >> 
> >> 
> >> [  156.143041] WARNING: CPU: 0 PID: 1 at
> >> drivers/firmware/qemu_fw_cfg.c:163 fw_cfg_dma_transfer+0x55d/0x600
> >> [  156.143041] CPU: 0 PID: 1 Comm: swapper Not tainted
> >> 4.14.0-7-g05b5d51 #1
> >> [  156.143041] task: 88013f02e000 task.stack: c9008000
> >> [  156.143041] RIP: 0010:fw_cfg_dma_transfer+0x55d/0x600
> >> [  156.143041] RSP: :c900bc08 EFLAGS: 00010202
> >> [  156.143041] RAX: 001c RBX: 880116d761c8 RCX:
> >> 
> >> [  156.143041] RDX:  RSI: 0004 RDI:
> >> 0202
> >> [  156.143041] RBP: 00245ac7fec4 R08: 0001 R09:
> >> 
> >> [  156.143041] R10: c906be20 R11: 99779f29 R12:
> >> 0004
> >> [  156.143041] R13: 0004 R14: bbfdc000 R15:
> >> 
> >> [  156.143041] FS:  () GS:83252000()
> >> knlGS:
> >> [  156.143041] CS:  0010 DS:  ES:  CR0: 80050033
> >> [  156.143041] CR2:  CR3: 03215000 CR4:
> >> 06b0
> >> [  156.143041] Call Trace:
> >> [  156.143041]  ? fw_cfg_read_blob+0x192/0x2d0
> >> [  156.143041]  ? fw_cfg_register_dir_entries+0xaa/0x560
> >> [  156.143041]  ? fw_cfg_sysfs_probe+0x408/0x590
> >> [  156.143041]  ? fw_cfg_sysfs_read_raw+0xa0/0xa0
> >> [  156.143041]  ? platform_drv_probe+0x98/0x180
> >> [  156.143041]  ? platform_drv_remove+0x70/0x70
> >> [  156.143041]  ? really_probe+0x2ca/0x770
> >> [  156.143041]  ? driver_probe_device+0x170/0x170
> >> [  156.143041]  ? driver_probe_device+0xf8/0x170
> >> [  156.143041]  ? driver_probe_device+0x170/0x170
> >> [  156.143041]  ? __driver_attach+0x189/0x1f0
> >> [  156.143041]  ? bus_for_each_dev+0xc3/0x140
> >> [  156.143041]  ? driver_attach+0x26/0x30
> >> [  156.143041]  ? bus_add_driver+0x1fd/0x420
> >> [  156.143041]  ? firmware_map_add_early+0xef/0xef
> >> [  156.143041]  ? driver_register+0x146/0x1c0
> >> [  156.143041]  ? __platform_driver_register+0x42/0x50
> >> [  156.143041]  ? fw_cfg_sysfs_init+0x85/0x104
> >> [  156.143041]  ? firmware_map_add_early+0xef/0xef
> >> [  156.143041]  ? do_one_initcall+0x132/0x339
> >> [  156.143041]  ? kernel_init_freeable+0x269/0x425
> >> [  156.143041]  ? rest_init+0x150/0x150
> >> [  156.143041]  ? kernel_init+0x17/0x220
> >> [  156.143041] 

Re: [PATCH v6 5/5] fw_cfg: write vmcoreinfo details

2017-11-16 Thread Marc-André Lureau
Hi

- Original Message -
> On Mon, Nov 13, 2017 at 08:29:58PM +0100, Marc-André Lureau wrote:
> > If the "etc/vmcoreinfo" fw_cfg file is present and we are not running
> > the kdump kernel, write the addr/size of the vmcoreinfo ELF note.
> > 
> > Signed-off-by: Marc-André Lureau 
> > Reviewed-by: Gabriel Somlo 
> > ---
> >  drivers/firmware/qemu_fw_cfg.c | 87
> >  +-
> >  1 file changed, 86 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/firmware/qemu_fw_cfg.c
> > b/drivers/firmware/qemu_fw_cfg.c
> > index 2ac4cd869fe6..7a70e7a549f6 100644
> > --- a/drivers/firmware/qemu_fw_cfg.c
> > +++ b/drivers/firmware/qemu_fw_cfg.c
> > @@ -35,6 +35,8 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> > +#include 
> >  
> >  MODULE_AUTHOR("Gabriel L. Somlo ");
> >  MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
> > @@ -59,6 +61,8 @@ MODULE_LICENSE("GPL");
> >  /* fw_cfg "file name" is up to 56 characters (including terminating nul)
> >  */
> >  #define FW_CFG_MAX_FILE_PATH 56
> >  
> > +#define VMCOREINFO_FORMAT_ELF 0x1
> > +
> >  /* platform device for dma mapping */
> >  static struct device *dev;
> >  
> > @@ -127,7 +131,8 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32
> > length, u32 control)
> > dma_addr_t dma;
> > ssize_t ret = length;
> > enum dma_data_direction dir =
> > -   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0);
> > +   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0) |
> > +   (control & FW_CFG_DMA_CTL_WRITE ? DMA_TO_DEVICE : 0);
> >  
> > if (address && length) {
> > dma_addr = dma_map_single(dev, address, length, dir);
> > @@ -225,6 +230,48 @@ static ssize_t fw_cfg_read_blob(u16 key,
> > return ret;
> >  }
> >  
> > +#ifdef CONFIG_CRASH_CORE
> > +/* write chunk of given fw_cfg blob (caller responsible for sanity-check)
> > */
> > +static ssize_t fw_cfg_write_blob(u16 key,
> > +void *buf, loff_t pos, size_t count)
> > +{
> > +   u32 glk = -1U;
> > +   acpi_status status;
> > +   ssize_t ret = count;
> > +
> > +   /* If we have ACPI, ensure mutual exclusion against any potential
> > +* device access by the firmware, e.g. via AML methods:
> > +*/
> > +   status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
> > +   if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
> > +   /* Should never get here */
> > +   WARN(1, "%s: Failed to lock ACPI!\n", __func__);
> > +   memset(buf, 0, count);
> > +   return -EINVAL;
> > +   }
> > +
> > +   mutex_lock(&fw_cfg_dev_lock);
> > +   if (pos == 0) {
> > +   ret = fw_cfg_dma_transfer(buf, count, key << 16
> > + | FW_CFG_DMA_CTL_SELECT
> > + | FW_CFG_DMA_CTL_WRITE);
> > +   } else {
> > +   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
> > +   ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
> > +   if (ret < 0)
> > +   goto end;
> > +   ret = fw_cfg_dma_transfer(buf, count, FW_CFG_DMA_CTL_WRITE);
> > +   }
> > +
> > +end:
> > +   mutex_unlock(&fw_cfg_dev_lock);
> > +
> > +   acpi_release_global_lock(glk);
> > +
> > +   return ret;
> > +}
> > +#endif /* CONFIG_CRASH_CORE */
> > +
> >  /* clean up fw_cfg device i/o */
> >  static void fw_cfg_io_cleanup(void)
> >  {
> > @@ -343,6 +390,37 @@ struct fw_cfg_sysfs_entry {
> > struct list_head list;
> >  };
> >  
> > +#ifdef CONFIG_CRASH_CORE
> > +static ssize_t write_vmcoreinfo(const struct fw_cfg_file *f)
> > +{
> > +   struct vmci {
> > +   __le16 host_format;
> > +   __le16 guest_format;
> > +   __le32 size;
> > +   __le64 paddr;
> > +   } __packed;
> > +   struct vmci *data;
> > +   ssize_t ret;
> > +
> > +   data = kmalloc(sizeof(struct vmci), GFP_KERNEL | GFP_DMA);
> > +   if (!data)
> > +   return -ENOMEM;
> 
> It's a small bit of data - you can just keep it in a global variable,
> this way failures won't be an issue.

It would still need to be allocated with GFP_DMA. Since it's a one t

Re: [PATCH v6 3/5] fw_cfg: do DMA read operation

2017-11-16 Thread Marc-André Lureau
Hi

- Original Message -
> On Mon, Nov 13, 2017 at 08:29:56PM +0100, Marc-André Lureau wrote:
> > Modify fw_cfg_read_blob() to use DMA if the device supports it.
> > Return errors, because the operation may fail.
> > 
> > To avoid polling with unbound amount of time, the DMA operation is
> > expected to complete within 200ms, or will return ETIME error.
> > 
> > We may want to switch all the *buf addresses to use only kmalloc'ed
> > buffers (instead of using stack/image addresses with dma=false).
> > 
> > Signed-off-by: Marc-André Lureau 
> > Reviewed-by: Gabriel Somlo 
> > ---
> >  drivers/firmware/qemu_fw_cfg.c | 154
> >  -
> >  1 file changed, 137 insertions(+), 17 deletions(-)
> > 
> > diff --git a/drivers/firmware/qemu_fw_cfg.c
> > b/drivers/firmware/qemu_fw_cfg.c
> > index 1f3e8545dab7..2ac4cd869fe6 100644
> > --- a/drivers/firmware/qemu_fw_cfg.c
> > +++ b/drivers/firmware/qemu_fw_cfg.c
> > @@ -33,6 +33,8 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> > +#include 
> >  
> >  MODULE_AUTHOR("Gabriel L. Somlo ");
> >  MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
> > @@ -43,12 +45,26 @@ MODULE_LICENSE("GPL");
> >  #define FW_CFG_ID 0x01
> >  #define FW_CFG_FILE_DIR   0x19
> >  
> > +#define FW_CFG_VERSION_DMA 0x02
> > +#define FW_CFG_DMA_CTL_ERROR   0x01
> > +#define FW_CFG_DMA_CTL_READ0x02
> > +#define FW_CFG_DMA_CTL_SKIP0x04
> > +#define FW_CFG_DMA_CTL_SELECT  0x08
> > +#define FW_CFG_DMA_CTL_WRITE   0x10
> > +#define FW_CFG_DMA_TIMEOUT 200 /* ms */
> > +
> >  /* size in bytes of fw_cfg signature */
> >  #define FW_CFG_SIG_SIZE 4
> >  
> >  /* fw_cfg "file name" is up to 56 characters (including terminating nul)
> >  */
> >  #define FW_CFG_MAX_FILE_PATH 56
> >  
> > +/* platform device for dma mapping */
> > +static struct device *dev;
> > +
> > +/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir.
> > */
> > +static u32 fw_cfg_rev;
> > +
> >  /* fw_cfg file directory entry type */
> >  struct fw_cfg_file {
> > u32 size;
> > @@ -57,6 +73,12 @@ struct fw_cfg_file {
> > char name[FW_CFG_MAX_FILE_PATH];
> >  };
> >  
> > +struct fw_cfg_dma {
> > +   u32 control;
> > +   u32 length;
> > +   u64 address;
> > +} __packed;
> > +
> >  /* fw_cfg device i/o register addresses */
> >  static bool fw_cfg_is_mmio;
> >  static phys_addr_t fw_cfg_p_base;
> > @@ -75,12 +97,93 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
> > return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
> >  }
> >  
> > +static inline bool fw_cfg_dma_enabled(void)
> > +{
> > +   return fw_cfg_rev & FW_CFG_VERSION_DMA && fw_cfg_reg_dma;
> > +}
> > +
> > +static bool fw_cfg_wait_for_control(struct fw_cfg_dma *d, unsigned long
> > timeout)
> > +{
> > +   ktime_t start;
> > +   ktime_t stop;
> > +
> > +   start = ktime_get();
> > +   stop = ktime_add(start, ms_to_ktime(timeout));
> > +
> > +   do {
> > +   if ((be32_to_cpu(d->control) & ~FW_CFG_DMA_CTL_ERROR) == 0)
> > +   return true;
> > +
> > +   usleep_range(50, 100);
> 
> BTW it's not nice that this is uninterruptible. I think we need a
> variant of usleep_range that is interruptible and call that here.
> Thoughts?
> 

This usleep_range() pattern is pretty common apparently.

(and it's probably better than calling yield() in a loop, like v1-3 did) 
> 
> > +   } while (ktime_before(ktime_get(), stop));
> > +
> > +   return false;
> > +}
> > +
> > +static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
> > +{
> > +   dma_addr_t dma_addr = 0;
> > +   struct fw_cfg_dma *d;
> > +   dma_addr_t dma;
> > +   ssize_t ret = length;
> > +   enum dma_data_direction dir =
> > +   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0);
> > +
> > +   if (address && length) {
> > +   dma_addr = dma_map_single(dev, address, length, dir);
> > +   if (dma_mapping_error(NULL, dma_addr)) {
> > +   WARN(1, "%s: failed to map address\n", __func__);
> > +   return -EFAULT;
> > +   }
> > +   }
> > +
> > +   d = kmalloc(sizeof(*d), GFP_KERNEL | GFP_DMA)

Re: [PATCH v6 3/5] fw_cfg: do DMA read operation

2017-11-16 Thread Marc-André Lureau
Hi

- Original Message -
> On Mon, Nov 13, 2017 at 08:27:48PM +0100, Marc-André Lureau wrote:
> > Modify fw_cfg_read_blob() to use DMA if the device supports it.
> > Return errors, because the operation may fail.
> > 
> > To avoid polling with unbound amount of time, the DMA operation is
> > expected to complete within 200ms, or will return ETIME error.
> > 
> > We may want to switch all the *buf addresses to use only kmalloc'ed
> > buffers (instead of using stack/image addresses with dma=false).
> > 
> > Signed-off-by: Marc-André Lureau 
> > Reviewed-by: Gabriel Somlo 
> 
> This is causing issues for some people. I have reverted it for now.

I didn't know you merged it, where did it go?

If you are talking about the kernel test robot, the cause seems to be the
timeout. I picked the 200ms value for FW_CFG_DMA_TIMEOUT as a guess; we may
want to extend it for busy hosts. By how much? Several seconds?

> I wonder why bother with DMA reads. Can we limit DMA to writes?

Well, we could, but DMA reads are supposedly quite a bit faster. And if DMA
read is broken, chances are good that DMA write is broken too.

> > ---
> >  drivers/firmware/qemu_fw_cfg.c | 154
> >  -
> >  1 file changed, 137 insertions(+), 17 deletions(-)
> > 
> > diff --git a/drivers/firmware/qemu_fw_cfg.c
> > b/drivers/firmware/qemu_fw_cfg.c
> > index 1f3e8545dab7..2ac4cd869fe6 100644
> > --- a/drivers/firmware/qemu_fw_cfg.c
> > +++ b/drivers/firmware/qemu_fw_cfg.c
> > @@ -33,6 +33,8 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> > +#include 
> >  
> >  MODULE_AUTHOR("Gabriel L. Somlo ");
> >  MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
> > @@ -43,12 +45,26 @@ MODULE_LICENSE("GPL");
> >  #define FW_CFG_ID 0x01
> >  #define FW_CFG_FILE_DIR   0x19
> >  
> > +#define FW_CFG_VERSION_DMA 0x02
> > +#define FW_CFG_DMA_CTL_ERROR   0x01
> > +#define FW_CFG_DMA_CTL_READ0x02
> > +#define FW_CFG_DMA_CTL_SKIP0x04
> > +#define FW_CFG_DMA_CTL_SELECT  0x08
> > +#define FW_CFG_DMA_CTL_WRITE   0x10
> > +#define FW_CFG_DMA_TIMEOUT 200 /* ms */
> > +
> >  /* size in bytes of fw_cfg signature */
> >  #define FW_CFG_SIG_SIZE 4
> >  
> >  /* fw_cfg "file name" is up to 56 characters (including terminating nul)
> >  */
> >  #define FW_CFG_MAX_FILE_PATH 56
> >  
> > +/* platform device for dma mapping */
> > +static struct device *dev;
> > +
> > +/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir.
> > */
> > +static u32 fw_cfg_rev;
> > +
> >  /* fw_cfg file directory entry type */
> >  struct fw_cfg_file {
> > u32 size;
> > @@ -57,6 +73,12 @@ struct fw_cfg_file {
> > char name[FW_CFG_MAX_FILE_PATH];
> >  };
> >  
> > +struct fw_cfg_dma {
> > +   u32 control;
> > +   u32 length;
> > +   u64 address;
> > +} __packed;
> > +
> >  /* fw_cfg device i/o register addresses */
> >  static bool fw_cfg_is_mmio;
> >  static phys_addr_t fw_cfg_p_base;
> > @@ -75,12 +97,93 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
> > return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
> >  }
> >  
> > +static inline bool fw_cfg_dma_enabled(void)
> > +{
> > +   return fw_cfg_rev & FW_CFG_VERSION_DMA && fw_cfg_reg_dma;
> > +}
> > +
> > +static bool fw_cfg_wait_for_control(struct fw_cfg_dma *d, unsigned long
> > timeout)
> > +{
> > +   ktime_t start;
> > +   ktime_t stop;
> > +
> > +   start = ktime_get();
> > +   stop = ktime_add(start, ms_to_ktime(timeout));
> > +
> > +   do {
> > +   if ((be32_to_cpu(d->control) & ~FW_CFG_DMA_CTL_ERROR) == 0)
> > +   return true;
> > +
> > +   usleep_range(50, 100);
> > +   } while (ktime_before(ktime_get(), stop));
> > +
> > +   return false;
> > +}
> > +
> > +static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
> > +{
> > +   dma_addr_t dma_addr = 0;
> > +   struct fw_cfg_dma *d;
> > +   dma_addr_t dma;
> > +   ssize_t ret = length;
> > +   enum dma_data_direction dir =
> > +   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0);
> > +
> > +   if (address && length) {
> > +   dma_addr = dma_map_single(dev, address, length, dir);
> > +   if (dma_mapping_error(NULL, dma_addr)) {
> > +   W

[PATCH v6 5/5] fw_cfg: write vmcoreinfo details

2017-11-13 Thread Marc-André Lureau
If the "etc/vmcoreinfo" fw_cfg file is present and we are not running
the kdump kernel, write the addr/size of the vmcoreinfo ELF note.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 87 +-
 1 file changed, 86 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 2ac4cd869fe6..7a70e7a549f6 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -35,6 +35,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -59,6 +61,8 @@ MODULE_LICENSE("GPL");
 /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
 #define FW_CFG_MAX_FILE_PATH 56
 
+#define VMCOREINFO_FORMAT_ELF 0x1
+
 /* platform device for dma mapping */
 static struct device *dev;
 
@@ -127,7 +131,8 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 
length, u32 control)
dma_addr_t dma;
ssize_t ret = length;
enum dma_data_direction dir =
-   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0);
+   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0) |
+   (control & FW_CFG_DMA_CTL_WRITE ? DMA_TO_DEVICE : 0);
 
if (address && length) {
dma_addr = dma_map_single(dev, address, length, dir);
@@ -225,6 +230,48 @@ static ssize_t fw_cfg_read_blob(u16 key,
return ret;
 }
 
+#ifdef CONFIG_CRASH_CORE
+/* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
+static ssize_t fw_cfg_write_blob(u16 key,
+void *buf, loff_t pos, size_t count)
+{
+   u32 glk = -1U;
+   acpi_status status;
+   ssize_t ret = count;
+
+   /* If we have ACPI, ensure mutual exclusion against any potential
+* device access by the firmware, e.g. via AML methods:
+*/
+   status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
+   if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
+   /* Should never get here */
+   WARN(1, "%s: Failed to lock ACPI!\n", __func__);
+   memset(buf, 0, count);
+   return -EINVAL;
+   }
+
+   mutex_lock(&fw_cfg_dev_lock);
+   if (pos == 0) {
+   ret = fw_cfg_dma_transfer(buf, count, key << 16
+ | FW_CFG_DMA_CTL_SELECT
+ | FW_CFG_DMA_CTL_WRITE);
+   } else {
+   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+   ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
+   if (ret < 0)
+   goto end;
+   ret = fw_cfg_dma_transfer(buf, count, FW_CFG_DMA_CTL_WRITE);
+   }
+
+end:
+   mutex_unlock(&fw_cfg_dev_lock);
+
+   acpi_release_global_lock(glk);
+
+   return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
 /* clean up fw_cfg device i/o */
 static void fw_cfg_io_cleanup(void)
 {
@@ -343,6 +390,37 @@ struct fw_cfg_sysfs_entry {
struct list_head list;
 };
 
+#ifdef CONFIG_CRASH_CORE
+static ssize_t write_vmcoreinfo(const struct fw_cfg_file *f)
+{
+   struct vmci {
+   __le16 host_format;
+   __le16 guest_format;
+   __le32 size;
+   __le64 paddr;
+   } __packed;
+   struct vmci *data;
+   ssize_t ret;
+
+   data = kmalloc(sizeof(struct vmci), GFP_KERNEL | GFP_DMA);
+   if (!data)
+   return -ENOMEM;
+
+   /* spare ourself reading host format support for now since we
+* don't know what else to format - host may ignore ours
+*/
+   *data = (struct vmci) {
+   .guest_format = cpu_to_le16(VMCOREINFO_FORMAT_ELF),
+   .size = cpu_to_le32(VMCOREINFO_NOTE_SIZE),
+   .paddr = cpu_to_le64(paddr_vmcoreinfo_note())
+   };
+   ret = fw_cfg_write_blob(f->select, data, 0, sizeof(struct vmci));
+
+   kfree(data);
+   return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
 /* get fw_cfg_sysfs_entry from kobject member */
 static inline struct fw_cfg_sysfs_entry *to_entry(struct kobject *kobj)
 {
@@ -582,6 +660,13 @@ static int fw_cfg_register_file(const struct fw_cfg_file 
*f)
int err;
struct fw_cfg_sysfs_entry *entry;
 
+#ifdef CONFIG_CRASH_CORE
+   if (strcmp(f->name, "etc/vmcoreinfo") == 0 && !is_kdump_kernel()) {
+   if (write_vmcoreinfo(f) < 0)
+   pr_warn("fw_cfg: failed to write vmcoreinfo");
+   }
+#endif
+
/* allocate new entry */
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
-- 
2.15.0.125.g8f49766d64



[PATCH v6 4/5] crash: export paddr_vmcoreinfo_note()

2017-11-13 Thread Marc-André Lureau
The following patch is going to use the symbol from the fw_cfg module,
to call the function and write the note location details in the
vmcoreinfo entry, so qemu can produce dumps with the vmcoreinfo note.

CC: Andrew Morton 
CC: Baoquan He 
CC: Dave Young 
CC: Dave Young 
CC: Hari Bathini 
CC: Tony Luck 
CC: Vivek Goyal 
Signed-off-by: Marc-André Lureau 
Acked-by: Gabriel Somlo 
---
 kernel/crash_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 6db80fc0810b..47541c891810 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -375,6 +375,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
 {
return __pa(vmcoreinfo_note);
 }
+EXPORT_SYMBOL(paddr_vmcoreinfo_note);
 
 static int __init crash_save_vmcoreinfo_init(void)
 {
-- 
2.15.0.125.g8f49766d64



[PATCH v6 3/5] fw_cfg: do DMA read operation

2017-11-13 Thread Marc-André Lureau
Modify fw_cfg_read_blob() to use DMA if the device supports it.
Return errors, because the operation may fail.

To avoid polling for an unbounded amount of time, the DMA operation is
expected to complete within 200ms; otherwise an ETIME error is returned.

We may want to switch all the *buf addresses to use only kmalloc'ed
buffers (instead of using stack/image addresses with dma=false).

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 154 -
 1 file changed, 137 insertions(+), 17 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 1f3e8545dab7..2ac4cd869fe6 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -33,6 +33,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -43,12 +45,26 @@ MODULE_LICENSE("GPL");
 #define FW_CFG_ID 0x01
 #define FW_CFG_FILE_DIR   0x19
 
+#define FW_CFG_VERSION_DMA 0x02
+#define FW_CFG_DMA_CTL_ERROR   0x01
+#define FW_CFG_DMA_CTL_READ0x02
+#define FW_CFG_DMA_CTL_SKIP0x04
+#define FW_CFG_DMA_CTL_SELECT  0x08
+#define FW_CFG_DMA_CTL_WRITE   0x10
+#define FW_CFG_DMA_TIMEOUT 200 /* ms */
+
 /* size in bytes of fw_cfg signature */
 #define FW_CFG_SIG_SIZE 4
 
 /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
 #define FW_CFG_MAX_FILE_PATH 56
 
+/* platform device for dma mapping */
+static struct device *dev;
+
+/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
+static u32 fw_cfg_rev;
+
 /* fw_cfg file directory entry type */
 struct fw_cfg_file {
u32 size;
@@ -57,6 +73,12 @@ struct fw_cfg_file {
char name[FW_CFG_MAX_FILE_PATH];
 };
 
+struct fw_cfg_dma {
+   u32 control;
+   u32 length;
+   u64 address;
+} __packed;
+
 /* fw_cfg device i/o register addresses */
 static bool fw_cfg_is_mmio;
 static phys_addr_t fw_cfg_p_base;
@@ -75,12 +97,93 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
 }
 
+static inline bool fw_cfg_dma_enabled(void)
+{
+   return fw_cfg_rev & FW_CFG_VERSION_DMA && fw_cfg_reg_dma;
+}
+
+static bool fw_cfg_wait_for_control(struct fw_cfg_dma *d, unsigned long 
timeout)
+{
+   ktime_t start;
+   ktime_t stop;
+
+   start = ktime_get();
+   stop = ktime_add(start, ms_to_ktime(timeout));
+
+   do {
+   if ((be32_to_cpu(d->control) & ~FW_CFG_DMA_CTL_ERROR) == 0)
+   return true;
+
+   usleep_range(50, 100);
+   } while (ktime_before(ktime_get(), stop));
+
+   return false;
+}
+
+static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
+{
+   dma_addr_t dma_addr = 0;
+   struct fw_cfg_dma *d;
+   dma_addr_t dma;
+   ssize_t ret = length;
+   enum dma_data_direction dir =
+   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0);
+
+   if (address && length) {
+   dma_addr = dma_map_single(dev, address, length, dir);
+   if (dma_mapping_error(NULL, dma_addr)) {
+   WARN(1, "%s: failed to map address\n", __func__);
+   return -EFAULT;
+   }
+   }
+
+   d = kmalloc(sizeof(*d), GFP_KERNEL | GFP_DMA);
+   if (!d) {
+   ret = -ENOMEM;
+   goto end;
+   }
+
+   dma = dma_map_single(dev, d, sizeof(*d), DMA_BIDIRECTIONAL);
+   if (dma_mapping_error(NULL, dma)) {
+   WARN(1, "%s: failed to map fw_cfg_dma\n", __func__);
+   ret = -EFAULT;
+   goto end;
+   }
+
+   *d = (struct fw_cfg_dma) {
+   .address = cpu_to_be64(dma_addr),
+   .length = cpu_to_be32(length),
+   .control = cpu_to_be32(control)
+   };
+
+   iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
+   iowrite32be(dma, fw_cfg_reg_dma + 4);
+
+   if (!fw_cfg_wait_for_control(d, FW_CFG_DMA_TIMEOUT)) {
+   WARN(1, "%s: timeout", __func__);
+   ret = -ETIME;
+   } else if (be32_to_cpu(d->control) & FW_CFG_DMA_CTL_ERROR) {
+   ret = -EIO;
+   }
+
+   dma_unmap_single(dev, dma, sizeof(*d), DMA_BIDIRECTIONAL);
+
+end:
+   kfree(d);
+   if (dma_addr)
+   dma_unmap_single(dev, dma_addr, length, dir);
+
+   return ret;
+}
+
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
-static inline void fw_cfg_read_blob(u16 key,
-   void *buf, loff_t pos, size_t count)
+static ssize_t fw_cfg_read_blob(u16 key,
+   void *buf, loff_t pos, size_t count,
+   bool dma)
 {

[PATCH v6 2/5] fw_cfg: add DMA register

2017-11-13 Thread Marc-André Lureau
Add an optional <dma_off> kernel module (or command line) parameter
using the following syntax:

  [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
 or
  [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]

and initialize the DMA register address using the given or default offset.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 53 --
 1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 5cfe39f7a45f..1f3e8545dab7 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -10,20 +10,21 @@
  * and select subsets of aarch64), a Device Tree node (on arm), or using
  * a kernel module (or command line) parameter with the following syntax:
  *
- *  [qemu_fw_cfg.]ioport=@[::]
+ *  [qemu_fw_cfg.]ioport=@[::[:]]
  * or
- *  [qemu_fw_cfg.]mmio=@[::]
+ *  [qemu_fw_cfg.]mmio=@[::[:]]
  *
  * where:
  *   := size of ioport or mmio range
  *   := physical base address of ioport or mmio range
  *   := (optional) offset of control register
  *   := (optional) offset of data register
+ *   := (optional) offset of dma register
  *
  * e.g.:
- *  qemu_fw_cfg.ioport=2@0x510:0:1 (the default on x86)
+ *  qemu_fw_cfg.ioport=12@0x510:0:1:4  (the default on x86)
  * or
- *  qemu_fw_cfg.mmio=0xA@0x902:8:0 (the default on arm)
+ *  qemu_fw_cfg.mmio=16@0x902:8:0:16   (the default on arm)
  */
 
 #include 
@@ -63,6 +64,7 @@ static resource_size_t fw_cfg_p_size;
 static void __iomem *fw_cfg_dev_base;
 static void __iomem *fw_cfg_reg_ctrl;
 static void __iomem *fw_cfg_reg_data;
+static void __iomem *fw_cfg_reg_dma;
 
 /* atomic access to fw_cfg device (potentially slow i/o, so using mutex) */
 static DEFINE_MUTEX(fw_cfg_dev_lock);
@@ -118,12 +120,14 @@ static void fw_cfg_io_cleanup(void)
 # if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
 #  define FW_CFG_CTRL_OFF 0x08
 #  define FW_CFG_DATA_OFF 0x00
+#  define FW_CFG_DMA_OFF 0x10
 # elif (defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC32)) /* ppc/mac,sun4m 
*/
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x02
 # elif (defined(CONFIG_X86) || defined(CONFIG_SPARC64)) /* x86, sun4u */
 #  define FW_CFG_CTRL_OFF 0x00
 #  define FW_CFG_DATA_OFF 0x01
+#  define FW_CFG_DMA_OFF 0x04
 # else
 #  error "QEMU FW_CFG not available on this architecture!"
 # endif
@@ -133,7 +137,7 @@ static void fw_cfg_io_cleanup(void)
 static int fw_cfg_do_platform_probe(struct platform_device *pdev)
 {
char sig[FW_CFG_SIG_SIZE];
-   struct resource *range, *ctrl, *data;
+   struct resource *range, *ctrl, *data, *dma;
 
/* acquire i/o range details */
fw_cfg_is_mmio = false;
@@ -170,6 +174,7 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
/* were custom register offsets provided (e.g. on the command line)? */
ctrl = platform_get_resource_byname(pdev, IORESOURCE_REG, "ctrl");
data = platform_get_resource_byname(pdev, IORESOURCE_REG, "data");
+   dma = platform_get_resource_byname(pdev, IORESOURCE_REG, "dma");
if (ctrl && data) {
fw_cfg_reg_ctrl = fw_cfg_dev_base + ctrl->start;
fw_cfg_reg_data = fw_cfg_dev_base + data->start;
@@ -179,6 +184,13 @@ static int fw_cfg_do_platform_probe(struct platform_device 
*pdev)
fw_cfg_reg_data = fw_cfg_dev_base + FW_CFG_DATA_OFF;
}
 
+   if (dma)
+   fw_cfg_reg_dma = fw_cfg_dev_base + dma->start;
+#ifdef FW_CFG_DMA_OFF
+   else
+   fw_cfg_reg_dma = fw_cfg_dev_base + FW_CFG_DMA_OFF;
+#endif
+
/* verify fw_cfg device signature */
fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
@@ -628,6 +640,7 @@ static struct platform_device *fw_cfg_cmdline_dev;
 /* use special scanf/printf modifier for phys_addr_t, resource_size_t */
 #define PH_ADDR_SCAN_FMT "@%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i" \
+":%" __PHYS_ADDR_PREFIX "i%n" \
 ":%" __PHYS_ADDR_PREFIX "i%n"
 
 #define PH_ADDR_PR_1_FMT "0x%" __PHYS_ADDR_PREFIX "x@" \
@@ -637,12 +650,15 @@ static struct platform_device *fw_cfg_cmdline_dev;
 ":%" __PHYS_ADDR_PREFIX "u" \
 ":%" __PHYS_ADDR_PREFIX "u"
 
+#define PH_ADDR_PR_4_FMT PH_ADDR_PR_3_FMT \
+":%" __PHYS_ADDR_PREFIX "u"
+
 static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
 {
-   struct resource res[3] = {};
+   struct resource res[4] = {};
char *str;
p

[PATCH v6 1/5] fw_cfg: fix the command line module name

2017-11-13 Thread Marc-André Lureau
Signed-off-by: Marc-André Lureau 
Acked-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 0e2011636fbb..5cfe39f7a45f 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -10,9 +10,9 @@
  * and select subsets of aarch64), a Device Tree node (on arm), or using
  * a kernel module (or command line) parameter with the following syntax:
  *
- *  [fw_cfg.]ioport=@[::]
+ *  [qemu_fw_cfg.]ioport=@[::]
  * or
- *  [fw_cfg.]mmio=@[::]
+ *  [qemu_fw_cfg.]mmio=@[::]
  *
  * where:
  *   := size of ioport or mmio range
@@ -21,9 +21,9 @@
  *   := (optional) offset of data register
  *
  * e.g.:
- *  fw_cfg.ioport=2@0x510:0:1  (the default on x86)
+ *  qemu_fw_cfg.ioport=2@0x510:0:1 (the default on x86)
  * or
- *  fw_cfg.mmio=0xA@0x902:8:0  (the default on arm)
+ *  qemu_fw_cfg.mmio=0xA@0x902:8:0 (the default on arm)
  */
 
 #include 
-- 
2.15.0.125.g8f49766d64



[PATCH v6 0/5] fw_cfg: add DMA operations & etc/vmcoreinfo support

2017-11-13 Thread Marc-André Lureau
Hi,

This series adds DMA operations support to the qemu fw_cfg kernel
module and populates "etc/vmcoreinfo" with vmcoreinfo location
details.

Note: support for handling this entry has been merged for the upcoming
QEMU release (2.11).

v6:
- change acpi_acquire_global_lock() error to return EINVAL
  (instead of EBUSY)
- replace 0 as pointer argument for NULL
- add Gabriel r-b/a-b tags

v5:
- resent to CC kdump people on the paddr_vmcoreinfo_note() export patch

v4:
- export paddr_vmcoreinfo_note() to fix fw_cfg.ko build
- fix build with !CONFIG_CRASH_CORE
- replace the unbounded yield() loop with a usleep_range() loop and a
  200ms timeout
- do not write vmcoreinfo entry when running the kdump kernel (D. Hatayama)
- drop the experimental sysfs write support patch from this series

v3: (thanks kbuild)
- add "fw_cfg: fix the command line module name" patch
- fix build of "fw_cfg: add DMA register" with CONFIG_FW_CFG_SYSFS_CMDLINE=y
- fix 'Wshift-count-overflow'

v2:
- use platform device for dma mapping
- add etc/vmcoreinfo patch
- some code cleanups

Marc-André Lureau (5):
  fw_cfg: fix the command line module name
  fw_cfg: add DMA register
  fw_cfg: do DMA read operation
  crash: export paddr_vmcoreinfo_note()
  fw_cfg: write vmcoreinfo details

 drivers/firmware/qemu_fw_cfg.c | 292 +
 kernel/crash_core.c|   1 +
 2 files changed, 264 insertions(+), 29 deletions(-)

-- 
2.15.0.125.g8f49766d64



[PATCH v6 3/5] fw_cfg: do DMA read operation

2017-11-13 Thread Marc-André Lureau
Modify fw_cfg_read_blob() to use DMA if the device supports it.
Return errors, because the operation may fail.

To avoid polling for an unbounded amount of time, the DMA operation is
expected to complete within 200ms; otherwise an ETIME error is returned.

We may want to switch all the *buf addresses to use only kmalloc'ed
buffers (instead of using stack/image addresses with dma=false).

Signed-off-by: Marc-André Lureau 
Reviewed-by: Gabriel Somlo 
---
 drivers/firmware/qemu_fw_cfg.c | 154 -
 1 file changed, 137 insertions(+), 17 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 1f3e8545dab7..2ac4cd869fe6 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -33,6 +33,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 MODULE_AUTHOR("Gabriel L. Somlo ");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -43,12 +45,26 @@ MODULE_LICENSE("GPL");
 #define FW_CFG_ID 0x01
 #define FW_CFG_FILE_DIR   0x19
 
+#define FW_CFG_VERSION_DMA 0x02
+#define FW_CFG_DMA_CTL_ERROR   0x01
+#define FW_CFG_DMA_CTL_READ0x02
+#define FW_CFG_DMA_CTL_SKIP0x04
+#define FW_CFG_DMA_CTL_SELECT  0x08
+#define FW_CFG_DMA_CTL_WRITE   0x10
+#define FW_CFG_DMA_TIMEOUT 200 /* ms */
+
 /* size in bytes of fw_cfg signature */
 #define FW_CFG_SIG_SIZE 4
 
 /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
 #define FW_CFG_MAX_FILE_PATH 56
 
+/* platform device for dma mapping */
+static struct device *dev;
+
+/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
+static u32 fw_cfg_rev;
+
 /* fw_cfg file directory entry type */
 struct fw_cfg_file {
u32 size;
@@ -57,6 +73,12 @@ struct fw_cfg_file {
char name[FW_CFG_MAX_FILE_PATH];
 };
 
+struct fw_cfg_dma {
+   u32 control;
+   u32 length;
+   u64 address;
+} __packed;
+
 /* fw_cfg device i/o register addresses */
 static bool fw_cfg_is_mmio;
 static phys_addr_t fw_cfg_p_base;
@@ -75,12 +97,93 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
 }
 
+static inline bool fw_cfg_dma_enabled(void)
+{
+   return fw_cfg_rev & FW_CFG_VERSION_DMA && fw_cfg_reg_dma;
+}
+
+static bool fw_cfg_wait_for_control(struct fw_cfg_dma *d, unsigned long 
timeout)
+{
+   ktime_t start;
+   ktime_t stop;
+
+   start = ktime_get();
+   stop = ktime_add(start, ms_to_ktime(timeout));
+
+   do {
+   if ((be32_to_cpu(d->control) & ~FW_CFG_DMA_CTL_ERROR) == 0)
+   return true;
+
+   usleep_range(50, 100);
+   } while (ktime_before(ktime_get(), stop));
+
+   return false;
+}
+
+static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
+{
+   dma_addr_t dma_addr = 0;
+   struct fw_cfg_dma *d;
+   dma_addr_t dma;
+   ssize_t ret = length;
+   enum dma_data_direction dir =
+   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0);
+
+   if (address && length) {
+   dma_addr = dma_map_single(dev, address, length, dir);
+   if (dma_mapping_error(NULL, dma_addr)) {
+   WARN(1, "%s: failed to map address\n", __func__);
+   return -EFAULT;
+   }
+   }
+
+   d = kmalloc(sizeof(*d), GFP_KERNEL | GFP_DMA);
+   if (!d) {
+   ret = -ENOMEM;
+   goto end;
+   }
+
+   dma = dma_map_single(dev, d, sizeof(*d), DMA_BIDIRECTIONAL);
+   if (dma_mapping_error(NULL, dma)) {
+   WARN(1, "%s: failed to map fw_cfg_dma\n", __func__);
+   ret = -EFAULT;
+   goto end;
+   }
+
+   *d = (struct fw_cfg_dma) {
+   .address = cpu_to_be64(dma_addr),
+   .length = cpu_to_be32(length),
+   .control = cpu_to_be32(control)
+   };
+
+   iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
+   iowrite32be(dma, fw_cfg_reg_dma + 4);
+
+   if (!fw_cfg_wait_for_control(d, FW_CFG_DMA_TIMEOUT)) {
+   WARN(1, "%s: timeout", __func__);
+   ret = -ETIME;
+   } else if (be32_to_cpu(d->control) & FW_CFG_DMA_CTL_ERROR) {
+   ret = -EIO;
+   }
+
+   dma_unmap_single(dev, dma, sizeof(*d), DMA_BIDIRECTIONAL);
+
+end:
+   kfree(d);
+   if (dma_addr)
+   dma_unmap_single(dev, dma_addr, length, dir);
+
+   return ret;
+}
+
 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
-static inline void fw_cfg_read_blob(u16 key,
-   void *buf, loff_t pos, size_t count)
+static ssize_t fw_cfg_read_blob(u16 key,
+   void *buf, loff_t pos, size_t count,
+   bool dma)
 {

  1   2   >