[PATCH 10/10] staging: ccree: Fix alignment issues in ssi_sysfs.c

2017-07-01 Thread Simon Sandström
Fixes checkpatch.pl alignment warnings.

Signed-off-by: Simon Sandström 
---
 drivers/staging/ccree/ssi_sysfs.c | 59 ---
 1 file changed, 31 insertions(+), 28 deletions(-)

diff --git a/drivers/staging/ccree/ssi_sysfs.c 
b/drivers/staging/ccree/ssi_sysfs.c
index dbcd1634aad1..ef23ed43fe9e 100644
--- a/drivers/staging/ccree/ssi_sysfs.c
+++ b/drivers/staging/ccree/ssi_sysfs.c
@@ -144,8 +144,12 @@ static void display_db(struct stat_item 
item[MAX_STAT_OP_TYPES][MAX_STAT_PHASES]
avg = (u64)item[i][j].sum;
do_div(avg, item[i][j].count);
SSI_LOG_ERR("%s, %s: min=%d avg=%d max=%d 
sum=%lld count=%d\n",
-   stat_name_db[i].op_type_name, 
stat_name_db[i].stat_phase_name[j],
-   item[i][j].min, (int)avg, 
item[i][j].max, (long long)item[i][j].sum, item[i][j].count);
+   stat_name_db[i].op_type_name,
+   stat_name_db[i].stat_phase_name[j],
+   item[i][j].min, (int)avg,
+   item[i][j].max,
+   (long long)item[i][j].sum,
+   item[i][j].count);
}
}
}
@@ -156,21 +160,23 @@ static void display_db(struct stat_item 
item[MAX_STAT_OP_TYPES][MAX_STAT_PHASES]
  **/
 
 static ssize_t ssi_sys_stats_host_db_clear(struct kobject *kobj,
-   struct kobj_attribute *attr, const char *buf, size_t count)
+  struct kobj_attribute *attr,
+  const char *buf, size_t count)
 {
init_db(stat_host_db);
return count;
 }
 
 static ssize_t ssi_sys_stats_cc_db_clear(struct kobject *kobj,
-   struct kobj_attribute *attr, const char *buf, size_t count)
+struct kobj_attribute *attr,
+const char *buf, size_t count)
 {
init_db(stat_cc_db);
return count;
 }
 
 static ssize_t ssi_sys_stat_host_db_show(struct kobject *kobj,
-   struct kobj_attribute *attr, char *buf)
+struct kobj_attribute *attr, char *buf)
 {
int i, j;
char line[512];
@@ -179,7 +185,7 @@ static ssize_t ssi_sys_stat_host_db_show(struct kobject 
*kobj,
ssize_t buf_len, tmp_len = 0;
 
buf_len = scnprintf(buf, PAGE_SIZE,
-   "phase\t\t\t\t\t\t\tmin[cy]\tavg[cy]\tmax[cy]\t#samples\n");
+   
"phase\t\t\t\t\t\t\tmin[cy]\tavg[cy]\tmax[cy]\t#samples\n");
if (buf_len < 0)/* scnprintf shouldn't return negative value according 
to its implementation*/
return buf_len;
for (i = STAT_OP_TYPE_ENCODE; i < MAX_STAT_OP_TYPES; i++) {
@@ -193,11 +199,11 @@ static ssize_t ssi_sys_stat_host_db_show(struct kobject 
*kobj,
avg = min_cyc = max_cyc = 0;
}
tmp_len = scnprintf(line, 512,
-   "%s::%s\t\t\t\t\t%6u\t%6u\t%6u\t%7u\n",
-   stat_name_db[i].op_type_name,
-   stat_name_db[i].stat_phase_name[j],
-   min_cyc, (unsigned int)avg, max_cyc,
-   stat_host_db[i][j].count);
+   
"%s::%s\t\t\t\t\t%6u\t%6u\t%6u\t%7u\n",
+   stat_name_db[i].op_type_name,
+   stat_name_db[i].stat_phase_name[j],
+   min_cyc, (unsigned int)avg, max_cyc,
+   stat_host_db[i][j].count);
if (tmp_len < 0)/* scnprintf shouldn't return negative 
value according to its implementation*/
return buf_len;
if (buf_len + tmp_len >= PAGE_SIZE)
@@ -210,7 +216,7 @@ static ssize_t ssi_sys_stat_host_db_show(struct kobject 
*kobj,
 }
 
 static ssize_t ssi_sys_stat_cc_db_show(struct kobject *kobj,
-   struct kobj_attribute *attr, char *buf)
+  struct kobj_attribute *attr, char *buf)
 {
int i;
char line[256];
@@ -219,7 +225,7 @@ static ssize_t ssi_sys_stat_cc_db_show(struct kobject *kobj,
ssize_t buf_len, tmp_len = 0;
 
buf_len = scnprintf(buf, PAGE_SIZE,
-   "phase\tmin[cy]\tavg[cy]\tmax[cy]\t#samples\n");
+   "phase\tmin[cy]\tavg[cy]\tmax[cy]\t#samples\n");
if (buf_len < 0)/* scnprintf shouldn't return negative value according 
to its implementation*/
   

[PATCH 06/10] staging: ccree: Fix alignment issues in ssi_hash.c

2017-07-01 Thread Simon Sandström
Fixes checkpatch.pl alignment warnings.

Signed-off-by: Simon Sandström 
---
 drivers/staging/ccree/ssi_hash.c | 116 +--
 1 file changed, 62 insertions(+), 54 deletions(-)

diff --git a/drivers/staging/ccree/ssi_hash.c b/drivers/staging/ccree/ssi_hash.c
index ae8f36af3837..27bd99cd7b88 100644
--- a/drivers/staging/ccree/ssi_hash.c
+++ b/drivers/staging/ccree/ssi_hash.c
@@ -71,8 +71,8 @@ static void ssi_hash_create_xcbc_setup(
unsigned int *seq_size);
 
 static void ssi_hash_create_cmac_setup(struct ahash_request *areq,
- struct cc_hw_desc desc[],
- unsigned int *seq_size);
+  struct cc_hw_desc desc[],
+  unsigned int *seq_size);
 
 struct ssi_hash_alg {
struct list_head entry;
@@ -118,8 +118,8 @@ static void ssi_hash_create_data_desc(
 static inline void ssi_set_hash_endianity(u32 mode, struct cc_hw_desc *desc)
 {
if (unlikely((mode == DRV_HASH_MD5) ||
-   (mode == DRV_HASH_SHA384) ||
-   (mode == DRV_HASH_SHA512))) {
+(mode == DRV_HASH_SHA384) ||
+(mode == DRV_HASH_SHA512))) {
set_bytes_swap(desc, 1);
} else {
set_cipher_config0(desc, HASH_DIGEST_RESULT_LITTLE_ENDIAN);
@@ -136,7 +136,7 @@ static int ssi_hash_map_result(struct device *dev,
   DMA_BIDIRECTIONAL);
if (unlikely(dma_mapping_error(dev, state->digest_result_dma_addr))) {
SSI_LOG_ERR("Mapping digest result buffer %u B for DMA 
failed\n",
-   digestsize);
+   digestsize);
return -ENOMEM;
}
SSI_LOG_DEBUG("Mapped digest result buffer %u B "
@@ -201,12 +201,12 @@ static int ssi_hash_map_request(struct device *dev,
state->digest_buff_dma_addr = dma_map_single(dev, (void 
*)state->digest_buff, ctx->inter_digestsize, DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, state->digest_buff_dma_addr)) {
SSI_LOG_ERR("Mapping digest len %d B at va=%pK for DMA 
failed\n",
-   ctx->inter_digestsize, state->digest_buff);
+   ctx->inter_digestsize, state->digest_buff);
goto fail3;
}
SSI_LOG_DEBUG("Mapped digest %d B at va=%pK to dma=0x%llX\n",
-   ctx->inter_digestsize, state->digest_buff,
-   (unsigned long long)state->digest_buff_dma_addr);
+ ctx->inter_digestsize, state->digest_buff,
+ (unsigned long long)state->digest_buff_dma_addr);
 
if (is_hmac) {
dma_sync_single_for_cpu(dev, ctx->digest_buff_dma_addr, 
ctx->inter_digestsize, DMA_BIDIRECTIONAL);
@@ -250,12 +250,12 @@ static int ssi_hash_map_request(struct device *dev,
state->digest_bytes_len_dma_addr = dma_map_single(dev, (void 
*)state->digest_bytes_len, HASH_LEN_SIZE, DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, state->digest_bytes_len_dma_addr)) {
SSI_LOG_ERR("Mapping digest len %u B at va=%pK for DMA 
failed\n",
-   HASH_LEN_SIZE, state->digest_bytes_len);
+   HASH_LEN_SIZE, state->digest_bytes_len);
goto fail4;
}
SSI_LOG_DEBUG("Mapped digest len %u B at va=%pK to 
dma=0x%llX\n",
-   HASH_LEN_SIZE, state->digest_bytes_len,
-   (unsigned long long)state->digest_bytes_len_dma_addr);
+ HASH_LEN_SIZE, state->digest_bytes_len,
+ (unsigned long 
long)state->digest_bytes_len_dma_addr);
} else {
state->digest_bytes_len_dma_addr = 0;
}
@@ -264,12 +264,13 @@ static int ssi_hash_map_request(struct device *dev,
state->opad_digest_dma_addr = dma_map_single(dev, (void 
*)state->opad_digest_buff, ctx->inter_digestsize, DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, state->opad_digest_dma_addr)) {
SSI_LOG_ERR("Mapping opad digest %d B at va=%pK for DMA 
failed\n",
-   ctx->inter_digestsize, state->opad_digest_buff);
+   ctx->inter_digestsize,
+   state->opad_digest_buff);
goto fail5;
}
SSI_LOG_DEBUG("Mapped opad digest %d B at va=%pK to 
dma=0x%llX\n",
-   ctx->inter_digestsize, state->opad_digest_buff,
-   (unsigned long long)state->opad_digest_dma_addr);
+ ctx->inter_digestsize, state->opad_digest_buff,
+ (unsigned long long)state->opad_digest_dma_addr);
} else {
state->opad_digest_dma_addr = 0;
 

[PATCH 05/10] staging: ccree: Fix alignment issues in ssi_fips_local.c

2017-07-01 Thread Simon Sandström
Fixes checkpatch.pl alignment warnings.

Signed-off-by: Simon Sandström 
---
 drivers/staging/ccree/ssi_fips_local.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/staging/ccree/ssi_fips_local.c 
b/drivers/staging/ccree/ssi_fips_local.c
index aefb71dc9e9a..c16bfab39699 100644
--- a/drivers/staging/ccree/ssi_fips_local.c
+++ b/drivers/staging/ccree/ssi_fips_local.c
@@ -150,8 +150,7 @@ static void fips_dsr(unsigned long devarg)
 
/* after verifing that there is nothing to do, Unmask AXI completion 
interrupt */
CC_HAL_WRITE_REGISTER(CC_REG_OFFSET(HOST_RGF, HOST_IMR),
-   CC_HAL_READ_REGISTER(
-   CC_REG_OFFSET(HOST_RGF, HOST_IMR)) & ~irq);
+ CC_HAL_READ_REGISTER(CC_REG_OFFSET(HOST_RGF, 
HOST_IMR)) & ~irq);
 }
 
 enum cc_fips_error cc_fips_run_power_up_tests(struct ssi_drvdata *drvdata)
-- 
2.11.0



[PATCH 08/10] staging: ccree: Fix alignment issues in ssi_request_mgr.c

2017-07-01 Thread Simon Sandström
Fixes checkpatch.pl alignment warnings.

Signed-off-by: Simon Sandström 
---
 drivers/staging/ccree/ssi_request_mgr.c | 42 -
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/drivers/staging/ccree/ssi_request_mgr.c 
b/drivers/staging/ccree/ssi_request_mgr.c
index 46d9396f9ff9..efdaeea0f394 100644
--- a/drivers/staging/ccree/ssi_request_mgr.c
+++ b/drivers/staging/ccree/ssi_request_mgr.c
@@ -129,7 +129,7 @@ int request_mgr_init(struct ssi_drvdata *drvdata)
SSI_LOG_DEBUG("hw_queue_size=0x%08X\n", req_mgr_h->hw_queue_size);
if (req_mgr_h->hw_queue_size < MIN_HW_QUEUE_SIZE) {
SSI_LOG_ERR("Invalid HW queue size = %u (Min. required is 
%u)\n",
-   req_mgr_h->hw_queue_size, MIN_HW_QUEUE_SIZE);
+   req_mgr_h->hw_queue_size, MIN_HW_QUEUE_SIZE);
rc = -ENOMEM;
goto req_mgr_init_err;
}
@@ -177,7 +177,8 @@ static inline void enqueue_seq(
writel_relaxed(seq[i].word[5], (volatile void __iomem 
*)(cc_base + CC_REG_OFFSET(CRY_KERNEL, DSCRPTR_QUEUE_WORD0)));
 #ifdef DX_DUMP_DESCS
SSI_LOG_DEBUG("desc[%02d]: 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X 
0x%08X\n", i,
-   seq[i].word[0], seq[i].word[1], seq[i].word[2], 
seq[i].word[3], seq[i].word[4], seq[i].word[5]);
+ seq[i].word[0], seq[i].word[1], seq[i].word[2],
+ seq[i].word[3], seq[i].word[4], seq[i].word[5]);
 #endif
}
 }
@@ -211,7 +212,7 @@ static inline int request_mgr_queues_status_check(
  (MAX_REQUEST_QUEUE_SIZE - 1)) ==
 req_mgr_h->req_queue_tail)) {
SSI_LOG_ERR("SW FIFO is full. req_queue_head=%d 
sw_fifo_len=%d\n",
-  req_mgr_h->req_queue_head, MAX_REQUEST_QUEUE_SIZE);
+   req_mgr_h->req_queue_head, MAX_REQUEST_QUEUE_SIZE);
return -EBUSY;
}
 
@@ -221,9 +222,8 @@ static inline int request_mgr_queues_status_check(
/* Wait for space in HW queue. Poll constant num of iterations. */
for (poll_queue = 0; poll_queue < SSI_MAX_POLL_ITER ; poll_queue++) {
req_mgr_h->q_free_slots =
-   CC_HAL_READ_REGISTER(
-   CC_REG_OFFSET(CRY_KERNEL,
-DSCRPTR_QUEUE_CONTENT));
+   CC_HAL_READ_REGISTER(CC_REG_OFFSET(CRY_KERNEL,
+  
DSCRPTR_QUEUE_CONTENT));
if (unlikely(req_mgr_h->q_free_slots <
req_mgr_h->min_free_hw_slots)) {
req_mgr_h->min_free_hw_slots = req_mgr_h->q_free_slots;
@@ -235,7 +235,7 @@ static inline int request_mgr_queues_status_check(
}
 
SSI_LOG_DEBUG("HW FIFO is full. q_free_slots=%d 
total_seq_len=%d\n",
-   req_mgr_h->q_free_slots, total_seq_len);
+ req_mgr_h->q_free_slots, total_seq_len);
}
/* No room in the HW queue try again later */
SSI_LOG_DEBUG("HW FIFO full, timeout. req_queue_head=%d "
@@ -291,9 +291,8 @@ int send_request(
 * in case iv gen add the max size and in case of no dout add 1
 * for the internal completion descriptor
 */
-   rc = request_mgr_queues_status_check(req_mgr_h,
-  cc_base,
-  max_required_seq_len);
+   rc = request_mgr_queues_status_check(req_mgr_h, cc_base,
+max_required_seq_len);
if (likely(rc == 0))
/* There is enough place in the queue */
break;
@@ -326,15 +325,16 @@ int send_request(
 
if (ssi_req->ivgen_dma_addr_len > 0) {
SSI_LOG_DEBUG("Acquire IV from pool into %d DMA addresses 
0x%llX, 0x%llX, 0x%llX, IV-size=%u\n",
-   ssi_req->ivgen_dma_addr_len,
-   (unsigned long long)ssi_req->ivgen_dma_addr[0],
-   (unsigned long long)ssi_req->ivgen_dma_addr[1],
-   (unsigned long long)ssi_req->ivgen_dma_addr[2],
-   ssi_req->ivgen_size);
+ ssi_req->ivgen_dma_addr_len,
+ (unsigned long long)ssi_req->ivgen_dma_addr[0],
+ (unsigned long long)ssi_req->ivgen_dma_addr[1],
+ (unsigned long long)ssi_req->ivgen_dma_addr[2],
+ ssi_req->ivgen_size);
 
/* Acquire IV from pool */
-   rc = ssi_ivgen_getiv(drvdata, ssi_req->ivgen_dma_addr, 
ssi_req->ivgen_dma_addr_len,
-   

[PATCH 09/10] staging: ccree: Fix alignment issues in ssi_sram_mgr.c

2017-07-01 Thread Simon Sandström
Fixes checkpatch.pl alignment warnings.

Signed-off-by: Simon Sandström 
---
 drivers/staging/ccree/ssi_sram_mgr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/ccree/ssi_sram_mgr.c 
b/drivers/staging/ccree/ssi_sram_mgr.c
index e05c0c13c2eb..f6afe89a 100644
--- a/drivers/staging/ccree/ssi_sram_mgr.c
+++ b/drivers/staging/ccree/ssi_sram_mgr.c
@@ -58,7 +58,7 @@ int ssi_sram_mgr_init(struct ssi_drvdata *drvdata)
sizeof(struct ssi_sram_mgr_ctx), GFP_KERNEL);
if (!drvdata->sram_mgr_handle) {
SSI_LOG_ERR("Not enough memory to allocate SRAM_MGR ctx 
(%zu)\n",
-   sizeof(struct ssi_sram_mgr_ctx));
+   sizeof(struct ssi_sram_mgr_ctx));
rc = -ENOMEM;
goto out;
}
@@ -90,12 +90,12 @@ ssi_sram_addr_t ssi_sram_mgr_alloc(struct ssi_drvdata 
*drvdata, u32 size)
 
if (unlikely((size & 0x3) != 0)) {
SSI_LOG_ERR("Requested buffer size (%u) is not multiple of 4",
-   size);
+   size);
return NULL_SRAM_ADDR;
}
if (unlikely(size > (SSI_CC_SRAM_SIZE - smgr_ctx->sram_free_offset))) {
SSI_LOG_ERR("Not enough space to allocate %u B (at offset 
%llu)\n",
-   size, smgr_ctx->sram_free_offset);
+   size, smgr_ctx->sram_free_offset);
return NULL_SRAM_ADDR;
}
 
-- 
2.11.0



[PATCH 04/10] staging: ccree: Fix alignment issues in ssi_driver.c

2017-07-01 Thread Simon Sandström
Fixes checkpatch.pl alignment warnings.

Signed-off-by: Simon Sandström 
---
 drivers/staging/ccree/ssi_driver.c | 40 +++---
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/staging/ccree/ssi_driver.c 
b/drivers/staging/ccree/ssi_driver.c
index 78709b92736d..e26cf174cf2b 100644
--- a/drivers/staging/ccree/ssi_driver.c
+++ b/drivers/staging/ccree/ssi_driver.c
@@ -85,8 +85,7 @@ void dump_byte_array(const char *name, const u8 *the_array, 
unsigned long size)
return;
}
 
-   ret = snprintf(line_buf, sizeof(line_buf), "%s[%lu]: ",
-   name, size);
+   ret = snprintf(line_buf, sizeof(line_buf), "%s[%lu]: ", name, size);
if (ret < 0) {
SSI_LOG_ERR("snprintf returned %d . aborting buffer array 
dump\n", ret);
return;
@@ -95,8 +94,8 @@ void dump_byte_array(const char *name, const u8 *the_array, 
unsigned long size)
for (i = 0, cur_byte = the_array;
 (i < size) && (line_offset < sizeof(line_buf)); i++, cur_byte++) {
ret = snprintf(line_buf + line_offset,
-   sizeof(line_buf) - line_offset,
-   "0x%02X ", *cur_byte);
+  sizeof(line_buf) - line_offset,
+  "0x%02X ", *cur_byte);
if (ret < 0) {
SSI_LOG_ERR("snprintf returned %d . aborting buffer 
array dump\n", ret);
return;
@@ -193,11 +192,11 @@ int init_cc_regs(struct ssi_drvdata *drvdata, bool 
is_probe)
 #ifdef DX_IRQ_DELAY
/* Set CC IRQ delay */
CC_HAL_WRITE_REGISTER(CC_REG_OFFSET(HOST_RGF, HOST_IRQ_TIMER_INIT_VAL),
-   DX_IRQ_DELAY);
+ DX_IRQ_DELAY);
 #endif
if (CC_HAL_READ_REGISTER(CC_REG_OFFSET(HOST_RGF, 
HOST_IRQ_TIMER_INIT_VAL)) > 0) {
SSI_LOG_DEBUG("irq_delay=%d CC cycles\n",
-   CC_HAL_READ_REGISTER(CC_REG_OFFSET(HOST_RGF, 
HOST_IRQ_TIMER_INIT_VAL)));
+ CC_HAL_READ_REGISTER(CC_REG_OFFSET(HOST_RGF, 
HOST_IRQ_TIMER_INIT_VAL)));
}
 #endif
 
@@ -252,9 +251,9 @@ static int init_cc_resources(struct platform_device 
*plat_dev)
goto init_cc_res_err;
}
SSI_LOG_DEBUG("Got MEM resource (%s): start=0x%llX end=0x%llX\n",
-   new_drvdata->res_mem->name,
-   (unsigned long long)new_drvdata->res_mem->start,
-   (unsigned long long)new_drvdata->res_mem->end);
+ new_drvdata->res_mem->name,
+ (unsigned long long)new_drvdata->res_mem->start,
+ (unsigned long long)new_drvdata->res_mem->end);
/* Map registers space */
req_mem_cc_regs = request_mem_region(new_drvdata->res_mem->start, 
resource_size(new_drvdata->res_mem), "arm_cc7x_regs");
if (unlikely(!req_mem_cc_regs)) {
@@ -266,7 +265,8 @@ static int init_cc_resources(struct platform_device 
*plat_dev)
cc_base = ioremap(new_drvdata->res_mem->start, 
resource_size(new_drvdata->res_mem));
if (unlikely(!cc_base)) {
SSI_LOG_ERR("ioremap[CC](0x%08X,0x%08X) failed\n",
-   (unsigned int)new_drvdata->res_mem->start, (unsigned 
int)resource_size(new_drvdata->res_mem));
+   (unsigned int)new_drvdata->res_mem->start,
+   (unsigned int)resource_size(new_drvdata->res_mem));
rc = -ENOMEM;
goto init_cc_res_err;
}
@@ -284,15 +284,15 @@ static int init_cc_resources(struct platform_device 
*plat_dev)
 IRQF_SHARED, "arm_cc7x", new_drvdata);
if (unlikely(rc != 0)) {
SSI_LOG_ERR("Could not register to interrupt %llu\n",
-   (unsigned long long)new_drvdata->res_irq->start);
+   (unsigned long long)new_drvdata->res_irq->start);
goto init_cc_res_err;
}
init_completion(&new_drvdata->icache_setup_completion);
 
irq_registered = true;
SSI_LOG_DEBUG("Registered to IRQ (%s) %llu\n",
-   new_drvdata->res_irq->name,
-   (unsigned long long)new_drvdata->res_irq->start);
+ new_drvdata->res_irq->name,
+ (unsigned long long)new_drvdata->res_irq->start);
 
new_drvdata->plat_dev = plat_dev;
 
@@ -313,7 +313,7 @@ static int init_cc_resources(struct platform_device 
*plat_dev)
signature_val = CC_HAL_READ_REGISTER(CC_REG_OFFSET(HOST_RGF, 
HOST_SIGNATURE));
if (signature_val != DX_DEV_SIGNATURE) {
SSI_LOG_ERR("Invalid CC signature: SIGNATURE=0x%08X != 
expected=0x%08X\n",
-   signature_val, (u32)DX_DEV_SIGNATURE);
+   signature_val, (u32)DX_DEV_SIGNATURE);

[PATCH 02/10] staging: ccree: Fix alignment issues in ssi_buffer_mgr.c

2017-07-01 Thread Simon Sandström
Fixes checkpatch.pl alignment warnings.

Signed-off-by: Simon Sandström 
---
 drivers/staging/ccree/ssi_buffer_mgr.c | 295 ++---
 1 file changed, 164 insertions(+), 131 deletions(-)

diff --git a/drivers/staging/ccree/ssi_buffer_mgr.c 
b/drivers/staging/ccree/ssi_buffer_mgr.c
index b35871eeabd1..3f163d3f3f48 100644
--- a/drivers/staging/ccree/ssi_buffer_mgr.c
+++ b/drivers/staging/ccree/ssi_buffer_mgr.c
@@ -162,8 +162,8 @@ static inline int ssi_buffer_mgr_render_buff_to_mlli(
cc_lli_set_addr(mlli_entry_p, buff_dma);
cc_lli_set_size(mlli_entry_p, CC_MAX_MLLI_ENTRY_SIZE);
SSI_LOG_DEBUG("entry[%d]: single_buff=0x%08X size=%08X\n", 
*curr_nents,
-  mlli_entry_p[LLI_WORD0_OFFSET],
-  mlli_entry_p[LLI_WORD1_OFFSET]);
+ mlli_entry_p[LLI_WORD0_OFFSET],
+ mlli_entry_p[LLI_WORD1_OFFSET]);
buff_dma += CC_MAX_MLLI_ENTRY_SIZE;
buff_size -= CC_MAX_MLLI_ENTRY_SIZE;
mlli_entry_p = mlli_entry_p + 2;
@@ -173,8 +173,8 @@ static inline int ssi_buffer_mgr_render_buff_to_mlli(
cc_lli_set_addr(mlli_entry_p, buff_dma);
cc_lli_set_size(mlli_entry_p, buff_size);
SSI_LOG_DEBUG("entry[%d]: single_buff=0x%08X size=%08X\n", *curr_nents,
-  mlli_entry_p[LLI_WORD0_OFFSET],
-  mlli_entry_p[LLI_WORD1_OFFSET]);
+ mlli_entry_p[LLI_WORD0_OFFSET],
+ mlli_entry_p[LLI_WORD1_OFFSET]);
mlli_entry_p = mlli_entry_p + 2;
*mlli_entry_pp = mlli_entry_p;
(*curr_nents)++;
@@ -302,7 +302,7 @@ static inline void ssi_buffer_mgr_add_scatterlist_entry(
unsigned int index = sgl_data->num_of_buffers;
 
SSI_LOG_DEBUG("index=%u nents=%u sgl=%pK data_len=0x%08X is_last=%d\n",
-index, nents, sgl, data_len, is_last_table);
+ index, nents, sgl, data_len, is_last_table);
sgl_data->nents[index] = nents;
sgl_data->entry[index].sgl = sgl;
sgl_data->offset[index] = data_offset;
@@ -317,7 +317,7 @@ static inline void ssi_buffer_mgr_add_scatterlist_entry(
 
 static int
 ssi_buffer_mgr_dma_map_sg(struct device *dev, struct scatterlist *sg, u32 
nents,
-enum dma_data_direction direction)
+ enum dma_data_direction direction)
 {
u32 i, j;
struct scatterlist *l_sg = sg;
@@ -374,7 +374,7 @@ static int ssi_buffer_mgr_map_scatterlist(
if (*nents > max_sg_nents) {
*nents = 0;
SSI_LOG_ERR("Too many fragments. current %d max %d\n",
-  *nents, max_sg_nents);
+   *nents, max_sg_nents);
return -ENOMEM;
}
if (!is_chained) {
@@ -408,10 +408,10 @@ static int ssi_buffer_mgr_map_scatterlist(
 
 static inline int
 ssi_aead_handle_config_buf(struct device *dev,
-   struct aead_req_ctx *areq_ctx,
-   u8 *config_data,
-   struct buffer_array *sg_data,
-   unsigned int assoclen)
+  struct aead_req_ctx *areq_ctx,
+  u8 *config_data,
+  struct buffer_array *sg_data,
+  unsigned int assoclen)
 {
SSI_LOG_DEBUG(" handle additional data config set to   DLLI\n");
/* create sg for the current buffer */
@@ -433,19 +433,18 @@ ssi_aead_handle_config_buf(struct device *dev,
/* prepare for case of MLLI */
if (assoclen > 0) {
ssi_buffer_mgr_add_scatterlist_entry(sg_data, 1,
-   &areq_ctx->ccm_adata_sg,
-   (AES_BLOCK_SIZE +
-   areq_ctx->ccm_hdr_size), 0,
-   false, NULL);
+&areq_ctx->ccm_adata_sg,
+(AES_BLOCK_SIZE + 
areq_ctx->ccm_hdr_size),
+0, false, NULL);
}
return 0;
 }
 
 static inline int ssi_ahash_handle_curr_buf(struct device *dev,
-  struct ahash_req_ctx *areq_ctx,
-  u8 *curr_buff,
-  u32 curr_buff_cnt,
-  struct buffer_array *sg_data)
+   struct ahash_req_ctx *areq_ctx,
+   u8 *curr_buff,
+   u32 curr_buff_cnt,
+   struct buffer_array *sg_data)
 {
SSI_LOG_DEBUG(" handle curr buff %x set to   

[PATCH 00/10] Fix alignment issues in staging/ccree

2017-07-01 Thread Simon Sandström
Fixes a total of 195 alignment issues in staging/ccree reported by
checkpatch.pl. Adds a few "line over 80 characters" warnings as a
result of the realignments, but I could try to get rid of them in the
same patchset if needed.

-- Simon

---

Simon Sandström (10):
  staging: ccree: Fix alignment issues in ssi_aead.c
  staging: ccree: Fix alignment issues in ssi_buffer_mgr.c
  staging: ccree: Fix alignment issues in ssi_cipher.c
  staging: ccree: Fix alignment issues in ssi_driver.c
  staging: ccree: Fix alignment issues in ssi_fips_local.c
  staging: ccree: Fix alignment issues in ssi_hash.c
  staging: ccree: Fix alignment issues in ssi_ivgen.c
  staging: ccree: Fix alignment issues in ssi_request_mgr.c
  staging: ccree: Fix alignment issues in ssi_sram_mgr.c
  staging: ccree: Fix alignment issues in ssi_sysfs.c

 drivers/staging/ccree/ssi_aead.c|  67 
 drivers/staging/ccree/ssi_buffer_mgr.c  | 295 ++--
 drivers/staging/ccree/ssi_cipher.c  |  75 
 drivers/staging/ccree/ssi_driver.c  |  40 ++---
 drivers/staging/ccree/ssi_fips_local.c  |   3 +-
 drivers/staging/ccree/ssi_hash.c| 116 +++--
 drivers/staging/ccree/ssi_ivgen.c   |   3 +-
 drivers/staging/ccree/ssi_request_mgr.c |  42 +++--
 drivers/staging/ccree/ssi_sram_mgr.c|   6 +-
 drivers/staging/ccree/ssi_sysfs.c   |  59 ---
 10 files changed, 372 insertions(+), 334 deletions(-)

-- 
2.11.0



[PATCH 03/10] staging: ccree: Fix alignment issues in ssi_cipher.c

2017-07-01 Thread Simon Sandström
Fixes checkpatch.pl alignment warnings.

Signed-off-by: Simon Sandström 
---
 drivers/staging/ccree/ssi_cipher.c | 75 +-
 1 file changed, 34 insertions(+), 41 deletions(-)

diff --git a/drivers/staging/ccree/ssi_cipher.c 
b/drivers/staging/ccree/ssi_cipher.c
index cd2eafc04232..f217e1ab1b08 100644
--- a/drivers/staging/ccree/ssi_cipher.c
+++ b/drivers/staging/ccree/ssi_cipher.c
@@ -92,8 +92,7 @@ static int validate_keys_sizes(struct ssi_ablkcipher_ctx 
*ctx_p, u32 size) {
break;
}
case S_DIN_to_DES:
-   if (likely(size == DES3_EDE_KEY_SIZE ||
-   size == DES_KEY_SIZE))
+   if (likely(size == DES3_EDE_KEY_SIZE || size == DES_KEY_SIZE))
return 0;
break;
 #if SSI_CC_HAS_MULTI2
@@ -183,8 +182,8 @@ static int ssi_blkcipher_init(struct crypto_tfm *tfm)
int rc = 0;
unsigned int max_key_buf_size = get_max_keysize(tfm);
 
-   SSI_LOG_DEBUG("Initializing context @%p for %s\n", ctx_p,
-   crypto_tfm_alg_name(tfm));
+   SSI_LOG_DEBUG("Initializing context @%p for %s\n",
+ ctx_p, crypto_tfm_alg_name(tfm));
 
CHECK_AND_RETURN_UPON_FIPS_ERROR();
ctx_p->cipher_mode = ssi_alg->cipher_mode;
@@ -206,12 +205,12 @@ static int ssi_blkcipher_init(struct crypto_tfm *tfm)
 max_key_buf_size, DMA_TO_DEVICE);
if (dma_mapping_error(dev, ctx_p->user.key_dma_addr)) {
SSI_LOG_ERR("Mapping Key %u B at va=%pK for DMA failed\n",
-   max_key_buf_size, ctx_p->user.key);
+   max_key_buf_size, ctx_p->user.key);
return -ENOMEM;
}
SSI_LOG_DEBUG("Mapped key %u B at va=%pK to dma=0x%llX\n",
-   max_key_buf_size, ctx_p->user.key,
-   (unsigned long long)ctx_p->user.key_dma_addr);
+ max_key_buf_size, ctx_p->user.key,
+ (unsigned long long)ctx_p->user.key_dma_addr);
 
if (ctx_p->cipher_mode == DRV_CIPHER_ESSIV) {
/* Alloc hash tfm for essiv */
@@ -232,7 +231,7 @@ static void ssi_blkcipher_exit(struct crypto_tfm *tfm)
unsigned int max_key_buf_size = get_max_keysize(tfm);
 
SSI_LOG_DEBUG("Clearing context @%p for %s\n",
-   crypto_tfm_ctx(tfm), crypto_tfm_alg_name(tfm));
+ crypto_tfm_ctx(tfm), crypto_tfm_alg_name(tfm));
 
if (ctx_p->cipher_mode == DRV_CIPHER_ESSIV) {
/* Free hash tfm for essiv */
@@ -242,9 +241,9 @@ static void ssi_blkcipher_exit(struct crypto_tfm *tfm)
 
/* Unmap key buffer */
dma_unmap_single(dev, ctx_p->user.key_dma_addr, max_key_buf_size,
-   DMA_TO_DEVICE);
+DMA_TO_DEVICE);
SSI_LOG_DEBUG("Unmapped key buffer key_dma_addr=0x%llX\n",
-   (unsigned long long)ctx_p->user.key_dma_addr);
+ (unsigned long long)ctx_p->user.key_dma_addr);
 
/* Free key buffer in context */
kfree(ctx_p->user.key);
@@ -270,7 +269,7 @@ static int ssi_fips_verify_3des_keys(const u8 *key, 
unsigned int keylen)
 
/* verify key1 != key2 and key3 != key2*/
if (unlikely((memcmp((u8 *)tdes_key->key1, (u8 *)tdes_key->key2, 
sizeof(tdes_key->key1)) == 0) ||
- (memcmp((u8 *)tdes_key->key3, (u8 *)tdes_key->key2, 
sizeof(tdes_key->key3)) == 0))) {
+(memcmp((u8 *)tdes_key->key3, (u8 *)tdes_key->key2, 
sizeof(tdes_key->key3)) == 0))) {
return -ENOEXEC;
}
 #endif /* CCREE_FIPS_SUPPORT */
@@ -317,7 +316,7 @@ static int ssi_blkcipher_setkey(struct crypto_tfm *tfm,
unsigned int max_key_buf_size = get_max_keysize(tfm);
 
SSI_LOG_DEBUG("Setting key in context @%p for %s. keylen=%u\n",
-   ctx_p, crypto_tfm_alg_name(tfm), keylen);
+ ctx_p, crypto_tfm_alg_name(tfm), keylen);
dump_byte_array("key", (u8 *)key, keylen);
 
CHECK_AND_RETURN_UPON_FIPS_ERROR();
@@ -396,7 +395,7 @@ static int ssi_blkcipher_setkey(struct crypto_tfm *tfm,
 
/* STAT_PHASE_1: Copy key to ctx */
dma_sync_single_for_cpu(dev, ctx_p->user.key_dma_addr,
-   max_key_buf_size, DMA_TO_DEVICE);
+   max_key_buf_size, DMA_TO_DEVICE);
 
if (ctx_p->flow_mode == S_DIN_to_MULTI2) {
 #if SSI_CC_HAS_MULTI2
@@ -429,7 +428,7 @@ static int ssi_blkcipher_setkey(struct crypto_tfm *tfm,
}
}
dma_sync_single_for_device(dev, ctx_p->user.key_dma_addr,
-   max_key_buf_size, DMA_TO_DEVICE);
+  max_key_buf_size, DMA_TO_DEVICE);
ctx_p->keylen = keylen;
 
 

[PATCH 07/10] staging: ccree: Fix alignment issues in ssi_ivgen.c

2017-07-01 Thread Simon Sandström
Fixes checkpatch.pl alignment warnings.

Signed-off-by: Simon Sandström 
---
 drivers/staging/ccree/ssi_ivgen.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/ccree/ssi_ivgen.c 
b/drivers/staging/ccree/ssi_ivgen.c
index 5ff3368c04d9..0260fbd08be8 100644
--- a/drivers/staging/ccree/ssi_ivgen.c
+++ b/drivers/staging/ccree/ssi_ivgen.c
@@ -166,7 +166,8 @@ void ssi_ivgen_fini(struct ssi_drvdata *drvdata)
if (ivgen_ctx->pool_meta) {
memset(ivgen_ctx->pool_meta, 0, SSI_IVPOOL_META_SIZE);
dma_free_coherent(device, SSI_IVPOOL_META_SIZE,
-   ivgen_ctx->pool_meta, ivgen_ctx->pool_meta_dma);
+ ivgen_ctx->pool_meta,
+ ivgen_ctx->pool_meta_dma);
}
 
ivgen_ctx->pool = NULL_SRAM_ADDR;
-- 
2.11.0



[PATCH 01/10] staging: ccree: Fix alignment issues in ssi_aead.c

2017-07-01 Thread Simon Sandström
Fixes checkpatch.pl alignment warnings.

Signed-off-by: Simon Sandström 
---
 drivers/staging/ccree/ssi_aead.c | 67 +---
 1 file changed, 35 insertions(+), 32 deletions(-)

diff --git a/drivers/staging/ccree/ssi_aead.c b/drivers/staging/ccree/ssi_aead.c
index 1fc0b05ea0d5..146173b8cbb9 100644
--- a/drivers/staging/ccree/ssi_aead.c
+++ b/drivers/staging/ccree/ssi_aead.c
@@ -93,14 +93,14 @@ static void ssi_aead_exit(struct crypto_aead *tfm)
struct ssi_aead_ctx *ctx = crypto_aead_ctx(tfm);
 
SSI_LOG_DEBUG("Clearing context @%p for %s\n",
-   crypto_aead_ctx(tfm), crypto_tfm_alg_name(&(tfm->base)));
+ crypto_aead_ctx(tfm), crypto_tfm_alg_name(&(tfm->base)));
 
dev = &ctx->drvdata->plat_dev->dev;
/* Unmap enckey buffer */
if (ctx->enckey) {
dma_free_coherent(dev, AES_MAX_KEY_SIZE, ctx->enckey, 
ctx->enckey_dma_addr);
SSI_LOG_DEBUG("Freed enckey DMA buffer 
enckey_dma_addr=0x%llX\n",
-   (unsigned long long)ctx->enckey_dma_addr);
+ (unsigned long long)ctx->enckey_dma_addr);
ctx->enckey_dma_addr = 0;
ctx->enckey = NULL;
}
@@ -108,29 +108,29 @@ static void ssi_aead_exit(struct crypto_aead *tfm)
if (ctx->auth_mode == DRV_HASH_XCBC_MAC) { /* XCBC authetication */
if (ctx->auth_state.xcbc.xcbc_keys) {
dma_free_coherent(dev, CC_AES_128_BIT_KEY_SIZE * 3,
-   ctx->auth_state.xcbc.xcbc_keys,
-   ctx->auth_state.xcbc.xcbc_keys_dma_addr);
+ ctx->auth_state.xcbc.xcbc_keys,
+ 
ctx->auth_state.xcbc.xcbc_keys_dma_addr);
}
SSI_LOG_DEBUG("Freed xcbc_keys DMA buffer 
xcbc_keys_dma_addr=0x%llX\n",
-   (unsigned long 
long)ctx->auth_state.xcbc.xcbc_keys_dma_addr);
+ (unsigned long 
long)ctx->auth_state.xcbc.xcbc_keys_dma_addr);
ctx->auth_state.xcbc.xcbc_keys_dma_addr = 0;
ctx->auth_state.xcbc.xcbc_keys = NULL;
} else if (ctx->auth_mode != DRV_HASH_NULL) { /* HMAC auth. */
if (ctx->auth_state.hmac.ipad_opad) {
dma_free_coherent(dev, 2 * MAX_HMAC_DIGEST_SIZE,
-   ctx->auth_state.hmac.ipad_opad,
-   ctx->auth_state.hmac.ipad_opad_dma_addr);
+ ctx->auth_state.hmac.ipad_opad,
+ 
ctx->auth_state.hmac.ipad_opad_dma_addr);
SSI_LOG_DEBUG("Freed ipad_opad DMA buffer 
ipad_opad_dma_addr=0x%llX\n",
-   (unsigned long 
long)ctx->auth_state.hmac.ipad_opad_dma_addr);
+ (unsigned long 
long)ctx->auth_state.hmac.ipad_opad_dma_addr);
ctx->auth_state.hmac.ipad_opad_dma_addr = 0;
ctx->auth_state.hmac.ipad_opad = NULL;
}
if (ctx->auth_state.hmac.padded_authkey) {
dma_free_coherent(dev, MAX_HMAC_BLOCK_SIZE,
-   ctx->auth_state.hmac.padded_authkey,
-   ctx->auth_state.hmac.padded_authkey_dma_addr);
+ ctx->auth_state.hmac.padded_authkey,
+ 
ctx->auth_state.hmac.padded_authkey_dma_addr);
SSI_LOG_DEBUG("Freed padded_authkey DMA buffer 
padded_authkey_dma_addr=0x%llX\n",
-   (unsigned long 
long)ctx->auth_state.hmac.padded_authkey_dma_addr);
+ (unsigned long 
long)ctx->auth_state.hmac.padded_authkey_dma_addr);
ctx->auth_state.hmac.padded_authkey_dma_addr = 0;
ctx->auth_state.hmac.padded_authkey = NULL;
}
@@ -187,7 +187,7 @@ static int ssi_aead_init(struct crypto_aead *tfm)
goto init_failed;
}
SSI_LOG_DEBUG("Allocated authkey buffer in context 
ctx->authkey=@%p\n",
-   ctx->auth_state.hmac.ipad_opad);
+ ctx->auth_state.hmac.ipad_opad);
 
ctx->auth_state.hmac.padded_authkey = dma_alloc_coherent(dev,
MAX_HMAC_BLOCK_SIZE,
@@ -223,7 +223,7 @@ static void ssi_aead_complete(struct device *dev, void 
*ssi_req, void __iomem *c
 
if (areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT) {
if (memcmp(areq_ctx->mac_buf, areq_ctx->icv_virt_addr,
-   ctx->authsize) != 0) {
+  ctx->authsize) != 0) {
SSI_LOG_DEBUG("Payload authentication failure, "
   

[RFT PATCH] crypto: arm/ghash - add NEON accelerated fallback for vmull.p64

2017-07-01 Thread Ard Biesheuvel
Implement a NEON fallback for systems that do support NEON but have
no support for the optional 64x64->128 polynomial multiplication
instruction that is part of the ARMv8 Crypto Extensions. It is based
on the paper "Fast Software Polynomial Multiplication on ARM Processors
Using the NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and
Ricardo Dahab (https://hal.inria.fr/hal-01506572).

On a 32-bit guest executing under KVM on a Cortex-A57, the new code is
not only >3x faster than the generic table-based GHASH driver, but it is
also time invariant. (Note that the existing vmull.p64 code is 16x faster
on this core.)

Signed-off-by: Ard Biesheuvel 
---

Raw numbers for a 2 GHz AMD Seattle (A57 r1p2) after the patch.

 arch/arm/crypto/Kconfig |   5 +-
 arch/arm/crypto/ghash-ce-core.S | 110 +---
 arch/arm/crypto/ghash-ce-glue.c |  24 -
 3 files changed, 119 insertions(+), 20 deletions(-)

diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index d8f3336bfc88..0b960ed124ae 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -106,14 +106,15 @@ config CRYPTO_AES_ARM_CE
  ARMv8 Crypto Extensions
 
 config CRYPTO_GHASH_ARM_CE
-   tristate "PMULL-accelerated GHASH using ARMv8 Crypto Extensions"
+   tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions"
depends on KERNEL_MODE_NEON
select CRYPTO_HASH
select CRYPTO_CRYPTD
help
  Use an implementation of GHASH (used by the GCM AEAD chaining mode)
  that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
- that is part of the ARMv8 Crypto Extensions
+ that is part of the ARMv8 Crypto Extensions, or a slower variant that
+ uses the vmull.p8 instruction that is part of the basic NEON ISA.
 
 config CRYPTO_CRCT10DIF_ARM_CE
tristate "CRCT10DIF digest algorithm using PMULL instructions"
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
index f6ab8bcc9efe..a017a9213f7e 100644
--- a/arch/arm/crypto/ghash-ce-core.S
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -1,7 +1,7 @@
 /*
- * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
+ * Accelerated GHASH implementation with NEON/ARMv8 vmull.p8/64 instructions.
  *
- * Copyright (C) 2015 Linaro Ltd. 
+ * Copyright (C) 2015 - 2017 Linaro Ltd. 
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -20,26 +20,90 @@
XM  .req q6
XH  .req q7
IN1 .req q7
+   T3  .req q8
 
SHASH_L .req d0
SHASH_H .req d1
SHASH2_L .req d2
T1_L .req d4
+   T2_L .req d6
MASK_L  .req d8
XL_L .req d10
XL_H .req d11
XM_L .req d12
XM_H .req d13
XH_L .req d14
+   T3_L .req d16
+
+   k16 .req d21
+   k32 .req d22
+   k48 .req d23
+
+   t0l .req d24
+   t0h .req d25
+   t1l .req d26
+   t1h .req d27
+   t2l .req d28
+   t2h .req d29
+   t3l .req d30
+   t3h .req d31
+
+   t0q .req q12
+   t1q .req q13
+   t2q .req q14
+   t3q .req q15
 
.text
.fpu crypto-neon-fp-armv8
 
/*
-* void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-* struct ghash_key const *k, const char *head)
+* This implementation of 64x64 -> 128 bit polynomial multiplication
+* using vmull.p8 instructions (8x8 -> 16) is taken from the paper
+* "Fast Software Polynomial Multiplication on ARM Processors Using
+* the NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and
+* Ricardo Dahab (https://hal.inria.fr/hal-01506572)
 */
-ENTRY(pmull_ghash_update)
+   .macro  vmull_p64, rq, ad, bd
+   vext.8  t0l, \ad, \ad, #1   @ A1
+   vmull.p8 t0q, t0l, \bd   @ F = A1*B
+   vext.8  \rq\()_L, \bd, \bd, #1  @ B1
+   vmull.p8 \rq, \ad, \rq\()_L  @ E = A*B1
+   vext.8  t1l, \ad, \ad, #2   @ A2
+   vmull.p8 t1q, t1l, \bd   @ H = A2*B
+   vext.8  t3l, \bd, \bd, #2   @ B2
+   vmull.p8 t3q, \ad, t3l   @ G = A*B2
+   vext.8  t2l, \ad, \ad, #3   @ A3
+   vmull.p8 t2q, t2l, \bd   @ J = A3*B
+