Hi John,

I love your patch! Perhaps something to improve:

[auto build test WARNING on iommu/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    
https://github.com/0day-ci/linux/commits/John-Garry/iommu-arm-smmu-v3-Improve-cmdq-lock-efficiency/20200623-013438
base:   https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git next
config: arm64-randconfig-c024-20200622 (attached as .config)
compiler: aarch64-linux-gcc (GCC) 9.3.0

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <l...@intel.com>

All warnings (new ones prefixed by >>, old ones prefixed by <<):

In file included from include/linux/bits.h:23,
from include/linux/ioport.h:15,
from include/linux/acpi.h:12,
from drivers/iommu/arm-smmu-v3.c:12:
drivers/iommu/arm-smmu-v3.c: In function 'arm_smmu_cmdq_issue_cmdlist':
include/linux/bits.h:26:28: warning: comparison of unsigned expression < 0 is 
always false [-Wtype-limits]
26 |   __builtin_constant_p((l) > (h)), (l) > (h), 0)))
|                            ^
include/linux/build_bug.h:16:62: note: in definition of macro 
'BUILD_BUG_ON_ZERO'
16 | #define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
|                                                              ^
include/linux/bits.h:39:3: note: in expansion of macro 'GENMASK_INPUT_CHECK'
39 |  (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
|   ^~~~~~~~~~~~~~~~~~~
>> drivers/iommu/arm-smmu-v3.c:1404:18: note: in expansion of macro 'GENMASK'
1404 |  u32 prod_mask = GENMASK(cmdq->q.llq.max_n_shift, 0);
|                  ^~~~~~~
include/linux/bits.h:26:40: warning: comparison of unsigned expression < 0 is 
always false [-Wtype-limits]
26 |   __builtin_constant_p((l) > (h)), (l) > (h), 0)))
|                                        ^
include/linux/build_bug.h:16:62: note: in definition of macro 
'BUILD_BUG_ON_ZERO'
16 | #define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
|                                                              ^
include/linux/bits.h:39:3: note: in expansion of macro 'GENMASK_INPUT_CHECK'
39 |  (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
|   ^~~~~~~~~~~~~~~~~~~
>> drivers/iommu/arm-smmu-v3.c:1404:18: note: in expansion of macro 'GENMASK'
1404 |  u32 prod_mask = GENMASK(cmdq->q.llq.max_n_shift, 0);
|                  ^~~~~~~

vim +/GENMASK +1404 drivers/iommu/arm-smmu-v3.c

  1369  
  1370  /*
  1371   * This is the actual insertion function, and provides the following
  1372   * ordering guarantees to callers:
  1373   *
  1374   * - There is a dma_wmb() before publishing any commands to the queue.
  1375   *   This can be relied upon to order prior writes to data structures
  1376   *   in memory (such as a CD or an STE) before the command.
  1377   *
  1378   * - On completion of a CMD_SYNC, there is a control dependency.
  1379   *   This can be relied upon to order subsequent writes to memory (e.g.
  1380   *   freeing an IOVA) after completion of the CMD_SYNC.
  1381   *
  1382   * - Command insertion is totally ordered, so if two CPUs each race to
  1383   *   insert their own list of commands then all of the commands from one
  1384   *   CPU will appear before any of the commands from the other CPU.
  1385   *
  1386   * - A CMD_SYNC is always inserted, which ensures we limit the prod 
pointer
  1387   *   for when the cmdq is full, such that we don't wrap more than twice.
  1388   *   It also makes it easy for the owner to know by how many to 
increment the
  1389   *   cmdq lock.
  1390   */
  1391  static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
  1392                                         u64 *cmds, int n)
  1393  {
  1394          u64 cmd_sync[CMDQ_ENT_DWORDS];
  1395          const int sync = 1;
  1396          u32 prod;
  1397          unsigned long flags;
  1398          bool owner;
  1399          struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
  1400          struct arm_smmu_ll_queue llq = {
  1401                  .max_n_shift = cmdq->q.llq.max_n_shift,
  1402          }, head = llq, space = llq;
  1403          u32 owner_val = 1 << cmdq->q.llq.owner_count_shift;
> 1404          u32 prod_mask = GENMASK(cmdq->q.llq.max_n_shift, 0);
  1405          u32 owner_mask = GENMASK(30, cmdq->q.llq.owner_count_shift);
  1406          int ret = 0;
  1407  
  1408          /* 1. Allocate some space in the queue */
  1409          local_irq_save(flags);
  1410  
  1411          prod = atomic_fetch_add(n + sync + owner_val,
  1412                                  &cmdq->q.llq.atomic.prod);
  1413  
  1414          owner = !(prod & owner_mask);
  1415          llq.prod = prod_mask & prod;
  1416          head.prod = queue_inc_prod_n(&llq, n + sync);
  1417  
  1418          /*
  1419           * Ensure it's safe to write the entries. For this, we need to 
ensure
  1420           * that there is space in the queue from our prod pointer.
  1421           */
  1422          space.cons = READ_ONCE(cmdq->q.llq.cons);
  1423          space.prod = llq.prod;
  1424          while (!queue_has_space(&space, n + sync)) {
  1425                  if (arm_smmu_cmdq_poll_until_not_full(smmu, &space))
  1426                          dev_err_ratelimited(smmu->dev, "CMDQ 
timeout\n");
  1427  
  1428                  space.prod = llq.prod;
  1429          }
  1430  
  1431          /*
  1432           * 2. Write our commands into the queue
  1433           * Dependency ordering from the space-checking while loop, 
above.
  1434           */
  1435          arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
  1436  
  1437          prod = queue_inc_prod_n(&llq, n);
  1438          arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
  1439          queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
  1440  
  1441          /* 3. Mark our slots as valid, ensuring commands are visible 
first */
  1442          dma_wmb();
  1443          arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
  1444  
  1445          /* 4. If we are the owner, take control of the SMMU hardware */
  1446          if (owner) {
  1447                  int owner_count;
  1448                  u32 prod_tmp;
  1449  
  1450                  /* a. Wait for previous owner to finish */
  1451                  atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == 
llq.prod);
  1452  
  1453                  /* b. Stop gathering work by clearing the owned mask */
  1454                  prod_tmp = atomic_fetch_andnot_relaxed(~prod_mask,
  1455                                                         
&cmdq->q.llq.atomic.prod);
  1456                  prod = prod_tmp & prod_mask;
  1457                  owner_count = prod_tmp & owner_mask;
  1458                  owner_count >>= cmdq->q.llq.owner_count_shift;
  1459  
  1460                  /*
  1461                   * c. Wait for any gathered work to be written to the 
queue.
  1462                   * Note that we read our own entries so that we have 
the control
  1463                   * dependency required by (d).
  1464                   */
  1465                  arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
  1466  
  1467                  /*
  1468                   * In order to determine completion of the CMD_SYNCs, 
we must
  1469                   * ensure that the queue can't wrap twice without us 
noticing.
  1470                   * We achieve that by taking the cmdq lock as shared 
before
  1471                   * progressing the prod pointer.
  1472                   * The owner does this for all the non-owners it has 
gathered.
  1473                   * Otherwise, some non-owner(s) may lock the cmdq, 
blocking
  1474                   * cons being updating. This could be when the cmdq has 
just
  1475                   * become full. In this case, other sibling non-owners 
could be
  1476                   * blocked from progressing, leading to deadlock.
  1477                   */
  1478                  arm_smmu_cmdq_shared_lock(cmdq, owner_count);
  1479  
  1480                  /*
  1481                   * d. Advance the hardware prod pointer
  1482                   * Control dependency ordering from the entries 
becoming valid.
  1483                   */
  1484                  writel_relaxed(prod, cmdq->q.prod_reg);
  1485  
  1486                  /*
  1487                   * e. Tell the next owner we're done
  1488                   * Make sure we've updated the hardware first, so that 
we don't
  1489                   * race to update prod and potentially move it 
backwards.
  1490                   */
  1491                  atomic_set_release(&cmdq->owner_prod, prod);
  1492          }
  1493  
  1494          /* 5. Since we always insert a CMD_SYNC, we must wait for it to 
complete */
  1495          llq.prod = queue_inc_prod_n(&llq, n);
  1496          ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
  1497          if (ret)
  1498                  dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout at 
0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
  1499                                      llq.prod, 
readl_relaxed(cmdq->q.prod_reg),
  1500                                      readl_relaxed(cmdq->q.cons_reg));
  1501  
  1502          /*
  1503           * Try to unlock the cmdq lock. This will fail if we're the 
last reader,
  1504           * in which case we can safely update cmdq->q.llq.cons
  1505           */
  1506          if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
  1507                  WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
  1508                  arm_smmu_cmdq_shared_unlock(cmdq);
  1509          }
  1510  
  1511          local_irq_restore(flags);
  1512          return ret;
  1513  }
  1514  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org

Attachment: .config.gz
Description: application/gzip

Reply via email to