Re: [RFC PATCH 5/5] scsi: ufs: Prepare HPB read for cached sub-region

2020-06-11 Thread Daejun Park
On 2020-06-04 18:38, Daejun Park wrote:
> > +  if (total_srgn_cnt != 0) {
> > +dev_err(hba->dev, "ufshpb(%d) error total_subregion_count %d",
> > +  hpb->lun, total_srgn_cnt);
> > +goto release_srgn_table;
> > +  }
> > +
> > +  return 0;
> > +release_srgn_table:
> > +  for (i = 0; i < rgn_idx; i++) {
> > +rgn = rgn_table + i;
> > +if (rgn->srgn_tbl)
> > +  kvfree(rgn->srgn_tbl);
> > +  }

> Please insert a blank line above goto labels as is done in most of the
> kernel code.
OK, I will fix it.

> > +static struct device_attribute ufshpb_sysfs_entries[] = {
> > +  __ATTR(hit_count, 0444, ufshpb_sysfs_show_hit_cnt, NULL),
> > +  __ATTR(miss_count, 0444, ufshpb_sysfs_show_miss_cnt, NULL),
> > +  __ATTR(rb_noti_count, 0444, ufshpb_sysfs_show_rb_noti_cnt, NULL),
> > +  __ATTR(rb_active_count, 0444, ufshpb_sysfs_show_rb_active_cnt, NULL),
> > +  __ATTR(rb_inactive_count, 0444, ufshpb_sysfs_show_rb_inactive_cnt,
> > + NULL),
> > +  __ATTR(map_req_count, 0444, ufshpb_sysfs_show_map_req_cnt, NULL),
> > +  __ATTR_NULL
> > +};

> Please use __ATTR_RO() where appropriate.
They are only readable attributes. So I changed the code to use __ATTR_RO.

> > +static int ufshpb_create_sysfs(struct ufs_hba *hba, struct ufshpb_lu *hpb)
> > +{
> > +  struct device_attribute *attr;
> > +  int ret;
> > +
> > +  device_initialize(>hpb_lu_dev);
> > +
> > +  ufshpb_stat_init(hpb);
> > +
> > +  hpb->hpb_lu_dev.parent = get_device(>ufsf.hpb_dev);
> > +  hpb->hpb_lu_dev.release = ufshpb_dev_release;
> > +  dev_set_name(>hpb_lu_dev, "ufshpb_lu%d", hpb->lun);
> > +
> > +  ret = device_add(>hpb_lu_dev);
> > +  if (ret) {
> > +dev_err(hba->dev, "ufshpb(%d) device_add failed",
> > +  hpb->lun);
> > +return -ENODEV;
> > +  }
> > +
> > +  for (attr = ufshpb_sysfs_entries; attr->attr.name != NULL; attr++) {
> > +if (device_create_file(>hpb_lu_dev, attr))
> > +  dev_err(hba->dev, "ufshpb(%d) %s create file error\n",
> > +hpb->lun, attr->attr.name);
> > +  }
> > +
> > +  return 0;
> > +}

> This is the wrong way to create sysfs attributes. Please set the
> 'groups' member of struct device instead of using a loop to create sysfs
> attributes. The former approach is compatible with udev but the latter
> approach is not.
OK, I changed to create attributes without loop.

> > +static void ufshpb_probe_async(void *data, async_cookie_t cookie)
> > +{
> > +  struct ufshpb_dev_info hpb_dev_info = { 0 };
> > +  struct ufs_hba *hba = data;
> > +  char *desc_buf;
> > +  int ret;
> > +
> > +  desc_buf = kzalloc(QUERY_DESC_MAX_SIZE, GFP_KERNEL);
> > +  if (!desc_buf)
> > +goto release_desc_buf;
> > +
> > +  ret = ufshpb_get_dev_info(hba, _dev_info, desc_buf);
> > +  if (ret)
> > +goto release_desc_buf;
> > +
> > +  /*
> > +   * Because HPB driver uses scsi_device data structure,
> > +   * we should wait at this point until finishing initialization of all
> > +   * scsi devices. Even if timeout occurs, HPB driver will search
> > +   * the scsi_device list on struct scsi_host (shost->__host list_head)
> > +   * and can find out HPB logical units in all scsi_devices
> > +   */
> > +  wait_event_timeout(hba->ufsf.sdev_wait,
> > + (atomic_read(>ufsf.slave_conf_cnt)
> > +== hpb_dev_info.num_lu),
> > + SDEV_WAIT_TIMEOUT);
> > +
> > +  dev_dbg(hba->dev, "ufshpb: slave count %d, lu count %d\n",
> > +atomic_read(>ufsf.slave_conf_cnt), hpb_dev_info.num_lu);
> > +
> > +  ufshpb_scan_hpb_lu(hba, _dev_info, desc_buf);
> > +release_desc_buf:
> > +  kfree(desc_buf);
> > +}

> What happens if two LUNs are added before the above code is woken up?
> Will that perhaps cause the wait_event_timeout() call to wait forever?
I don't think it is problem. I think that the wait_event_timeout() will
check the condition before waiting.

> > +static int ufshpb_probe(struct device *dev)
> > +{
> > +  struct ufs_hba *hba;
> > +  struct ufsf_feature_info *ufsf;
> > +
> > +  if (dev->type != _dev_type)
> > +return -ENODEV;
> > +
> > +  ufsf = container_of(dev, struct ufsf_feature_info, hpb_dev);
> > +  hba = container_of(ufsf, struct ufs_hba, ufsf);
> > +
> > +  async_schedule(ufshpb_probe_async, hba);
> > +  return 0;
> > +}

> So this is an asynchronous probe that is not visible to the device
> driver core? Could the PROBE_PREFER_ASYNCHRONOUS flag have been used
> instead to make device probing asynchronous?
I added the PROBE_PREFER_ASYNCHRONOUS flag to code and changed it to probe
synchronously.
 
> > +static int ufshpb_remove(struct device *dev)
> > +{
> > +  struct ufshpb_lu *hpb, *n_hpb;
> > +  struct ufsf_feature_info *ufsf;
> > +  struct scsi_device *sdev;
> > +
> > +  ufsf = container_of(dev, struct ufsf_feature_info, hpb_dev);
> > +
> > +  dev_set_drvdata(>hpb_dev, NULL);
> > +
> > +  list_for_each_entry_safe(hpb, n_hpb, _drv.lh_hpb_lu,
> > + list_hpb_lu) {
> > +ufshpb_set_state(hpb, HPB_FAILED);
> > +
> > +sdev = hpb->sdev_ufs_lu;
> > +sdev->hostdata = NULL;
> > +
> 

RE: [RFC PATCH 5/5] scsi: ufs: Prepare HPB read for cached sub-region

2020-06-11 Thread Avri Altman


> > +static inline bool ufshpb_is_support_chunk(int transfer_len)
> > +{
> > + return transfer_len <= HPB_MULTI_CHUNK_HIGH;
> > +}
> 
> The names used in the above function are mysterious. What is a support
> chunk? What does "multi chunk high" mean? Please add a comment.
HPB1.0 limits transfer_len to be at most 1.
HPB2.0, which is in its final draft, allows transfer_len to be at most 128,
But introduce some new behavior depends on transfer_len.
This is just preparing for that.

Thanks,
Avri


Re: [RFC PATCH 5/5] scsi: ufs: Prepare HPB read for cached sub-region

2020-06-10 Thread Bart Van Assche
On 2020-06-04 19:12, Daejun Park wrote:
> +static inline bool ufshpb_is_read_cmd(struct scsi_cmnd *cmd)
> +{
> + if (cmd->cmnd[0] == READ_10 || cmd->cmnd[0] == READ_16)
> + return true;
> +
> + return false;
> +}

Has it been considered to check req_op(cmd->request) instead of checking
the SCSI CDB?

> +static inline bool ufshpb_is_write_discard_cmd(struct scsi_cmnd *cmd)
> +{
> + if (cmd->cmnd[0] == WRITE_10 || cmd->cmnd[0] == WRITE_16 ||
> + cmd->cmnd[0] == UNMAP)
> + return true;
> +
> + return false;
> +}

Does the above code depend on how the sd driver translates write and
discard requests? Do UFS devices support the WRITE SAME SCSI command?
Has it been considered to check req_op(cmd->request) instead of checking
the SCSI CDB?

> +static inline bool ufshpb_is_support_chunk(int transfer_len)
> +{
> + return transfer_len <= HPB_MULTI_CHUNK_HIGH;
> +}

The names used in the above function are mysterious. What is a support
chunk? What does "multi chunk high" mean? Please add a comment.

> +static inline u32 ufshpb_get_lpn(struct scsi_cmnd *cmnd)
> +{
> + return blk_rq_pos(cmnd->request) >>
> + (ilog2(cmnd->device->sector_size) - 9);
> +}
>
> +static inline unsigned int ufshpb_get_len(struct scsi_cmnd *cmnd)
> +{
> + return blk_rq_sectors(cmnd->request) >>
> + (ilog2(cmnd->device->sector_size) - 9);
> +}

Do the above two functions perhaps each include a duplicate of
sectors_to_logical()?

> +/* routine : READ10 -> HPB_READ  */
> +static void ufshpb_prep_fn(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
> +{
> + struct ufshpb_lu *hpb;
> + struct ufshpb_region *rgn;
> + struct ufshpb_subregion *srgn;
> + struct scsi_cmnd *cmd = lrbp->cmd;
> + u32 lpn;
> + u64 ppn;
> + unsigned long flags;
> + int transfer_len, rgn_idx, srgn_idx, srgn_offset;
> + int err = 0;
> +
> + hpb = ufshpb_get_hpb_data(cmd);
> + err = ufshpb_lu_get(hpb);
> + if (unlikely(err))
> + return;
> +
> + WARN_ON(hpb->lun != cmd->device->lun);
^^^
WARN_ON_ONCE()?

> + if (!ufshpb_is_write_discard_cmd(cmd) &&
> + !ufshpb_is_read_cmd(cmd))
> + goto put_hpb;
> +
> + transfer_len = ufshpb_get_len(cmd);
> + if (unlikely(!transfer_len))
> + goto put_hpb;
> +
> + lpn = ufshpb_get_lpn(cmd);
> + ufshpb_get_pos_from_lpn(hpb, lpn, _idx, _idx, _offset);
> + rgn = hpb->rgn_tbl + rgn_idx;
> + srgn = rgn->srgn_tbl + srgn_idx;
> +
> + /* If commnad type is WRITE and DISCARD, set bitmap as drity */
  ^^^   ^^^^
  command?  or?dirty?
> + if (ufshpb_is_write_discard_cmd(cmd)) {
> + spin_lock_irqsave(>hpb_state_lock, flags);
> + ufshpb_set_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset,
> +  transfer_len);
> + spin_unlock_irqrestore(>hpb_state_lock, flags);
> + goto put_hpb;
> + }
> +
> + WARN_ON(!ufshpb_is_read_cmd(cmd));
^^^
WARN_ON_ONCE()?

> + if (!ufshpb_is_support_chunk(transfer_len))
> + goto put_hpb;
> +
> + spin_lock_irqsave(>hpb_state_lock, flags);
> + if (ufshpb_test_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset,
> +transfer_len)) {
> + atomic_inc(>stats.miss_cnt);
> + spin_unlock_irqrestore(>hpb_state_lock, flags);
> + goto put_hpb;
> + }
> +
> + ppn = ufshpb_get_ppn(hpb, srgn->mctx, srgn_offset, );
> + spin_unlock_irqrestore(>hpb_state_lock, flags);
> + if (unlikely(err)) {
> + /*
> +  * In this case, the region state is active,
> +  * but the ppn table is not allocated.
> +  * Make sure that ppn tabie must be allocated on
  ^
  table?
> +  * active state
> +  */
> + WARN_ON(true);
> + dev_err(>hpb_lu_dev,
> + "ufshpb_get_ppn failed. err %d\n", err);
> + goto put_hpb;
> + }
> +
> + ufshpb_set_hpb_read_to_upiu(hpb, lrbp, lpn, ppn, transfer_len);
> +
> + atomic_inc(>stats.hit_cnt);
> +put_hpb:
> + ufshpb_lu_put(hpb);
> +}

Thanks,

Bart.


Re: [RFC PATCH 5/5] scsi: ufs: Prepare HPB read for cached sub-region

2020-06-08 Thread Bart Van Assche
On 2020-06-06 11:38, Avri Altman wrote:
>> +   for (i = 0; i < bit_len; i++) {
>> +   if (test_bit(srgn_offset + i, srgn->mctx->ppn_dirty))
>
> Maybe use a mask or hweight instead of testing bit by bit?

How about using find_next_bit() from include/linux/bitmap.h?

/*
 *  find_next_bit(addr, nbits, bit) Position next set bit in *addr
 *  >= bit
 */

Thanks,

Bart.


RE: [RFC PATCH 5/5] scsi: ufs: Prepare HPB read for cached sub-region

2020-06-08 Thread Daejun Park
> > +static bool ufshpb_test_ppn_dirty(struct ufshpb_lu *hpb, int rgn_idx,
> > +  int srgn_idx, int srgn_offset, int cnt)

> > +
> > +   for (i = 0; i < bit_len; i++) {
> > +   if (test_bit(srgn_offset + i, srgn->mctx->ppn_dirty))
> Maybe use a mask or hweight instead of testing bit by bit?
There is no problem in this HPB vesion because it only supports 4KB sized read 
IO.
However, this code is not as efficient as you pointed out. So I will change 
this in HPB version 2.0.

> > +static void
> > +ufshpb_set_hpb_read_to_upiu(struct ufshpb_lu *hpb, struct ufshcd_lrb
> > *lrbp,
> > + u32 lpn, u64 ppn,  unsigned int 
> > transfer_len)
> > +{
> > +   unsigned char *cdb = lrbp->ucd_req_ptr->sc.cdb;
> > +
> > +   cdb[0] = UFSHPB_READ;
> > +
> > +   put_unaligned_be32(lpn, [2]);
> Is this needed? The lba is already occupying bytes 2..5
The needless code will be deleted on next patch.

Thanks,
Daejun


RE: [RFC PATCH 5/5] scsi: ufs: Prepare HPB read for cached sub-region

2020-06-06 Thread Avri Altman

> +static bool ufshpb_test_ppn_dirty(struct ufshpb_lu *hpb, int rgn_idx,
> +  int srgn_idx, int srgn_offset, int cnt)


> +
> +   for (i = 0; i < bit_len; i++) {
> +   if (test_bit(srgn_offset + i, srgn->mctx->ppn_dirty))
Maybe use a mask or hweight instead of testing bit by bit?

> +static void
> +ufshpb_set_hpb_read_to_upiu(struct ufshpb_lu *hpb, struct ufshcd_lrb
> *lrbp,
> + u32 lpn, u64 ppn,  unsigned int 
> transfer_len)
> +{
> +   unsigned char *cdb = lrbp->ucd_req_ptr->sc.cdb;
> +
> +   cdb[0] = UFSHPB_READ;
> +
> +   put_unaligned_be32(lpn, [2]);
Is this needed? The lba is already occupying bytes 2..5

> +   put_unaligned_be64(ppn, [6]);
> +   cdb[14] = transfer_len;
> +}
> +

Thanks,
Avri