Re: [PATCH v5] sd: Check for unaligned partial completion
Bart, On 2/21/17 13:21, Bart Van Assche wrote: > On 02/20/2017 06:35 PM, Martin K. Petersen wrote: >> I'm still not keen on having two orthogonal sanity checks wrt. figuring >> out how much of a request has been completed. >> >> Also, I find your approach hard to follow in the case where >> sd_completed_bytes() is called after the resid has been adjusted. It >> works, but it's not immediately obvious that that's the case. Which to >> me is an indication that this entire thing needs a thorough cleanup. > > Hello Martin and Damien, > > How about the following: > * Add a function to the block layer that reports whether or not the > request is a medium access request. The number of transferred bytes > for a medium access request is a multiple of the logical block size. > (The terminology "medium access command" comes from the SCSI specs.) > * Use that function instead of "scmd->request->cmd_type == REQ_TYPE_FS" > in the mpt3sas driver. > * Do not modify sd_done(). > > This approach has the advantage that the mpt3sas firmware bug workaround > does not slow down the hot path of the sd driver when another LLD than > mpt3sas is used. I think we would still need the check for REQ_TYPE_FS to avoid interfering with SG_IO commands. As for the "medium access command" test, I am not sure if the block layer is the right place to define that since a request operation may map to different commands depending on the device type (e.g. REQ_OP_DISCARD which can become unmap or write same in SCSI). Martin, Which approach do you prefer ? Keeping everything contained to mpt3sas (so basically just fixing the problematic patch), or cleaning up everything with sd_completed_bytes rewrite ? Best regards. -- Damien Le Moal, Ph.D. Sr. Manager, System Software Research Group, Western Digital Corporation damien.lem...@wdc.com (+81) 0466-98-3593 (ext. 513593) 1 kirihara-cho, Fujisawa, Kanagawa, 252-0888 Japan www.wdc.com, www.hgst.com
[PATCH] scsi: ufs: Factor out ufshcd_read_desc_param
Since in UFS 2.1 specification some of the descriptor lengths differs from 2.0 specification and some devices, which are reporting spec version 2.0 have different descriptor lengths we can not rely on hardcoded values taken from 2.0 specification. This patch introduces reading these lengths per each device from descriptor headers at probe time to ensure their correctness. Signed-off-by: Michal' Potomski--- drivers/scsi/ufs/ufs.h| 22 ++--- drivers/scsi/ufs/ufshcd.c | 230 ++ drivers/scsi/ufs/ufshcd.h | 15 +++ 3 files changed, 195 insertions(+), 72 deletions(-) diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index 8e6709a..9e1b1a8 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -146,7 +146,7 @@ enum attr_idn { /* Descriptor idn for Query requests */ enum desc_idn { QUERY_DESC_IDN_DEVICE = 0x0, - QUERY_DESC_IDN_CONFIGURAION = 0x1, + QUERY_DESC_IDN_CONFIGURATION= 0x1, QUERY_DESC_IDN_UNIT = 0x2, QUERY_DESC_IDN_RFU_0= 0x3, QUERY_DESC_IDN_INTERCONNECT = 0x4, @@ -162,19 +162,13 @@ enum desc_header_offset { QUERY_DESC_DESC_TYPE_OFFSET = 0x01, }; -enum ufs_desc_max_size { - QUERY_DESC_DEVICE_MAX_SIZE = 0x40, - QUERY_DESC_CONFIGURAION_MAX_SIZE= 0x90, - QUERY_DESC_UNIT_MAX_SIZE= 0x23, - QUERY_DESC_INTERCONNECT_MAX_SIZE= 0x06, - /* -* Max. 126 UNICODE characters (2 bytes per character) plus 2 bytes -* of descriptor header. 
-*/ - QUERY_DESC_STRING_MAX_SIZE = 0xFE, - QUERY_DESC_GEOMETRY_MAX_SIZE= 0x44, - QUERY_DESC_POWER_MAX_SIZE = 0x62, - QUERY_DESC_RFU_MAX_SIZE = 0x00, +enum ufs_desc_def_size { + QUERY_DESC_DEVICE_DEF_SIZE = 0x40, + QUERY_DESC_CONFIGURATION_DEF_SIZE = 0x90, + QUERY_DESC_UNIT_DEF_SIZE= 0x23, + QUERY_DESC_INTERCONNECT_DEF_SIZE= 0x06, + QUERY_DESC_GEOMETRY_DEF_SIZE= 0x44, + QUERY_DESC_POWER_DEF_SIZE = 0x62, }; /* Unit descriptor parameters offsets in bytes*/ diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 20e5e5f..79d5055 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -94,19 +94,6 @@ _ret; \ }) -static u32 ufs_query_desc_max_size[] = { - QUERY_DESC_DEVICE_MAX_SIZE, - QUERY_DESC_CONFIGURAION_MAX_SIZE, - QUERY_DESC_UNIT_MAX_SIZE, - QUERY_DESC_RFU_MAX_SIZE, - QUERY_DESC_INTERCONNECT_MAX_SIZE, - QUERY_DESC_STRING_MAX_SIZE, - QUERY_DESC_RFU_MAX_SIZE, - QUERY_DESC_GEOMETRY_MAX_SIZE, - QUERY_DESC_POWER_MAX_SIZE, - QUERY_DESC_RFU_MAX_SIZE, -}; - enum { UFSHCD_MAX_CHANNEL = 0, UFSHCD_MAX_ID = 1, @@ -2012,7 +1999,7 @@ static int __ufshcd_query_descriptor(struct ufs_hba *hba, goto out; } - if (*buf_len <= QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) { + if (*buf_len < QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) { dev_err(hba->dev, "%s: descriptor buffer size (%d) is out of range\n", __func__, *buf_len); err = -EINVAL; @@ -2092,6 +2079,92 @@ int ufshcd_query_descriptor_retry(struct ufs_hba *hba, EXPORT_SYMBOL(ufshcd_query_descriptor_retry); /** + * ufshcd_read_desc_length - read the specified descriptor length from header + * @hba: Pointer to adapter instance + * @desc_id: descriptor idn value + * @desc_index: descriptor index + * @desc_length: pointer to variable to read the length of descriptor + * + * Return 0 in case of success, non-zero otherwise + */ +static int ufshcd_read_desc_length(struct ufs_hba *hba, + enum desc_idn desc_id, + int desc_index, + int *desc_length) +{ + int ret; + u8 
header[QUERY_DESC_HDR_SIZE]; + int header_len = QUERY_DESC_HDR_SIZE; + + if (desc_id >= QUERY_DESC_IDN_MAX) + return -EINVAL; + + ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC, + desc_id, desc_index, 0, header, + _len); + + if (ret) { + dev_err(hba->dev, "%s: Failed to get descriptor header id %d", + __func__, desc_id); + return ret; + } else if (desc_id != header[QUERY_DESC_DESC_TYPE_OFFSET]) { + dev_warn(hba->dev, "%s: descriptor header id %d and desc_id %d mismatch", + __func__, header[QUERY_DESC_DESC_TYPE_OFFSET], + desc_id); + ret = -EINVAL; + } + + *desc_length =
Re: [PATCH] scsi: zero per-cmd driver data before each I/O
On 02/20/2017 05:52 PM, Christoph Hellwig wrote: > Without this drivers that don't clear the state themselves can see off > effects. For example Hyper-V VMs using the storvsc driver will often > hang during boot due to uncleared Test Unit Ready failures. > > Fixes: e9c787e6 ("scsi: allocate scsi_cmnd structures as part of struct > request") > Signed-off-by: Christoph Hellwig> Reported-by: Dexuan Cui > Tested-by: Dexuan Cui > --- > drivers/scsi/scsi_lib.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c > index 90f65c8f487a..daced9db8af8 100644 > --- a/drivers/scsi/scsi_lib.c > +++ b/drivers/scsi/scsi_lib.c > @@ -1167,6 +1167,7 @@ void scsi_init_command(struct scsi_device *dev, struct > scsi_cmnd *cmd) > /* zero out the cmd, except for the embedded scsi_request */ > memset((char *)cmd + sizeof(cmd->req), 0, > sizeof(*cmd) - sizeof(cmd->req)); > + memset((char *)(cmd + 1), 0, dev->host->hostt->cmd_size); > > cmd->device = dev; > cmd->sense_buffer = buf; > Reviewed-by: Hannes Reinecke Cheers, Hannes -- Dr. Hannes ReineckeTeamlead Storage & Networking h...@suse.de +49 911 74053 688 SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton HRB 21284 (AG Nürnberg)
Re: [PATCH v5] sd: Check for unaligned partial completion
On 02/20/2017 06:35 PM, Martin K. Petersen wrote: > I'm still not keen on having two orthogonal sanity checks wrt. figuring > out how much of a request has been completed. > > Also, I find your approach hard to follow in the case where > sd_completed_bytes() is called after the resid has been adjusted. It > works, but it's not immediately obvious that that's the case. Which to > me is an indication that this entire thing needs a thorough cleanup. Hello Martin and Damien, How about the following: * Add a function to the block layer that reports whether or not the request is a medium access request. The number of transferred bytes for a medium access request is a multiple of the logical block size. (The terminology "medium access command" comes from the SCSI specs.) * Use that function instead of "scmd->request->cmd_type == REQ_TYPE_FS" in the mpt3sas driver. * Do not modify sd_done(). This approach has the advantage that the mpt3sas firmware bug workaround does not slow down the hot path of the sd driver when another LLD than mpt3sas is used. Bart.
Re: [PATCH v5] sd: Check for unaligned partial completion
> "Damien" == Damien Le Moal writes: Damien, Damien> Initially, I didn't want to change more than I did so that the Damien> patch quickly makes it to 4.10 and we get ZBC working with LSI Damien> HBAs. Since this did not happen and we have more time ahead, I Damien> can respin everything into sd_completed_bytes to clean things Damien> up. Damien> I will resend a patch. Great, thanks! Would be nice to get this cleaned up. -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH v3 00/16] lpfc: Add NVME Fabrics support
> "James" == James Smart writes: James> This patch set adds support for NVME over Fabrics FC transport to James> lpfc Applied to 4.11/scsi-queue. -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH v5] sd: Check for unaligned partial completion
Martin, On 2/21/17 11:35, Martin K. Petersen wrote: >> "Damien" == Damien Le Moal writes: > > Hi Damien, > > Damien> Move the partial completion alignment check of mpt3sas to a > Damien> generic implementation in sd_done so that the check ignores > Damien> REQ_TYPE_FS requests with special payload size handling > Damien> (REQ_OP_DISCARD, REQ_OP_WRITE_SAME, REQ_OP_ZONE_REPORT and > Damien> REQ_OP_ZONE_RESET). For the remaining REQ_OP_FLUSH, REQ_OP_READ > Damien> and REQ_OP_WRITE, we only need to check the resid alignment, > Damien> correct it if necessary and then correct good_bytes. Note that > Damien> in this case, good_bytes will always initially be 0 or aligned > Damien> on the device logical block size, so correcting resid alignment > Damien> will always result in good_bytes also being properly aligned. > > I'm still not keen on having two orthogonal sanity checks wrt. figuring > out how much of a request has been completed. > > Also, I find your approach hard to follow in the case where > sd_completed_bytes() is called after the resid has been adjusted. It > works, but it's not immediately obvious that that's the case. Which to > me is an indication that this entire thing needs a thorough cleanup. > > If you don't feel like mucking more with this, I understand. In that > case I might pick up your patch and attempt to clean things up later. Initially, I didn't want to change more than I did so that the patch quickly makes it to 4.10 and we get ZBC working with LSI HBAs. Since this did not happen and we have more time ahead, I can respin everything into sd_completed_bytes to clean things up. I will resend a patch. Best regards. -- Damien Le Moal, Ph.D. Sr. Manager, System Software Research Group, Western Digital Corporation damien.lem...@wdc.com (+81) 0466-98-3593 (ext. 513593) 1 kirihara-cho, Fujisawa, Kanagawa, 252-0888 Japan www.wdc.com, www.hgst.com
Re: [PATCH] cxlflash: Enable PCI device ID for future IBM CXL Flash AFU
> "Uma" == Uma Krishnan writes: Uma> From: "Matthew R. Ochs" Add support for Uma> a future IBM Coherent Accelerator (CXL) flash AFU with an ID of Uma> 0x0624. Applied to 4.11/scsi-queue. -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCHv3] scsi: use 'scsi_device_from_queue()' for scsi_dh
> "Hannes" == Hannes Reinecke writes: Hannes> The device handler needs to check if a given queue belongs to a Hannes> scsi device; only then does it make sense to attach a device Hannes> handler. Fixed kbuild warning and applied to 4.11/scsi-queue. -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH 0/3] qla2xxx: Bug fixes and cleanup for the driver.
> "Himanshu" == Himanshu Madhani writes: Hi Himanshu, Himanshu> This series contains small cleanup + fix for regression that Himanshu> was introduced by pci_alloc_irq_vectors_affinity() call in Himanshu> driver. Himanshu> Please apply this series to 4.10/scsi-fixes at your earliest Himanshu> convenience. 4.10 is out and this series does not apply to 4.11/scsi-queue. Please rebase. Also, please make sure your "Fixes: d745952 ("scsi: qla2xxx:..." have a 12-char hash. And add a stable tag if you want these in 4.10.x. Thanks! -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH v1 1/1] scsi: ufs-qcom: remove redundant condition check
> "Subhash" == Subhash Jadavani writes: Subhash, Subhash> Dan Carpenter reported this: --- The Subhash> patch 9c46b8676271: "scsi: ufs-qcom: dump additional testbus Subhash> registers" from Feb 3, 2017, leads to the following static Subhash> checker warning: You had used "---" to enclose Dan's email. But that's also the "ignore everything beyond this point" delimiter so I ended up with an empty commit message. I fixed it up. Applied to 4.11/scsi-queue. -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH] smartpqi: fix time handling
> "Arnd" == Arnd Bergmann writes: Arnd> When we have turned off RTC support, the smartpqi driver fails to Arnd> build: ERROR: "rtc_time64_to_tm" Arnd> [drivers/scsi/smartpqi/smartpqi.ko] undefined! Applied to 4.11/scsi-queue. -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH V2 00/15] aacraid: Fixes and enhancements for arc family
> "Raghava" == Raghava Aditya Renukunta writes: Raghava> This patch set contains issue fixes, enhancements and other Raghava> misc changes. The majority of the fixes are a direct outcome Raghava> of testing and work done on the adapter reset Raghava> mechanism. Initially it just had IOP reset and then was Raghava> augmented with IWBR soft hardware resets in the previous patch Raghava> set. The reset mechanism is triggered in 2 paths, one is from Raghava> the eh handler from the kernel and the other is from the Raghava> driver's internal periodic health checkup. Applied to 4.11/scsi-queue. -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH v5] sd: Check for unaligned partial completion
> "Damien" == Damien Le Moal writes: Hi Damien, Damien> Move the partial completion alignment check of mpt3sas to a Damien> generic implementation in sd_done so that the check ignores Damien> REQ_TYPE_FS requests with special payload size handling Damien> (REQ_OP_DISCARD, REQ_OP_WRITE_SAME, REQ_OP_ZONE_REPORT and Damien> REQ_OP_ZONE_RESET). For the remaining REQ_OP_FLUSH, REQ_OP_READ Damien> and REQ_OP_WRITE, we only need to check the resid alignment, Damien> correct it if necessary and then correct good_bytes. Note that Damien> in this case, good_bytes will always initially be 0 or aligned Damien> on the device logical block size, so correcting resid alignment Damien> will always result in good_bytes also being properly aligned. I'm still not keen on having two orthogonal sanity checks wrt. figuring out how much of a request has been completed. Also, I find your approach hard to follow in the case where sd_completed_bytes() is called after the resid has been adjusted. It works, but it's not immediately obvious that that's the case. Which to me is an indication that this entire thing needs a thorough cleanup. If you don't feel like mucking more with this, I understand. In that case I might pick up your patch and attempt to clean things up later. -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH v5] sd: Check for unaligned partial completion
Bart, On 2/21/17 02:34, Bart Van Assche wrote: > On 02/16/2017 04:20 PM, Damien Le Moal wrote: >> Move the partial completion alignement check of mpt3sas to a generic >> implementation in sd_done so that the check ignores REQ_TYPE_FS >> requests with special payload size handling (REQ_OP_DISCARD, >> REQ_OP_WRITE_SAME, REQ_OP_ZONE_REPORT and REQ_OP_ZONE_RESET). > > Hello Damien, > > Since the resid adjustment code is skipped for REQ_OP_DISCARD and > REQ_OP_WRITE_SAME: does the mpt3sas firmware ensure that 'resid' is > properly aligned for these request types? Did I perhaps miss something? No, I do not think anything special is done for these commands in the driver (I did not see any code to that effect). But for discard, since the payload is one sector, it always should be aligned (or 0). So the actual value from the driver is ignored and overwritten in the switch-case code with the actual request size on success (not the payload size) and 0 on error. If anything, if the HW returned a non-0 resid, we should mark the command as failed. For write-same, it is basically the same. The payload is one page containing the sector range to discard (soon multiple ranges actually). If the commands succeeded, then clearly resid should be 0 and the value returned by the HW ignored. In case of a failed command, no matter what resid is, the discard is marked as entirely failed. Similarly to write-same, a success with a non-0 resid is non-sensical. So we could force the request as failed here in this case. Best regards. -- Damien Le Moal, Ph.D. Sr. Manager, System Software Research Group, Western Digital Corporation damien.lem...@wdc.com (+81) 0466-98-3593 (ext. 513593) 1 kirihara-cho, Fujisawa, Kanagawa, 252-0888 Japan www.wdc.com, www.hgst.com
[GIT PULL] first round of SCSI updates for the 4.10+ merge window
This update includes the usual round of major driver updates (ncr5380, ufs, lpfc, be2iscsi, hisi_sas, storvsc, cxlflash, aacraid, megaraid_sas, ). There's also an assortment of minor fixes and the major update of switching another bunch of drivers to pci_alloc_irq_vectors from Christoph. The patch is available here: git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git scsi-misc The short changelog is: Arnd Bergmann (2): scsi: megaraid_sas: handle dma_addr_t right on 32-bit scsi: aacraid: avoid open-coded upper_32_bits Cao jin (1): scsi: qla4xxx: comments correction Chaitra P B (4): scsi: mpt3sas: Updating driver version to v15.100.00.00 scsi: mpt3sas: Fix Firmware fault state 0x2100 during heavy 4K RR FIO stress test. scsi: mpt3sas: Fix for Crusader to achieve product targets with SAS devices. scsi: mpt3sas: Added print to notify cable running at a degraded speed. Christoph Hellwig (11): scsi: snic: switch to pci_irq_alloc_vectors scsi: pm8001: switch to pci_irq_alloc_vectors scsi: remove tsk_mgmt_response and it_nexus_response transport methods scsi: remove eh_timed_out methods in the transport template scsi: libsas: remove sas_scsi_timed_out scsi: mvumi: remove fake transport template scsi: storvsc: remove bogus code to transfer struct scatterlist scsi: qla4xxx: remove two unused MSI-X related #defines scsi: vmw_pvscsi: switch to pci_alloc_irq_vectors scsi: pmcraid: switch to pci_alloc_irq_vectors scsi: bfa: remove bfa_fcs_mod_s Colin Ian King (3): scsi: aacraid: rcode is unsigned and should be signed int scsi: qla2xxx: rename {vendor|hba}_indentifer to {vendor|hba}_identifer scsi: qedi: return via va_end to match corresponding va_start Corentin Labbe (1): scsi: mptlan: Remove linux/miscdevice.h from mptlan.h Dan Carpenter (2): scsi: megaraid_sas: array overflow in megasas_dump_frame() scsi: dpt_i2o: double free if adpt_i2o_online_hba() fails Dolev Raviv (3): scsi: ufs: Improve fatal error logs scsi: ufs: fix multiple ufs spec violation scsi: ufs: dump 
debug info during failures Don Brace (1): scsi: hpsa: remove coalescing settings for ioaccel2 Emese Revfy (1): scsi: esas2r: Fix format string type mistakes Finn Thain (6): scsi: atari_scsi: Reset DMA during bus reset only under ST-DMA lock scsi: ncr5380: Improve target selection robustness scsi: ncr5380: Resolve various static checker warnings scsi: ncr5380: Reduce #include files scsi: ncr5380: Clean up dead code and redundant macro usage scsi: ncr5380: Shorten host info string by removing unused option macros Gilad Broner (3): scsi: ufs: add host state prints in failure cases scsi: ufs: reduce printout for aborted requests scsi: ufs: skip request abort task when previous aborts failed Hanjun Guo (1): scsi: remove useless acpi functions in the header file James Smart (13): scsi: lpfc: Fix lpfc_wwn_set return code check scsi: lpfc: Reinstate lpfc_soft_wwn parameter scsi: lpfc: lpfc version change to 11.2.0.4 scsi: lpfc: Add missing memory barrier scsi: lpfc: Correct oops on vport port resets scsi: lpfc: Deprecate lpfc_prot_sg_seg_cnt parameter scsi: lpfc: Fix Xlane dynamic LUN set for LUN priority. 
scsi: lpfc: FCoE VPort enable-disable does not bring up the VPort scsi: lpfc: Correct host name in symbolic_name field scsi: lpfc: Correct issue leading to oops during link reset scsi: lpfc: Deprecate lpfc_soft_wwn parameter scsi: lpfc: Correct error in setting OS Driver Version with FW scsi: lpfc: Clear the VendorVersion in the PLOGI/PLOGI ACC payload Javier Martinez Canillas (1): scsi: ufs-qcom: Fix module autoload Jitendra Bhivare (12): scsi: be2iscsi: Update driver version scsi: be2iscsi: Reinit SGL handle, CID tables after TPE scsi: be2iscsi: Add checks to validate CID alloc/free scsi: be2iscsi: Remove wq_name from beiscsi_hba scsi: be2iscsi: Remove unused struct members scsi: be2iscsi: Remove redundant receive buffers posting scsi: be2iscsi: Fix iSCSI cmd cleanup IOCTL scsi: be2iscsi: Add checks to validate completions scsi: be2iscsi: Set WRB invalid bit for SkyHawk scsi: be2iscsi: Take iscsi_task ref in abort handler scsi: be2iscsi: Fix for crash in beiscsi_eh_device_reset scsi: be2iscsi: Fix use of invalidate command table req John Garry (5): scsi: hisi_sas: downgrade internal abort exit print scsi: hisi_sas: downgrade refclk message scsi: hisi_sas: lock sensitive region in hisi_sas_slot_abort() scsi: hisi_sas: lock sensitive regions when servicing CQ interrupt scsi: hisi_sas: service v2 hw CQ ISR with tasklet John Pittman (1): scsi: sd: Cleaned up comment references to @sdp argument explanation. K. Y.
RE
I am Viet Xuan Luong. I am an American soldier presently on active service with 4th Squadron battalion here in Afghanistan. I served with the third Infantry Division in Iraq before thousand of my lucky colleagues were pulled out in August Last year, leaving me among the unlucky ones drafted to Afghanistan where I am serving presently. Am seeking your urgent help, please get back to me via email and I will tell you more about it thank you and God bless you. Respectfully submitted. vietxuanluong...@gmail.com Viet Xuan Luong
RE
I am Viet Xuan Luong. I am an American soldier presently on active service with 4th Squadron battalion here in Afghanistan. I served with the third Infantry Division in Iraq before thousand of my lucky colleagues were pulled out in August Last year, leaving me among the unlucky ones drafted to Afghanistan where I am serving presently. Am seeking your urgent help, please get back to me via email and I will tell you more about it thank you and God bless you. Respectfully submitted. vietxuanluong...@gmail.com Viet Xuan Luong
Re: [PATCH v5] sd: Check for unaligned partial completion
On 02/16/2017 04:20 PM, Damien Le Moal wrote: > Move the partial completion alignment check of mpt3sas to a generic > implementation in sd_done so that the check ignores REQ_TYPE_FS > requests with special payload size handling (REQ_OP_DISCARD, > REQ_OP_WRITE_SAME, REQ_OP_ZONE_REPORT and REQ_OP_ZONE_RESET). Hello Damien, Since the resid adjustment code is skipped for REQ_OP_DISCARD and REQ_OP_WRITE_SAME: does the mpt3sas firmware ensure that 'resid' is properly aligned for these request types? Did I perhaps miss something? Thanks, Bart.
Re: [PATCH 0/5] block subsystem refcounter conversions
On Mon, 2017-02-20 at 17:56 +0100, Peter Zijlstra wrote: > On Mon, Feb 20, 2017 at 07:41:01AM -0800, James Bottomley wrote: > > On Mon, 2017-02-20 at 08:15 -0700, Jens Axboe wrote: > > > On 02/20/2017 04:16 AM, Elena Reshetova wrote: > > > > Now when new refcount_t type and API are finally merged > > > > (see include/linux/refcount.h), the following > > > > patches convert various refcounters in the block susystem from > > > > atomic_t to refcount_t. By doing this we prevent intentional or > > > > accidental underflows or overflows that can led to use-after > > > > -free vulnerabilities. > > > > This description isn't right ... nothing is prevented; we get > > warnings on saturation and use after free with this. > > The thing that is prevented is overflow and then a use-after-free by > making it a leak. > > Modular stuff, you put and free at: (n+1) mod n, by saturating at n-1 > we'll never get there. > > So you loose use-after-free, you gain a resource leak. The general > idea being that use-after-free is a nice trampoline for exploits, > leaks are 'only' a DoS. OK, I see the intention: it's protection from outside influence. It still doesn't prevent *us* from screwing up in the kernel and inducing a use after free by doing too many puts (or too few gets) ... that's what the message suggests to me (me coding wrongly is accidental underflows or overflows as I read it). James
Re: [PATCH] scsi: zero per-cmd driver data before each I/O
On 02/20/2017 08:52 AM, Christoph Hellwig wrote: > Without this drivers that don't clear the state themselves can see off > effects. For example Hyper-V VMs using the storvsc driver will often > hang during boot due to uncleared Test Unit Ready failures. > > Fixes: e9c787e6 ("scsi: allocate scsi_cmnd structures as part of struct > request") > Signed-off-by: Christoph Hellwig> Reported-by: Dexuan Cui > Tested-by: Dexuan Cui > --- > drivers/scsi/scsi_lib.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c > index 90f65c8f487a..daced9db8af8 100644 > --- a/drivers/scsi/scsi_lib.c > +++ b/drivers/scsi/scsi_lib.c > @@ -1167,6 +1167,7 @@ void scsi_init_command(struct scsi_device *dev, struct > scsi_cmnd *cmd) > /* zero out the cmd, except for the embedded scsi_request */ > memset((char *)cmd + sizeof(cmd->req), 0, > sizeof(*cmd) - sizeof(cmd->req)); > + memset((char *)(cmd + 1), 0, dev->host->hostt->cmd_size); Hello Christoph, Since this code is in the hot path: please combine the two memset() calls into a single memset() call. Thanks, Bart.
Re: [PATCH 0/5] block subsystem refcounter conversions
On Mon, Feb 20, 2017 at 07:41:01AM -0800, James Bottomley wrote: > On Mon, 2017-02-20 at 08:15 -0700, Jens Axboe wrote: > > On 02/20/2017 04:16 AM, Elena Reshetova wrote: > > > Now when new refcount_t type and API are finally merged > > > (see include/linux/refcount.h), the following > > > patches convert various refcounters in the block subsystem from > > > atomic_t to refcount_t. By doing this we prevent intentional or > > > accidental underflows or overflows that can lead to use-after-free > > > vulnerabilities. > > This description isn't right ... nothing is prevented; we get warnings > on saturation and use after free with this. The thing that is prevented is overflow and then a use-after-free by making it a leak. Modular stuff, you put and free at: (n+1) mod n, by saturating at n-1 we'll never get there. So you lose use-after-free, you gain a resource leak. The general idea being that use-after-free is a nice trampoline for exploits, leaks are 'only' a DoS.
[PATCH] scsi: zero per-cmd driver data before each I/O
Without this drivers that don't clear the state themselves can see off effects. For example Hyper-V VMs using the storvsc driver will often hang during boot due to uncleared Test Unit Ready failures. Fixes: e9c787e6 ("scsi: allocate scsi_cmnd structures as part of struct request") Signed-off-by: Christoph HellwigReported-by: Dexuan Cui Tested-by: Dexuan Cui --- drivers/scsi/scsi_lib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 90f65c8f487a..daced9db8af8 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1167,6 +1167,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd) /* zero out the cmd, except for the embedded scsi_request */ memset((char *)cmd + sizeof(cmd->req), 0, sizeof(*cmd) - sizeof(cmd->req)); + memset((char *)(cmd + 1), 0, dev->host->hostt->cmd_size); cmd->device = dev; cmd->sense_buffer = buf; -- 2.11.0
Re: [PATCH 0/5] block subsystem refcounter conversions
On 02/20/2017 08:41 AM, James Bottomley wrote: > On Mon, 2017-02-20 at 08:15 -0700, Jens Axboe wrote: >> On 02/20/2017 04:16 AM, Elena Reshetova wrote: >>> Now when new refcount_t type and API are finally merged >>> (see include/linux/refcount.h), the following >>> patches convert various refcounters in the block susystem from >>> atomic_t to refcount_t. By doing this we prevent intentional or >>> accidental underflows or overflows that can led to use-after-free >>> vulnerabilities. > > This description isn't right ... nothing is prevented; we get warnings > on saturation and use after free with this. > >>> The below patches are fully independent and can be cherry-picked >>> separately. Since we convert all kernel subsystems in the same >>> fashion, resulting in about 300 patches, we have to group them for >>> sending at least in some fashion to be manageable. Please excuse >>> the long cc list. >>> >>> Elena Reshetova (5): >>> block: convert bio.__bi_cnt from atomic_t to refcount_t >>> block: convert blk_queue_tag.refcnt from atomic_t to refcount_t >>> block: convert blkcg_gq.refcnt from atomic_t to refcount_t >>> block: convert io_context.active_ref from atomic_t to refcount_t >>> block: convert bsg_device.ref_count from atomic_t to refcount_t >> >> I went to look at the implementation, and the size of a refcount_t. >> But the code is not there?! You say it's finally merged, where is >> it merged? Don't send code like this without the necessary >> infrastructure being in mainline. > > It's one of those no discussion except notice by tipbot things because > Ingo liked it. > > The size is atomic_t, but the primitives check for overflow and check > inc from zero and things, so in a true refcounting situation we get > automatic warnings of problems inside the primitives. 
> > That said, if we were going to convert the block layer to this > semantic, surely the benefit of the conversion would be getting rid of > the now unnecessary BUG_ON and WARN_ON in the code, which do exactly > the same thing the refcount primitives now do? Well, I have no idea what it does, which is why I went to look at the code. So any talk of converting the block layer is premature. But it's not there. I'll defer judgment until we have something in mainline, until then I've archived this thread. And I agree, keeping warn/bug for cases that should be handled with this framework would be counter productive and pointless. -- Jens Axboe
Re: [PATCH 0/5] block subsystem refcounter conversions
On Mon, 2017-02-20 at 08:15 -0700, Jens Axboe wrote: > On 02/20/2017 04:16 AM, Elena Reshetova wrote: > > Now when new refcount_t type and API are finally merged > > (see include/linux/refcount.h), the following > > patches convert various refcounters in the block susystem from > > atomic_t to refcount_t. By doing this we prevent intentional or > > accidental underflows or overflows that can led to use-after-free > > vulnerabilities. This description isn't right ... nothing is prevented; we get warnings on saturation and use after free with this. > > The below patches are fully independent and can be cherry-picked > > separately. Since we convert all kernel subsystems in the same > > fashion, resulting in about 300 patches, we have to group them for > > sending at least in some fashion to be manageable. Please excuse > > the long cc list. > > > > Elena Reshetova (5): > > block: convert bio.__bi_cnt from atomic_t to refcount_t > > block: convert blk_queue_tag.refcnt from atomic_t to refcount_t > > block: convert blkcg_gq.refcnt from atomic_t to refcount_t > > block: convert io_context.active_ref from atomic_t to refcount_t > > block: convert bsg_device.ref_count from atomic_t to refcount_t > > I went to look at the implementation, and the size of a refcount_t. > But the code is not there?! You say it's finally merged, where is > it merged? Don't send code like this without the necessary > infrastructure being in mainline. It's one of those no discussion except notice by tipbot things because Ingo liked it. The size is atomic_t, but the primitives check for overflow and check inc from zero and things, so in a true refcounting situation we get automatic warnings of problems inside the primitives. That said, if we were going to convert the block layer to this semantic, surely the benefit of the conversion would be getting rid of the now unnecessary BUG_ON and WARN_ON in the code, which do exactly the same thing the refcount primitives now do? James
Re: [PATCH 0/5] block subsystem refcounter conversions
On 02/20/2017 04:16 AM, Elena Reshetova wrote: > Now when new refcount_t type and API are finally merged > (see include/linux/refcount.h), the following > patches convert various refcounters in the block susystem from atomic_t > to refcount_t. By doing this we prevent intentional or accidental > underflows or overflows that can led to use-after-free vulnerabilities. > > The below patches are fully independent and can be cherry-picked separately. > Since we convert all kernel subsystems in the same fashion, resulting > in about 300 patches, we have to group them for sending at least in some > fashion to be manageable. Please excuse the long cc list. > > Elena Reshetova (5): > block: convert bio.__bi_cnt from atomic_t to refcount_t > block: convert blk_queue_tag.refcnt from atomic_t to refcount_t > block: convert blkcg_gq.refcnt from atomic_t to refcount_t > block: convert io_context.active_ref from atomic_t to refcount_t > block: convert bsg_device.ref_count from atomic_t to refcount_t I went to look at the implementation, and the size of a refcount_t. But the code is not there?! You say it's finally merged, where is it merged? Don't send code like this without the necessary infrastructure being in mainline. -- Jens Axboe
[PATCH 0/5] block subsystem refcounter conversions
Now that the new refcount_t type and API are finally merged (see include/linux/refcount.h), the following patches convert various refcounters in the block subsystem from atomic_t to refcount_t. By doing this we prevent intentional or accidental underflows or overflows that can lead to use-after-free vulnerabilities. The below patches are fully independent and can be cherry-picked separately. Since we convert all kernel subsystems in the same fashion, resulting in about 300 patches, we have to group them for sending at least in some fashion to be manageable. Please excuse the long cc list. Elena Reshetova (5): block: convert bio.__bi_cnt from atomic_t to refcount_t block: convert blk_queue_tag.refcnt from atomic_t to refcount_t block: convert blkcg_gq.refcnt from atomic_t to refcount_t block: convert io_context.active_ref from atomic_t to refcount_t block: convert bsg_device.ref_count from atomic_t to refcount_t block/bio.c | 6 +++--- block/blk-cgroup.c | 2 +- block/blk-ioc.c | 4 ++-- block/blk-tag.c | 8 ++++---- block/bsg.c | 9 +++++---- block/cfq-iosched.c | 4 ++-- fs/btrfs/volumes.c | 2 +- include/linux/bio.h | 4 ++-- include/linux/blk-cgroup.h | 11 ++++++----- include/linux/blk_types.h | 3 ++- include/linux/blkdev.h | 3 ++- include/linux/iocontext.h | 7 ++++--- 12 files changed, 34 insertions(+), 29 deletions(-) -- 2.7.4
[PATCH 2/5] block: convert blk_queue_tag.refcnt from atomic_t to refcount_t
refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This helps avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor --- block/blk-tag.c | 8 ++++---- include/linux/blkdev.h | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/block/blk-tag.c b/block/blk-tag.c index 07cc329..d83555e 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -35,7 +35,7 @@ EXPORT_SYMBOL(blk_queue_find_tag); */ void blk_free_tags(struct blk_queue_tag *bqt) { - if (atomic_dec_and_test(&bqt->refcnt)) { + if (refcount_dec_and_test(&bqt->refcnt)) { BUG_ON(find_first_bit(bqt->tag_map, bqt->max_depth) < bqt->max_depth); @@ -130,7 +130,7 @@ static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, if (init_tag_map(q, tags, depth)) goto fail; - atomic_set(&tags->refcnt, 1); + refcount_set(&tags->refcnt, 1); tags->alloc_policy = alloc_policy; tags->next_tag = 0; return tags; @@ -180,7 +180,7 @@ int blk_queue_init_tags(struct request_queue *q, int depth, queue_flag_set(QUEUE_FLAG_QUEUED, q); return 0; } else - atomic_inc(&tags->refcnt); + refcount_inc(&tags->refcnt); /* * assign it, all done @@ -225,7 +225,7 @@ int blk_queue_resize_tags(struct request_queue *q, int new_depth) * Currently cannot replace a shared tag map with a new * one, so error out if this is the case */ - if (atomic_read(&bqt->refcnt) != 1) + if (refcount_read(&bqt->refcnt) != 1) return -EBUSY; /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aecca0e..95ef11c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -25,6 +25,7 @@ #include #include #include +#include <linux/refcount.h> struct module; struct scsi_ioctl_command; @@ -288,7 +289,7 @@ struct blk_queue_tag { unsigned long *tag_map; /* bit map of free/busy tags */ int max_depth; /* what we will send to device */ int real_max_depth; /* what the array can hold */ 
- atomic_t refcnt; /* map can be shared */ + refcount_t refcnt; /* map can be shared */ int alloc_policy; /* tag allocation policy */ int next_tag; /* next tag */ }; -- 2.7.4
[PATCH 1/5] block: convert bio.__bi_cnt from atomic_t to refcount_t
refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This helps avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor --- block/bio.c | 6 +++--- fs/btrfs/volumes.c | 2 +- include/linux/bio.h | 4 ++-- include/linux/blk_types.h | 3 ++- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/block/bio.c b/block/bio.c index 5eec5e0..3dffc17 100644 --- a/block/bio.c +++ b/block/bio.c @@ -275,7 +275,7 @@ void bio_init(struct bio *bio, struct bio_vec *table, { memset(bio, 0, sizeof(*bio)); atomic_set(&bio->__bi_remaining, 1); - atomic_set(&bio->__bi_cnt, 1); + refcount_set(&bio->__bi_cnt, 1); bio->bi_io_vec = table; bio->bi_max_vecs = max_vecs; @@ -543,12 +543,12 @@ void bio_put(struct bio *bio) if (!bio_flagged(bio, BIO_REFFED)) bio_free(bio); else { - BIO_BUG_ON(!atomic_read(&bio->__bi_cnt)); + BIO_BUG_ON(!refcount_read(&bio->__bi_cnt)); /* * last put frees it */ - if (atomic_dec_and_test(&bio->__bi_cnt)) + if (refcount_dec_and_test(&bio->__bi_cnt)) bio_free(bio); } } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 13e55d1..ff1fe9d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -441,7 +441,7 @@ static noinline void run_scheduled_bios(struct btrfs_device *device) waitqueue_active(&fs_info->async_submit_wait)) wake_up(&fs_info->async_submit_wait); - BUG_ON(atomic_read(&cur->__bi_cnt) == 0); + BUG_ON(refcount_read(&cur->__bi_cnt) == 0); /* * if we're doing the sync list, record that our diff --git a/include/linux/bio.h b/include/linux/bio.h index 8e52119..44ac678 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -234,7 +234,7 @@ static inline void bio_get(struct bio *bio) { bio->bi_flags |= (1 << BIO_REFFED); smp_mb__before_atomic(); - atomic_inc(&bio->__bi_cnt); + refcount_inc(&bio->__bi_cnt); } static inline void bio_cnt_set(struct bio *bio, unsigned int count) @@ -243,7 
+243,7 @@ static inline void bio_cnt_set(struct bio *bio, unsigned int count) bio->bi_flags |= (1 << BIO_REFFED); smp_mb__before_atomic(); } - atomic_set(&bio->__bi_cnt, count); + refcount_set(&bio->__bi_cnt, count); } static inline bool bio_flagged(struct bio *bio, unsigned int bit) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d703acb..c41407f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -7,6 +7,7 @@ #include #include +#include <linux/refcount.h> struct bio_set; struct bio; @@ -73,7 +74,7 @@ struct bio { unsigned short bi_max_vecs; /* max bvl_vecs we can hold */ - atomic_t __bi_cnt; /* pin count */ + refcount_t __bi_cnt; /* pin count */ struct bio_vec *bi_io_vec; /* the actual vec list */ -- 2.7.4
[PATCH 3/5] block: convert blkcg_gq.refcnt from atomic_t to refcount_t
refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This helps avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor --- block/blk-cgroup.c | 2 +- include/linux/blk-cgroup.h | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 295e98c2..75de844 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -106,7 +106,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, blkg->q = q; INIT_LIST_HEAD(&blkg->q_node); blkg->blkcg = blkcg; - atomic_set(&blkg->refcnt, 1); + refcount_set(&blkg->refcnt, 1); /* root blkg uses @q->root_rl, init rl only for !root blkgs */ if (blkcg != &blkcg_root) { diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 01b62e7..0d3efa9 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -19,6 +19,7 @@ #include #include #include +#include <linux/refcount.h> /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) @@ -122,7 +123,7 @@ struct blkcg_gq { struct request_list rl; /* reference count */ - atomic_t refcnt; + refcount_t refcnt; /* is this blkg online? 
protected by both blkcg and q locks */ bool online; @@ -354,8 +355,8 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) */ static inline void blkg_get(struct blkcg_gq *blkg) { - WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); - atomic_inc(&blkg->refcnt); + WARN_ON_ONCE(refcount_read(&blkg->refcnt) == 0); + refcount_inc(&blkg->refcnt); } void __blkg_release_rcu(struct rcu_head *rcu); @@ -366,8 +367,8 @@ void __blkg_release_rcu(struct rcu_head *rcu); */ static inline void blkg_put(struct blkcg_gq *blkg) { - WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); - if (atomic_dec_and_test(&blkg->refcnt)) + WARN_ON_ONCE(refcount_read(&blkg->refcnt) == 0); + if (refcount_dec_and_test(&blkg->refcnt)) call_rcu(&blkg->rcu_head, __blkg_release_rcu); } -- 2.7.4
[PATCH 5/5] block: convert bsg_device.ref_count from atomic_t to refcount_t
refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This helps avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor --- block/bsg.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index 74835db..6d0ceb9 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -21,6 +21,7 @@ #include #include #include +#include <linux/refcount.h> #include #include @@ -38,7 +39,7 @@ struct bsg_device { struct list_head busy_list; struct list_head done_list; struct hlist_node dev_list; - atomic_t ref_count; + refcount_t ref_count; int queued_cmds; int done_cmds; wait_queue_head_t wq_done; @@ -711,7 +712,7 @@ static int bsg_put_device(struct bsg_device *bd) mutex_lock(&bsg_mutex); - do_free = atomic_dec_and_test(&bd->ref_count); + do_free = refcount_dec_and_test(&bd->ref_count); if (!do_free) { mutex_unlock(&bsg_mutex); goto out; @@ -763,7 +764,7 @@ static struct bsg_device *bsg_add_device(struct inode *inode, bsg_set_block(bd, file); - atomic_set(&bd->ref_count, 1); + refcount_set(&bd->ref_count, 1); mutex_lock(&bsg_mutex); hlist_add_head(&bd->dev_list, bsg_dev_idx_hash(iminor(inode))); @@ -783,7 +784,7 @@ static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q) hlist_for_each_entry(bd, bsg_dev_idx_hash(minor), dev_list) { if (bd->queue == q) { - atomic_inc(&bd->ref_count); + refcount_inc(&bd->ref_count); goto found; } } -- 2.7.4
[PATCH 4/5] block: convert io_context.active_ref from atomic_t to refcount_t
refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This helps avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor --- block/blk-ioc.c | 4 ++-- block/cfq-iosched.c | 4 ++-- include/linux/iocontext.h | 7 ++++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/block/blk-ioc.c b/block/blk-ioc.c index b12f9c8..130dc23 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -173,7 +173,7 @@ void put_io_context_active(struct io_context *ioc) unsigned long flags; struct io_cq *icq; - if (!atomic_dec_and_test(&ioc->active_ref)) { + if (!refcount_dec_and_test(&ioc->active_ref)) { put_io_context(ioc); return; } @@ -256,7 +256,7 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node) /* initialize */ atomic_long_set(&ioc->refcount, 1); atomic_set(&ioc->nr_tasks, 1); - atomic_set(&ioc->active_ref, 1); + refcount_set(&ioc->active_ref, 1); spin_lock_init(&ioc->lock); INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH); INIT_HLIST_HEAD(&ioc->icq_list); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 9212627..2871bb9 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2959,7 +2959,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) * task has exited, don't wait */ cic = cfqd->active_cic; - if (!cic || !atomic_read(&cic->icq.ioc->active_ref)) + if (!cic || !refcount_read(&cic->icq.ioc->active_ref)) return; /* @@ -3959,7 +3959,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (cfqq->next_rq && req_noidle(cfqq->next_rq)) enable_idle = 0; - else if (!atomic_read(&cic->icq.ioc->active_ref) || + else if (!refcount_read(&cic->icq.ioc->active_ref) || !cfqd->cfq_slice_idle || (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) enable_idle = 0; diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index df38db2..a1e28c3 100644 --- 
a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -3,6 +3,7 @@ #include #include +#include <linux/refcount.h> #include enum { @@ -96,7 +97,7 @@ struct io_cq { */ struct io_context { atomic_long_t refcount; - atomic_t active_ref; + refcount_t active_ref; atomic_t nr_tasks; /* all the fields below are protected by this lock */ @@ -128,9 +129,9 @@ struct io_context { static inline void get_io_context_active(struct io_context *ioc) { WARN_ON_ONCE(atomic_long_read(&ioc->refcount) <= 0); - WARN_ON_ONCE(atomic_read(&ioc->active_ref) <= 0); + WARN_ON_ONCE(refcount_read(&ioc->active_ref) == 0); atomic_long_inc(&ioc->refcount); - atomic_inc(&ioc->active_ref); + refcount_inc(&ioc->active_ref); } static inline void ioc_task_link(struct io_context *ioc) -- 2.7.4