Modify memory key generation to optimize the transfer of large messages on Sinai. The performance gain takes effect for messages larger than 80 KByte on Sinai DDR. The enhancement is active only when the configured HCA memory key (MPT) table size is up to and including 2^23; for larger tables the enhancement is disabled and a warning is logged.
Signed-off-by: Eli Cohen <[EMAIL PROTECTED]> Signed-off-by: Michael Tsirkin <[EMAIL PROTECTED]> Index: linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_profile.c =================================================================== --- linux-2.6.14.2.orig/drivers/infiniband/hw/mthca/mthca_profile.c +++ linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_profile.c @@ -153,8 +153,8 @@ u64 mthca_make_profile(struct mthca_dev "won't in 0x%llx bytes of context memory.\n", (unsigned long long) total_size, (unsigned long long) mem_avail); - kfree(profile); - return -ENOMEM; + total_size = -ENOMEM; + goto exit; } if (profile[i].size) @@ -260,6 +260,12 @@ u64 mthca_make_profile(struct mthca_dev */ dev->limits.num_pds = MTHCA_NUM_PDS; + if ((dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) && init_hca->log_mpt_sz > 23) { + mthca_warn(dev, "MPT table too large - disabling mkey optimization " + "for Sinai\n"); + dev->mthca_flags &= ~MTHCA_FLAG_SINAI_OPT; + } + /* * For Tavor, FMRs use ioremapped PCI memory. For 32 bit * systems it may use too much vmalloc space to map all MTT @@ -272,6 +278,7 @@ u64 mthca_make_profile(struct mthca_dev else dev->limits.fmr_reserved_mtts = request->fmr_reserved_mtts; +exit: kfree(profile); return total_size; } Index: linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_main.c =================================================================== --- linux-2.6.14.2.orig/drivers/infiniband/hw/mthca/mthca_main.c +++ linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_main.c @@ -935,13 +935,16 @@ enum { static struct { u64 latest_fw; - int is_memfree; - int is_pcie; + u32 flags; } mthca_hca_table[] = { - [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 3, 3), .is_memfree = 0, .is_pcie = 0 }, - [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 0), .is_memfree = 0, .is_pcie = 1 }, - [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0), .is_memfree = 1, .is_pcie = 1 }, - [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 1), .is_memfree = 1, .is_pcie = 1 } + [TAVOR] = { .latest_fw = 
MTHCA_FW_VER(3, 3, 3), + .flags = 0 }, + [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 0), + .flags = MTHCA_FLAG_PCIE }, + [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0), + .flags = MTHCA_FLAG_MEMFREE | MTHCA_FLAG_PCIE }, + [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 1), + .flags = MTHCA_FLAG_MEMFREE | MTHCA_FLAG_PCIE | MTHCA_FLAG_SINAI_OPT } }; static int __devinit mthca_init_one(struct pci_dev *pdev, @@ -1031,12 +1034,9 @@ static int __devinit mthca_init_one(stru mdev->pdev = pdev; + mdev->mthca_flags = mthca_hca_table[id->driver_data].flags; if (ddr_hidden) mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN; - if (mthca_hca_table[id->driver_data].is_memfree) - mdev->mthca_flags |= MTHCA_FLAG_MEMFREE; - if (mthca_hca_table[id->driver_data].is_pcie) - mdev->mthca_flags |= MTHCA_FLAG_PCIE; /* * Now reset the HCA before we touch the PCI capabilities or Index: linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_dev.h =================================================================== --- linux-2.6.14.2.orig/drivers/infiniband/hw/mthca/mthca_dev.h +++ linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_dev.h @@ -71,7 +71,8 @@ enum { MTHCA_FLAG_NO_LAM = 1 << 5, MTHCA_FLAG_FMR = 1 << 6, MTHCA_FLAG_MEMFREE = 1 << 7, - MTHCA_FLAG_PCIE = 1 << 8 + MTHCA_FLAG_PCIE = 1 << 8, + MTHCA_FLAG_SINAI_OPT = 1 << 9 }; enum { Index: linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_mr.c =================================================================== --- linux-2.6.14.2.orig/drivers/infiniband/hw/mthca/mthca_mr.c +++ linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_mr.c @@ -76,6 +76,8 @@ struct mthca_mpt_entry { #define MTHCA_MPT_STATUS_SW 0xF0 #define MTHCA_MPT_STATUS_HW 0x00 +#define SINAI_FMR_KEY_INC 0x1000000 + /* * Buddy allocator for MTT segments (currently not very efficient * since it doesn't keep a free list and just searches linearly @@ -330,6 +332,14 @@ static inline u32 key_to_hw_index(struct return tavor_key_to_hw_index(key); } +static inline u32 adjust_key(struct mthca_dev 
*dev, u32 key) +{ + if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) + return ((key << 20) & 0x800000) | (key & 0x7fffff); + else + return key; +} + int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, u64 iova, u64 total_size, u32 access, struct mthca_mr *mr) { @@ -345,6 +355,7 @@ int mthca_mr_alloc(struct mthca_dev *dev key = mthca_alloc(&dev->mr_table.mpt_alloc); if (key == -1) return -ENOMEM; + key = adjust_key(dev, key); mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key); if (mthca_is_memfree(dev)) { @@ -504,6 +515,7 @@ int mthca_fmr_alloc(struct mthca_dev *de key = mthca_alloc(&dev->mr_table.mpt_alloc); if (key == -1) return -ENOMEM; + key = adjust_key(dev, key); idx = key & (dev->limits.num_mpts - 1); mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key); @@ -687,7 +699,10 @@ int mthca_arbel_map_phys_fmr(struct ib_f ++fmr->maps; key = arbel_key_to_hw_index(fmr->ibmr.lkey); - key += dev->limits.num_mpts; + if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) + key += SINAI_FMR_KEY_INC; + else + key += dev->limits.num_mpts; fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key); *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW; Index: linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_cmd.c =================================================================== --- linux-2.6.14.2.orig/drivers/infiniband/hw/mthca/mthca_cmd.c +++ linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1277,7 +1277,8 @@ int mthca_INIT_HCA(struct mthca_dev *dev int err; #define INIT_HCA_IN_SIZE 0x200 -#define INIT_HCA_FLAGS_OFFSET 0x014 +#define INIT_HCA_FLAGS1_OFFSET 0x00c +#define INIT_HCA_FLAGS2_OFFSET 0x014 #define INIT_HCA_QPC_OFFSET 0x020 #define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10) #define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17) @@ -1320,15 +1321,18 @@ int mthca_INIT_HCA(struct mthca_dev *dev memset(inbox, 0, INIT_HCA_IN_SIZE); + if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) + MTHCA_PUT(inbox, 0x1, INIT_HCA_FLAGS1_OFFSET); + 
#if defined(__LITTLE_ENDIAN) - *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1); + *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) &= ~cpu_to_be32(1 << 1); #elif defined(__BIG_ENDIAN) - *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1); + *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1 << 1); #else #error Host endianness not defined #endif /* Check port for UD address vector: */ - *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1); + *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1); /* We leave wqe_quota, responder_exu, etc as 0 (default) */ _______________________________________________ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general