Re: [PATCH 02/28] crypto: omap-sham: Don't idle/start SHA device between Encrypt operations

2016-06-07 Thread Grygorii Strashko

On 06/07/2016 02:52 PM, Tero Kristo wrote:

On 07/06/16 13:08, Herbert Xu wrote:

On Wed, Jun 01, 2016 at 06:03:52PM -0500, Dave Gerlach wrote:

On 06/01/2016 04:53 AM, Grygorii Strashko wrote:

On 06/01/2016 11:56 AM, Tero Kristo wrote:

From: Lokesh Vutla 

Calling runtime PM API for every block causes serious perf hit to
crypto operations that are done on a long buffer.
As crypto is performed on a page boundary, encrypting large buffers
can
cause a series of crypto operations divided by page. The runtime PM
API
is also called those many times.

We call runtime_pm_get_sync only at beginning on the session
(cra_init)
and runtime_pm_put at the end. This result in upto a 50% speedup.
This doesn't make the driver to keep the system awake as runtime
get/put
is only called during a crypto session which completes usually
quickly.

Signed-off-by: Lokesh Vutla 
Signed-off-by: Tero Kristo 
---
  drivers/crypto/omap-sham.c | 27 +--
  1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index 6eefaa2..bd0258f 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -360,14 +360,6 @@ static void omap_sham_copy_ready_hash(struct
ahash_request *req)

  static int omap_sham_hw_init(struct omap_sham_dev *dd)
  {
-int err;
-
-err = pm_runtime_get_sync(dd->dev);
-if (err < 0) {
-dev_err(dd->dev, "failed to get sync: %d\n", err);
-return err;
-}
-


Would it be worth it to investigate a pm_runtime autosuspend
approach rather than knocking runtime PM out here completely? I am
not clear if the overhead is coming from the pm_runtime calls
themselves or the actual idling of the IP, but if it's the idling of
the IP causing the slowdown, with a large enough autosuspend_delay
we don't actually sleep between each block but after a long enough
period of idle time we would actually suspend.


Indeed, I think this patch is bogus.  cra_init is associated
with the tfm object which is usually long-lived.  So doing power
management there makes no sense.

Cheers,



I can investigate this further, but I believe this patch itself gave a
noticeable performance boost.

This is an optimization anyway, and not critical for functionality.



It is not critical only if below code would not introduce races
+   spin_lock_bh(&sham.lock);
+   list_for_each_entry(dd, &sham.dev_list, list) {
+   break;
+   }
+   spin_unlock_bh(&sham.lock);

Is it guaranteed that dd will alive always at this moment?

+
+   pm_runtime_get_sync(dd->dev);



--
regards,
-grygorii
--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 02/28] crypto: omap-sham: Don't idle/start SHA device between Encrypt operations

2016-06-01 Thread Grygorii Strashko

On 06/01/2016 11:56 AM, Tero Kristo wrote:

From: Lokesh Vutla 

Calling runtime PM API for every block causes serious perf hit to
crypto operations that are done on a long buffer.
As crypto is performed on a page boundary, encrypting large buffers can
cause a series of crypto operations divided by page. The runtime PM API
is also called those many times.

We call runtime_pm_get_sync only at beginning on the session (cra_init)
and runtime_pm_put at the end. This result in upto a 50% speedup.
This doesn't make the driver to keep the system awake as runtime get/put
is only called during a crypto session which completes usually quickly.

Signed-off-by: Lokesh Vutla 
Signed-off-by: Tero Kristo 
---
  drivers/crypto/omap-sham.c | 27 +--
  1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index 6eefaa2..bd0258f 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -360,14 +360,6 @@ static void omap_sham_copy_ready_hash(struct ahash_request 
*req)

  static int omap_sham_hw_init(struct omap_sham_dev *dd)
  {
-   int err;
-
-   err = pm_runtime_get_sync(dd->dev);
-   if (err < 0) {
-   dev_err(dd->dev, "failed to get sync: %d\n", err);
-   return err;
-   }
-
if (!test_bit(FLAGS_INIT, &dd->flags)) {
set_bit(FLAGS_INIT, &dd->flags);
dd->err = 0;
@@ -999,8 +991,6 @@ static void omap_sham_finish_req(struct ahash_request *req, 
int err)
dd->flags &= ~(BIT(FLAGS_BUSY) | BIT(FLAGS_FINAL) | BIT(FLAGS_CPU) |
BIT(FLAGS_DMA_READY) | BIT(FLAGS_OUTPUT_READY));

-   pm_runtime_put(dd->dev);
-
if (req->base.complete)
req->base.complete(&req->base, err);

@@ -1239,6 +1229,7 @@ static int omap_sham_cra_init_alg(struct crypto_tfm *tfm, 
const char *alg_base)
  {
struct omap_sham_ctx *tctx = crypto_tfm_ctx(tfm);
const char *alg_name = crypto_tfm_alg_name(tfm);
+   struct omap_sham_dev *dd;

/* Allocate a fallback and abort if it failed. */
tctx->fallback = crypto_alloc_shash(alg_name, 0,
@@ -1266,6 +1257,13 @@ static int omap_sham_cra_init_alg(struct crypto_tfm 
*tfm, const char *alg_base)

}

+   spin_lock_bh(&sham.lock);
+   list_for_each_entry(dd, &sham.dev_list, list) {
+   break;
+   }
+   spin_unlock_bh(&sham.lock);
+
+   pm_runtime_get_sync(dd->dev);
return 0;
  }

@@ -1307,6 +1305,7 @@ static int omap_sham_cra_sha512_init(struct crypto_tfm 
*tfm)
  static void omap_sham_cra_exit(struct crypto_tfm *tfm)
  {
struct omap_sham_ctx *tctx = crypto_tfm_ctx(tfm);
+   struct omap_sham_dev *dd;

crypto_free_shash(tctx->fallback);
tctx->fallback = NULL;
@@ -1315,6 +1314,14 @@ static void omap_sham_cra_exit(struct crypto_tfm *tfm)
struct omap_sham_hmac_ctx *bctx = tctx->base;
crypto_free_shash(bctx->shash);
}
+
+   spin_lock_bh(&sham.lock);
+   list_for_each_entry(dd, &sham.dev_list, list) {
+   break;
+   }
+   spin_unlock_bh(&sham.lock);
+
+   pm_runtime_get_sync(dd->dev);


May be put_?


  }

  static struct ahash_alg algs_sha1_md5[] = {




--
regards,
-grygorii
--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] hw_random: omap3-rom-rng: convert timer to delayed work

2015-11-06 Thread Grygorii Strashko

On 11/06/2015 12:15 AM, Aaro Koskinen wrote:

We cannot put the HW RNG to idle using a timer because we cannot disable
clocks from atomic context. Use a delayed work instead.

Fixes a warning with CONFIG_DEBUG_MUTEXES on Nokia N900 during boot.

Reported-by: Sebastian Reichel 
Signed-off-by: Aaro Koskinen 
---
  drivers/char/hw_random/omap3-rom-rng.c | 11 ++-
  1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/char/hw_random/omap3-rom-rng.c 
b/drivers/char/hw_random/omap3-rom-rng.c
index a405cdc..58191c6 100644
--- a/drivers/char/hw_random/omap3-rom-rng.c
+++ b/drivers/char/hw_random/omap3-rom-rng.c
@@ -29,11 +29,11 @@
  /* param1: ptr, param2: count, param3: flag */
  static u32 (*omap3_rom_rng_call)(u32, u32, u32);

-static struct timer_list idle_timer;
+static struct delayed_work idle_work;
  static int rng_idle;
  static struct clk *rng_clk;

-static void omap3_rom_rng_idle(unsigned long data)
+static void omap3_rom_rng_idle(struct work_struct *work)
  {
int r;

@@ -51,7 +51,7 @@ static int omap3_rom_rng_get_random(void *buf, unsigned int 
count)
u32 r;
u32 ptr;

-   del_timer_sync(&idle_timer);
+   cancel_delayed_work_sync(&idle_work);
if (rng_idle) {
clk_prepare_enable(rng_clk);
r = omap3_rom_rng_call(0, 0, RNG_GEN_PRNG_HW_INIT);
@@ -65,7 +65,7 @@ static int omap3_rom_rng_get_random(void *buf, unsigned int 
count)

ptr = virt_to_phys(buf);
r = omap3_rom_rng_call(ptr, count, RNG_GEN_HW);
-   mod_timer(&idle_timer, jiffies + msecs_to_jiffies(500));
+   schedule_delayed_work(&idle_work, msecs_to_jiffies(500));
if (r != 0)
return -EINVAL;
return 0;
@@ -102,7 +102,7 @@ static int omap3_rom_rng_probe(struct platform_device *pdev)
return -EINVAL;
}

-   setup_timer(&idle_timer, omap3_rom_rng_idle, 0);
+   INIT_DELAYED_WORK(&idle_work, omap3_rom_rng_idle);
rng_clk = devm_clk_get(&pdev->dev, "ick");
if (IS_ERR(rng_clk)) {
pr_err("unable to get RNG clock\n");
@@ -118,6 +118,7 @@ static int omap3_rom_rng_probe(struct platform_device *pdev)

  static int omap3_rom_rng_remove(struct platform_device *pdev)
  {
+   cancel_delayed_work_sync(&idle_work);
hwrng_unregister(&omap3_rom_rng_ops);
clk_disable_unprepare(rng_clk);
return 0;



Sry, It looks like PM runtime autosuspend might work well here.
What do you think?

--
regards,
-grygorii
--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html