diff --git a/gcc/testsuite/gcc.dg/optimize-bswapdi-3.c b/gcc/testsuite/gcc.dg/optimize-bswapdi-3.c
index bca7b4d..a10c23f 100644
--- a/gcc/testsuite/gcc.dg/optimize-bswapdi-3.c
+++ b/gcc/testsuite/gcc.dg/optimize-bswapdi-3.c
@@ -77,5 +77,6 @@ uint64_t read_be64_3 (void)
 	 | ((uint64_t) *(data + 1) << 48) | ((uint64_t) *data << 56);
 }
 
+/* { dg-final { scan-tree-dump-times "64 bit load in host endianness found at" 3 "bswap" } } */
 /* { dg-final { scan-tree-dump-times "64 bit bswap implementation found at" 3 "bswap" } } */
 /* { dg-final { cleanup-tree-dump "bswap" } } */
diff --git a/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c b/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
index 4dcd3e3..b57cd92 100644
--- a/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
+++ b/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
@@ -60,5 +60,6 @@ uint32_t read_be16_3 (void)
   return *(data + 1) | (*data << 8);
 }
 
+/* { dg-final { scan-tree-dump-times "16 bit load in host endianness found at" 3 "bswap" } } */
 /* { dg-final { scan-tree-dump-times "16 bit bswap implementation found at" 3 "bswap" } } */
 /* { dg-final { cleanup-tree-dump "bswap" } } */
diff --git a/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c b/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c
index b365b96..e59b310 100644
--- a/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c
+++ b/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c
@@ -62,5 +62,6 @@ uint32_t read_be32_3 (void)
 	 | (*data << 24);
 }
 
+/* { dg-final { scan-tree-dump-times "32 bit load in host endianness found at" 3 "bswap" } } */
 /* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 3 "bswap" } } */
 /* { dg-final { cleanup-tree-dump "bswap" } } */
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 930d612..6fea45b 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -171,15 +171,15 @@ static struct
 
 static struct
 {
-  /* Number of hand-written 16-bit bswaps found.  */
+  /* Number of hand-written 16-bit nop / bswaps found.  */
   int found_16bit;
 
-  /* Number of hand-written 32-bit bswaps found.  */
+  /* Number of hand-written 32-bit nop / bswaps found.  */
   int found_32bit;
 
-  /* Number of hand-written 64-bit bswaps found.  */
+  /* Number of hand-written 64-bit nop / bswaps found.  */
   int found_64bit;
-} bswap_stats;
+} nop_stats, bswap_stats;
 
 static struct
 {
@@ -1631,6 +1631,12 @@ struct symbolic_number {
   unsigned HOST_WIDE_INT range;
 };
 
+/* The number which the find_bswap_or_nop_1 result should match in
+   order to have a nop.  The number is masked according to the size of
+   the symbolic number before using it.  */
+#define CMPNOP (sizeof (HOST_WIDEST_INT) < 8 ? 0 : \
+  (unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201)
+
 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
    number N.  Return false if the requested operation is not permitted
    on a symbolic number.  */
@@ -1690,12 +1696,12 @@ verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
   return true;
 }
 
-/* Check if STMT might be a byte swap from a memory source and returns the
-   answer. If so, REF is that memory source and the base of the memory area
+/* Check if STMT might be a byte swap or a nop from a memory source and returns
+   the answer. If so, REF is that memory source and the base of the memory area
    accessed and the offset of the access from that base are recorded in N.  */
 
 bool
-find_bswap_load (gimple stmt, tree ref, struct symbolic_number *n)
+find_bswap_or_nop_load (gimple stmt, tree ref, struct symbolic_number *n)
 {
   /* Leaf node is an array or component ref. Memorize its base and
      offset from base to compare to other such leaf node.  */
@@ -1721,13 +1727,13 @@ find_bswap_load (gimple stmt, tree ref, struct symbolic_number *n)
   return true;
 }
 
-/* find_bswap_1 invokes itself recursively with N and tries to perform
-   the operation given by the rhs of STMT on the result.  If the
-   operation could successfully be executed the function returns the
-   tree expression of the source operand and NULL otherwise.  */
+/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
+   the operation given by the rhs of STMT on the result.  If the operation
+   could successfully be executed the function returns the tree expression of
+   the source operand and NULL otherwise.  */
 
 static tree
-find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
+find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit)
 {
   enum tree_code code;
   tree rhs1, rhs2 = NULL;
@@ -1740,7 +1746,7 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
 
   rhs1 = gimple_assign_rhs1 (stmt);
 
-  if (find_bswap_load (stmt, rhs1, n))
+  if (find_bswap_or_nop_load (stmt, rhs1, n))
     return rhs1;
 
   if (TREE_CODE (rhs1) != SSA_NAME)
@@ -1769,10 +1775,10 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
 	  && code != CONVERT_EXPR)
 	return NULL_TREE;
 
-      source_expr1 = find_bswap_1 (rhs1_stmt, n, limit - 1);
+      source_expr1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
 
-      /* If find_bswap_1 returned NULL, STMT is a leaf node and we have
-	 to initialize the symbolic number.  */
+      /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
+	 we have to initialize the symbolic number.  */
       if (!source_expr1 || gimple_assign_load_p (rhs1_stmt))
 	{
 	  /* Set up the symbolic number N by setting each byte to a
@@ -1784,8 +1790,7 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
 	    return NULL_TREE;
 	  n->size /= BITS_PER_UNIT;
 	  n->range = n->size;
-	  n->n = (sizeof (HOST_WIDEST_INT) < 8 ? 0 :
-		  (unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201);
+	  n->n = CMPNOP;
 
 	  if (n->size < (int)sizeof (HOST_WIDEST_INT))
 	    n->n &= ((unsigned HOST_WIDEST_INT)1 <<
@@ -1864,12 +1869,12 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
       switch (code)
 	{
 	case BIT_IOR_EXPR:
-	  source_expr1 = find_bswap_1 (rhs1_stmt, &n1, limit - 1);
+	  source_expr1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
 
 	  if (!source_expr1)
 	    return NULL_TREE;
 
-	  source_expr2 = find_bswap_1 (rhs2_stmt, &n2, limit - 1);
+	  source_expr2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
 
 	  if (n1.size != n2.size || !source_expr2)
 	    return NULL_TREE;
@@ -1961,21 +1966,24 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
   return NULL_TREE;
 }
 
-/* Check if STMT completes a bswap implementation consisting of ORs,
-   SHIFTs and ANDs.  If the source lies in memory, it also sets
-   *ALIAS_SET to the alias-set of the memory reference, *VUSE to its
-   VUSE and *SIZE to the size of the load needed.  At last, the
-   function returns the source tree expression.  */
+/* Check if STMT completes a bswap implementation or a read in a given
+   endianness consisting of ORs, SHIFTs and ANDs and sets *SWAP
+   accordingly.  If the source lies in memory, it also sets *ALIAS_SET
+   to the alias-set of the memory reference, *VUSE to its VUSE and
+   *SIZE to the size of the load needed.  Finally, the function
+   returns the source tree expression.  */
 
 static tree
-find_bswap (gimple stmt, tree *alias_set, tree *vuse, int *size)
+find_bswap_or_nop (gimple stmt, tree *alias_set, tree *vuse, bool *swap,
+		   int *size)
 {
-/* The number which the find_bswap result should match in order to
-   have a full byte swap.  The number is shifted to the right according
-   to the size of the symbolic number before using it.  */
-  unsigned HOST_WIDEST_INT cmp =
+/* The number which the find_bswap_or_nop_1 result should match in order
+   to have a full byte swap.  The number is shifted to the right
+   according to the size of the symbolic number before using it.  */
+  unsigned HOST_WIDEST_INT cmpxchg =
     sizeof (HOST_WIDEST_INT) < 8 ? 0 :
     (unsigned HOST_WIDEST_INT)0x01020304 << 32 | 0x05060708;
+  unsigned HOST_WIDEST_INT cmpnop = CMPNOP;
 
   struct symbolic_number n;
   tree source_expr;
@@ -1988,12 +1996,12 @@ find_bswap (gimple stmt, tree *alias_set, tree *vuse, int *size)
      in libgcc, and for initial shift/and operation of the src operand.  */
   limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
   limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
-  source_expr =  find_bswap_1 (stmt, &n, limit);
+  source_expr =  find_bswap_or_nop_1 (stmt, &n, limit);
 
   if (!source_expr)
     return NULL_TREE;
 
-  /* Zero out the extra bits of N and CMP.  */
+  /* Zero out the extra bits of N and CMP*.  */
   if (n.size < (int)sizeof (HOST_WIDEST_INT))
     {
       int tmpn;
@@ -2003,14 +2011,16 @@ find_bswap (gimple stmt, tree *alias_set, tree *vuse, int *size)
       n.n &= mask;
       /* Find real size of result (highest non zero byte).  */
       for (tmpn = n.n, rsize = 0; tmpn; tmpn >>= BITS_PER_UNIT, rsize++);
-      cmp >>= (sizeof (HOST_WIDEST_INT) - rsize) * BITS_PER_UNIT;
+      cmpxchg >>= (sizeof (HOST_WIDEST_INT) - rsize) * BITS_PER_UNIT;
+      cmpnop &= mask >> ((n.size - rsize) * BITS_PER_UNIT);
     }
   else
     rsize = n.size;
 
   /* A complete byte swap should make the symbolic number to start
-     with the largest digit in the highest order byte.  */
-  if (cmp != n.n)
+     with the largest digit in the highest order byte.  An unchanged symbolic
+     number indicates a read with the same endianness as the host.  */
+  if (n.n != cmpnop && n.n != cmpxchg)
     return NULL_TREE;
 
   *alias_set = NULL_TREE;
@@ -2023,6 +2033,15 @@ find_bswap (gimple stmt, tree *alias_set, tree *vuse, int *size)
     }
   else if (rsize != n.size)
     return NULL_TREE;
+  else if (n.n == cmpnop)
+    /* Useless bit manipulation performed by code.  */
+    return NULL_TREE;
+
+
+  if (n.n == cmpxchg)
+    *swap = 1;
+  else
+    *swap = 0;
 
   *size = n.size * BITS_PER_UNIT;
   return source_expr;
@@ -2062,20 +2081,23 @@ public:
 }; // class pass_optimize_bswap
 
 /* Perform the bswap optimization: replace the statement STMT at GSI
-   by a load of type LOAD_TYPE with VUSE and set-alias ALIAS_TYPE if
-   a memory source is involved (ALIAS_TYPE is non null), followed by
-   the builtin bswap invocation in FNDECL. SRC gives the source on
-   which STMT is operating a byteswap and TYPE_SIZE gives the size of
-   the expression involved for maintaining some statistics.  */
+   by a load of type LOAD_TYPE with VUSE and set-alias ALIAS_TYPE if a
+   memory source is involved (ALIAS_TYPE is non null), followed by
+   the builtin bswap invocation in FNDECL if SWAP is true.  SRC gives
+   the source on which STMT is operating a byteswap and TYPE_SIZE
+   gives the size of the expression involved for maintaining some
+   statistics.  */
 
 static void
-bswap_replace (gimple stmt, gimple_stmt_iterator *gsi, tree bswap_src,
-	       tree fndecl, tree bswap_type, tree load_type, tree alias_type,
-	       tree vuse, int type_size)
+bswap_replace (gimple stmt, gimple_stmt_iterator *gsi, tree src, tree fndecl,
+	       tree bswap_type, tree load_type, tree alias_type, tree vuse,
+	       int type_size, bool swap)
 {
-  tree bswap_tmp;
+  tree tmp, tgt;
   gimple call;
 
+  tgt = gimple_assign_lhs (stmt);
+
   /* Need to load the value from memory first.  */
   if (alias_type)
     {
@@ -2085,7 +2107,7 @@ bswap_replace (gimple stmt, gimple_stmt_iterator *gsi, tree bswap_src,
 
       /*  Compute address to load from and cast according to the size
 	  of the load.  */
-      addr_expr = build_fold_addr_expr (unshare_expr (bswap_src));
+      addr_expr = build_fold_addr_expr (unshare_expr (src));
       if (is_gimple_min_invariant (addr_expr))
 	addr_tmp = addr_expr;
       else
@@ -2098,13 +2120,50 @@ bswap_replace (gimple stmt, gimple_stmt_iterator *gsi, tree bswap_src,
 
       /* Perform the load.  */
       load_offset_ptr = build_int_cst (alias_type, 0);
-      val_tmp = make_temp_ssa_name (load_type, NULL, "load_dst");
       val_expr = fold_build2 (MEM_REF, load_type, addr_tmp,
 			      load_offset_ptr);
-      load_stmt = gimple_build_assign (val_tmp, val_expr);
-      gimple_set_vuse (load_stmt, vuse);
-      gsi_insert_before (gsi, load_stmt, GSI_SAME_STMT);
-      bswap_src = val_tmp;
+
+      if (!swap)
+	{
+	  if (type_size == 16)
+	    nop_stats.found_16bit++;
+	  else if (type_size == 32)
+	    nop_stats.found_32bit++;
+	  else
+	    nop_stats.found_64bit++;
+
+	  /* Convert the result of load if necessary.  */
+	  if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
+	    {
+	      val_tmp = make_temp_ssa_name (load_type, NULL, "load_dst");
+	      load_stmt = gimple_build_assign (val_tmp, val_expr);
+	      gimple_set_vuse (load_stmt, vuse);
+	      gsi_insert_before (gsi, load_stmt, GSI_SAME_STMT);
+	      gimple_assign_set_rhs_with_ops_1 (gsi, NOP_EXPR, val_tmp,
+						NULL_TREE, NULL_TREE);
+	    }
+	  else
+	    gimple_assign_set_rhs_with_ops_1 (gsi, MEM_REF, val_expr,
+					      NULL_TREE, NULL_TREE);
+	  update_stmt (gsi_stmt (*gsi));
+
+	  if (dump_file)
+	    {
+	      fprintf (dump_file,
+		       "%d bit load in host endianness found at: ",
+		       (int)type_size);
+	      print_gimple_stmt (dump_file, stmt, 0, 0);
+	    }
+	  return;
+	}
+      else
+	{
+	  val_tmp = make_temp_ssa_name (load_type, NULL, "load_dst");
+	  load_stmt = gimple_build_assign (val_tmp, val_expr);
+	  gimple_set_vuse (load_stmt, vuse);
+	  gsi_insert_before (gsi, load_stmt, GSI_SAME_STMT);
+	}
+      src = val_tmp;
     }
 
   if (type_size == 16)
@@ -2114,34 +2173,31 @@ bswap_replace (gimple stmt, gimple_stmt_iterator *gsi, tree bswap_src,
   else
     bswap_stats.found_64bit++;
 
-  bswap_tmp = bswap_src;
+  tmp = src;
 
   /* Convert the src expression if necessary.  */
-  if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
+  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
     {
       gimple convert_stmt;
-      bswap_tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
-      convert_stmt = gimple_build_assign_with_ops (NOP_EXPR, bswap_tmp,
-						   bswap_src, NULL);
+      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
+      convert_stmt = gimple_build_assign_with_ops (NOP_EXPR, tmp, src, NULL);
       gsi_insert_before (gsi, convert_stmt, GSI_SAME_STMT);
     }
 
-  call = gimple_build_call (fndecl, 1, bswap_tmp);
+  call = gimple_build_call (fndecl, 1, tmp);
 
-  bswap_tmp = gimple_assign_lhs (stmt);
+  tmp = tgt;
 
   /* Convert the result if necessary.  */
-  if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
+  if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
     {
       gimple convert_stmt;
-      bswap_tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
-      convert_stmt = gimple_build_assign_with_ops (NOP_EXPR,
-						   gimple_assign_lhs (stmt),
-						   bswap_tmp, NULL);
+      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
+      convert_stmt = gimple_build_assign_with_ops (NOP_EXPR, tgt, tmp, NULL);
       gsi_insert_after (gsi, convert_stmt, GSI_SAME_STMT);
     }
 
-  gimple_call_set_lhs (call, bswap_tmp);
+  gimple_call_set_lhs (call, tmp);
 
   if (dump_file)
     {
@@ -2154,8 +2210,10 @@ bswap_replace (gimple stmt, gimple_stmt_iterator *gsi, tree bswap_src,
   gsi_remove (gsi, true);
 }
 
-/* Find manual byte swap implementations and turn them into a bswap
-   builtin invokation.  */
+/* Find manual byte swap implementations as well as loads in a given
+   endianness.  Byte swaps are turned into a bswap builtin invocation
+   while endian loads are converted to a bswap builtin invocation or a
+   simple load according to the host endianness.  */
 
 unsigned int
 pass_optimize_bswap::execute (function *fun)
@@ -2179,9 +2237,6 @@ pass_optimize_bswap::execute (function *fun)
 	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
 		   || (bswap32_p && word_mode == SImode)));
 
-  if (!bswap16_p && !bswap32_p && !bswap64_p)
-    return 0;
-
   /* Determine the argument type of the builtins.  The code later on
      assumes that the return and argument type are the same.  */
   if (bswap16_p)
@@ -2202,6 +2257,7 @@ pass_optimize_bswap::execute (function *fun)
       bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
     }
 
+  memset (&nop_stats, 0, sizeof (nop_stats));
   memset (&bswap_stats, 0, sizeof (bswap_stats));
 
   FOR_EACH_BB_FN (bb, fun)
@@ -2215,17 +2271,19 @@ pass_optimize_bswap::execute (function *fun)
       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
         {
 	  gimple stmt = gsi_stmt (gsi);
-	  tree bswap_src, bswap_type, load_type, alias_type, vuse = NULL_TREE;
-	  tree fndecl = NULL_TREE;
+	  tree src, load_type, alias_type;
+	  tree fndecl = NULL_TREE, vuse = NULL_TREE, bswap_type = NULL_TREE;
 	  int type_size;
+	  bool swap;
 
 	  if (!is_gimple_assign (stmt)
 	      || gimple_assign_rhs_code (stmt) != BIT_IOR_EXPR)
 	    continue;
 
-	  bswap_src = find_bswap (stmt, &alias_type, &vuse, &type_size);
+	  src = find_bswap_or_nop (stmt, &alias_type, &vuse, &swap,
+				   &type_size);
 
-	  if (!bswap_src)
+	  if (!src)
 	    continue;
 
 	  switch (type_size)
@@ -2258,15 +2316,21 @@ pass_optimize_bswap::execute (function *fun)
 	      continue;
 	    }
 
-	  if (!fndecl)
+	  if (swap && !fndecl)
 	    continue;
 
-	  bswap_replace (stmt, &gsi, bswap_src, fndecl, bswap_type, load_type,
-			 alias_type, vuse, type_size);
+	  bswap_replace (stmt, &gsi, src, fndecl, bswap_type, load_type,
+			 alias_type, vuse, type_size, swap);
 	  changed = true;
 	}
     }
 
+  statistics_counter_event (fun, "16-bit nop implementations found",
+			    nop_stats.found_16bit);
+  statistics_counter_event (fun, "32-bit nop implementations found",
+			    nop_stats.found_32bit);
+  statistics_counter_event (fun, "64-bit nop implementations found",
+			    nop_stats.found_64bit);
   statistics_counter_event (fun, "16-bit bswap implementations found",
 			    bswap_stats.found_16bit);
   statistics_counter_event (fun, "32-bit bswap implementations found",
