diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi
index 55083b278da4dacf4e9114ca444bf11b2ae0caf0..0ea4860703e616315acd6d8d4f51c76598f6e3ef 100644
--- a/gcc/doc/generic.texi
+++ b/gcc/doc/generic.texi
@@ -1833,6 +1833,7 @@ a value from @code{enum annot_expr_kind}, the third is an @code{INTEGER_CST}.
 @tindex IFN_VEC_WIDEN_MINUS_LO
 @tindex IFN_VEC_WIDEN_MINUS_EVEN
 @tindex IFN_VEC_WIDEN_MINUS_ODD
+@tindex IFN_VEC_TRUNC_ADD_HIGH
 @tindex VEC_UNPACK_HI_EXPR
 @tindex VEC_UNPACK_LO_EXPR
 @tindex VEC_UNPACK_FLOAT_HI_EXPR
@@ -1955,6 +1956,24 @@ vector of @code{N/2} subtractions.  In the case of
 vector are subtracted from the odd @code{N/2} of the first to produce the
 vector of @code{N/2} subtractions.
 
+@item IFN_VEC_TRUNC_ADD_HIGH
+This internal function performs an element-wise addition of two input
+vectors, then extracts the most significant half of each result element,
+producing a vector whose elements are half the original width.
+
+Concretely, it computes:
+@code{(bits(a)/2)((a + b) >> bits(a)/2)}
+
+where @code{bits(a)} is the width in bits of each input element.
+
+Its operands are vectors containing the same number of elements (@code{N})
+of the same integral type.  The result is a vector of length @code{N}, with
+elements of an integral type whose size is half that of the input element
+type.
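+
+For instance, with unsigned 32-bit input elements each result element can be
+thought of as being computed by the following C sketch (the function name and
+the use of @code{stdint.h} types are purely illustrative):
+
+@smallexample
+uint16_t
+trunc_add_high_element (uint32_t a, uint32_t b)
+@{
+  /* Keep only the most significant half of the 32-bit sum.  */
+  return (uint16_t) ((a + b) >> 16);
+@}
+@end smallexample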
+
+This operation is currently only used for early break result compression,
+where the result of a vector boolean operation can be represented as 0 or -1.
+
 @item VEC_UNPACK_HI_EXPR
 @itemx VEC_UNPACK_LO_EXPR
 These nodes represent unpacking of the high and low parts of the input vector,
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 973c0dd302964966a91fa8dbab85930d6dbeec9e..bac22b6338042a5a546db7854988eab628f08eea 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6087,6 +6087,25 @@ vectors with N signed/unsigned elements of size S@.  Find the absolute
 difference between operands 1 and 2 and widen the resulting elements.
 Put the N/2 results of size 2*S in the output vector (operand 0).
 
+@cindex @code{vec_trunc_add_high@var{m}} instruction pattern
+@item @samp{vec_trunc_add_high@var{m}}
+Perform a signed or unsigned element-wise addition of the two input integer
+vectors of mode @var{m}, then extract the most significant half of each
+result element, producing a vector whose elements are half the original
+width.
+
+Concretely, it computes:
+@code{(bits(a)/2)((a + b) >> bits(a)/2)}
+
+where @code{bits(a)} is the width in bits of each input element.
+
+Operands 1 and 2 have integer vector mode @var{m} and contain the same number
+of signed or unsigned integral elements.  The result (operand @code{0}) has an
+integer vector mode with the same number of elements, but with elements of
+half the width of those of mode @var{m}.
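+
+For example, if @var{m} is @code{V4SI} then operand 0 has mode @code{V4HI},
+and each of its elements holds the most significant 16 bits of the
+corresponding 32-bit sum.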
+
+This operation is currently only used for early break result compression,
+where the result of a vector boolean operation can be represented as 0 or -1.
+
 @cindex @code{vec_addsub@var{m}3} instruction pattern
 @item @samp{vec_addsub@var{m}3}
 Alternating subtract, add with even lanes doing subtract and odd
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index d2480a1bf7927476215bc7bb99c0b74197d2b7e9..8434a805e289e109c49c53ef887a519112af1f33 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -422,6 +422,8 @@ DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT270, ECF_CONST, cadd270, binary)
 DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL, ECF_CONST, cmul, binary)
 DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL_CONJ, ECF_CONST, cmul_conj, binary)
 DEF_INTERNAL_OPTAB_FN (VEC_ADDSUB, ECF_CONST, vec_addsub, binary)
+DEF_INTERNAL_OPTAB_FN (VEC_TRUNC_ADD_HIGH, ECF_CONST | ECF_NOTHROW,
+		       vec_trunc_add_high, binary)
 DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_PLUS,
 				ECF_CONST | ECF_NOTHROW,
 				first,
diff --git a/gcc/optabs.def b/gcc/optabs.def
index b59d02bce14cd8cd4392ac568d2547601aac4481..790e43f08f476c8025dc2797f9ecaffe5b66acc5 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -493,6 +493,7 @@ OPTAB_D (vec_widen_uabd_hi_optab, "vec_widen_uabd_hi_$a")
 OPTAB_D (vec_widen_uabd_lo_optab, "vec_widen_uabd_lo_$a")
 OPTAB_D (vec_widen_uabd_odd_optab, "vec_widen_uabd_odd_$a")
 OPTAB_D (vec_widen_uabd_even_optab, "vec_widen_uabd_even_$a")
+OPTAB_D (vec_trunc_add_high_optab, "vec_trunc_add_high$a")
 OPTAB_D (vec_addsub_optab, "vec_addsub$a3")
 OPTAB_D (vec_fmaddsub_optab, "vec_fmaddsub$a4")
 OPTAB_D (vec_fmsubadd_optab, "vec_fmsubadd$a4")
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-addhn_1.c b/gcc/testsuite/gcc.target/aarch64/vect-early-break-addhn_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..b22e7d9c49d3588fa7e1e2c8eac43074109ccaed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-addhn_1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -fdump-tree-vect-details -std=c99" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define TYPE int
+#define N 800
+
+#pragma GCC target "+nosve"
+
+TYPE a[N];
+
+/*
+** foo:
+**	...
+**	ldp	q[0-9]+, q[0-9]+, \[x[0-9]+\], 32
+**	cmeq	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**	cmeq	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**	addhn	v[0-9]+.4h, v[0-9]+.4s, v[0-9]+.4s
+**	fmov	x[0-9]+, d[0-9]+
+**	...
+*/
+
+int foo ()
+{
+#pragma GCC unroll 8
+  for (int i = 0; i < N; i++)
+    if (a[i] == 124)
+      return 1;
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "VEC_TRUNC_ADD_HIGH" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-addhn_2.c b/gcc/testsuite/gcc.target/aarch64/vect-early-break-addhn_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..31d2515dcb907dc32a1eae7a31d89ecd64a06e60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-addhn_2.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -fdump-tree-vect-details -std=c99" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define TYPE long long
+#define N 800
+
+#pragma GCC target "+nosve"
+
+TYPE a[N];
+
+/*
+** foo:
+**	...
+**	ldp	q[0-9]+, q[0-9]+, \[x[0-9]+\], 32
+**	cmeq	v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d
+**	cmeq	v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d
+**	addhn	v[0-9]+.2s, v[0-9]+.2d, v[0-9]+.2d
+**	fmov	x[0-9]+, d[0-9]+
+**	...
+*/
+
+int foo ()
+{
+#pragma GCC unroll 4
+  for (int i = 0; i < N; i++)
+    if (a[i] == 124)
+      return 1;
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "VEC_TRUNC_ADD_HIGH" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-addhn_3.c b/gcc/testsuite/gcc.target/aarch64/vect-early-break-addhn_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..375fe1788af76138d0d3798eec1a128e7c8f9a04
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-addhn_3.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -fdump-tree-vect-details -std=c99" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define TYPE short
+#define N 800
+
+#pragma GCC target "+nosve"
+
+TYPE a[N];
+
+/*
+** foo:
+**	...
+**	ldp	q[0-9]+, q[0-9]+, \[x[0-9]+\], 32
+**	cmeq	v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h
+**	cmeq	v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h
+**	addhn	v[0-9]+.8b, v[0-9]+.8h, v[0-9]+.8h
+**	fmov	x[0-9]+, d[0-9]+
+**	...
+*/
+
+int foo ()
+{
+#pragma GCC unroll 16
+  for (int i = 0; i < N; i++)
+    if (a[i] == 124)
+      return 1;
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "VEC_TRUNC_ADD_HIGH" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-addhn_4.c b/gcc/testsuite/gcc.target/aarch64/vect-early-break-addhn_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..e584bfac6271a07680b09a5aad586f6dbdd53f1d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-addhn_4.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -fdump-tree-vect-details -std=c99" } */
+
+#define TYPE char
+#define N 800
+
+#pragma GCC target "+nosve"
+
+TYPE a[N];
+
+int foo ()
+{
+#pragma GCC unroll 32
+  for (int i = 0; i < N; i++)
+    if (a[i] == 124)
+      return 1;
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "VEC_TRUNC_ADD_HIGH" "vect" } } */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 9fcc2fd084987e564f496a02af7d2b8547c11cd5..a945cce0e67a28694ac7016714f9450bfa7b9aa9 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -12336,7 +12336,7 @@ vectorizable_early_exit (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
   gimple *orig_stmt = STMT_VINFO_STMT (vect_orig_stmt (stmt_info));
   gcond *cond_stmt = as_a <gcond *>(orig_stmt);
 
-  tree cst = build_zero_cst (vectype);
+  tree vectype_out = vectype;
   auto bb = gimple_bb (cond_stmt);
   edge exit_true_edge = EDGE_SUCC (bb, 0);
   if (exit_true_edge->flags & EDGE_FALSE_VALUE)
@@ -12353,10 +12353,37 @@ vectorizable_early_exit (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
   bool flipped = flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
 					exit_true_edge->dest);
 
+  /* See if we support ADDHN and use that for the reduction.  */
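+  /* For mask vectors whose elements are all zeros or all ones, the high
+     half of the sum of two elements is all ones iff either element was
+     nonzero, so the narrowing high-part add combines two masks like a
+     bitwise IOR while halving the element width.  */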
+  internal_fn ifn = IFN_VEC_TRUNC_ADD_HIGH;
+  bool addhn_supported_p
+    = direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_BOTH);
+  tree narrow_type = NULL_TREE;
+  if (addhn_supported_p)
+    {
+      /* Calculate the narrowing type for the result.  */
+      auto halfprec = TYPE_PRECISION (TREE_TYPE (vectype)) / 2;
+      auto unsignedp = TYPE_UNSIGNED (TREE_TYPE (vectype));
+      tree itype = build_nonstandard_integer_type (halfprec, unsignedp);
+      tree tmp_type = build_vector_type (itype, TYPE_VECTOR_SUBPARTS (vectype));
+      narrow_type = truth_type_for (tmp_type);
+
+      if (direct_optab_handler (cbranch_optab, TYPE_MODE (narrow_type))
+	  == CODE_FOR_nothing)
+	{
+	  if (dump_enabled_p ())
+	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			       "can't use ADDHN reduction because cbranch for "
+			       "the narrowed type is not supported by the "
+			       "target.\n");
+	  addhn_supported_p = false;
+	}
+    }
+
   /* Analyze only.  */
   if (cost_vec)
     {
-      if (direct_optab_handler (cbranch_optab, mode) == CODE_FOR_nothing)
+      if (!addhn_supported_p
+	  && direct_optab_handler (cbranch_optab, mode) == CODE_FOR_nothing)
 	{
 	  if (dump_enabled_p ())
 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -12462,10 +12489,22 @@ vectorizable_early_exit (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
 
       while (workset.length () > 1)
 	{
-	  new_temp = make_temp_ssa_name (vectype, NULL, "vexit_reduc");
 	  tree arg0 = workset.pop ();
 	  tree arg1 = workset.pop ();
-	  new_stmt = gimple_build_assign (new_temp, BIT_IOR_EXPR, arg0, arg1);
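+	  /* On the final combination step, use the narrowing high-part add
+	     instead of a bitwise IOR so that the exit condition compares a
+	     vector of half-width elements against zero.  */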
+	  if (addhn_supported_p && workset.length () == 0)
+	    {
+	      new_stmt = gimple_build_call_internal (ifn, 2, arg0, arg1);
+	      vectype_out = narrow_type;
+	      new_temp = make_temp_ssa_name (vectype_out, NULL, "vexit_reduc");
+	      gimple_call_set_lhs (as_a <gcall *> (new_stmt), new_temp);
+	      gimple_call_set_nothrow (as_a <gcall *> (new_stmt), true);
+	    }
+	  else
+	    {
+	      new_temp = make_temp_ssa_name (vectype_out, NULL, "vexit_reduc");
+	      new_stmt
+		= gimple_build_assign (new_temp, BIT_IOR_EXPR, arg0, arg1);
+	    }
 	  vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt,
 				       &cond_gsi);
 	  workset.quick_insert (0, new_temp);
@@ -12488,6 +12527,7 @@ vectorizable_early_exit (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
 
   gcc_assert (new_temp);
 
+  tree cst = build_zero_cst (vectype_out);
   gimple_cond_set_condition (cond_stmt, NE_EXPR, new_temp, cst);
   update_stmt (orig_stmt);
 
