From: Ju-Zhe Zhong <juzhe.zh...@rivai.ai>

Address review comments from Richard by splitting out the patch that fixes
multiple-rgroup handling when the length counts elements.

This patch fixes the handling of multiple rgroups when the length counts
elements.

Before this patch, the multiple-rgroup run tests fail:
FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c execution test
FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c execution test
FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c execution test
FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c execution test
FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c execution test
FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c execution test
FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-1.c execution test

After this patch, all of these tests pass.

gcc/ChangeLog:

        * tree-vect-loop.cc (vect_get_loop_len): Fix issue for multiple-rgroup
        of length.
        * tree-vect-stmts.cc (vectorizable_store): Ditto.
        (vectorizable_load): Ditto.
        * tree-vectorizer.h (vect_get_loop_len): Ditto.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-1.c: New test.
        * gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-1.h: New test.
        * gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.c: New test.
        * gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h: New test.
        * gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-1.c: New test.
        * gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c: New test.

---
 .../rvv/autovec/partial/multiple_rgroup-1.c   |   6 +
 .../rvv/autovec/partial/multiple_rgroup-1.h   | 304 ++++++++++
 .../rvv/autovec/partial/multiple_rgroup-2.c   |   6 +
 .../rvv/autovec/partial/multiple_rgroup-2.h   | 546 ++++++++++++++++++
 .../autovec/partial/multiple_rgroup_run-1.c   |  19 +
 .../autovec/partial/multiple_rgroup_run-2.c   |  19 +
 gcc/tree-vect-loop.cc                         |  26 +-
 gcc/tree-vect-stmts.cc                        |  28 +-
 gcc/tree-vectorizer.h                         |   5 +-
 9 files changed, 944 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-1.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-1.c
new file mode 100644
index 00000000000..69cc3be78f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-1.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "multiple_rgroup-1.h"
+
+TEST_ALL (test_1)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-1.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-1.h
new file mode 100644
index 00000000000..fbc49f4855d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-1.h
@@ -0,0 +1,304 @@
+#include <stddef.h>
+#include <stdint-gcc.h>
+
+#define test_1(TYPE1, TYPE2)                                                   
\
+  void __attribute__ ((noinline, noclone))                                     
\
+  test_1_##TYPE1_##TYPE2 (TYPE1 *__restrict f, TYPE2 *__restrict d, TYPE1 x,   
\
+                         TYPE1 x2, TYPE2 y, int n)                            \
+  {                                                                            
\
+    for (int i = 0; i < n; ++i)                                                
\
+      {                                                                        
\
+       f[i * 2 + 0] = x;                                                      \
+       f[i * 2 + 1] = x2;                                                     \
+       d[i] = y;                                                              \
+      }                                                                        
\
+  }
+
+#define run_1(TYPE1, TYPE2)                                                    
\
+  int n_1_##TYPE1_##TYPE2 = 1;                                                 
\
+  TYPE1 x_1_##TYPE1 = 117;                                                     
\
+  TYPE1 x2_1_##TYPE1 = 232;                                                    
\
+  TYPE2 y_1_##TYPE2 = 9762;                                                    
\
+  TYPE1 f_1_##TYPE1[2 * 2 + 1] = {0};                                          
\
+  TYPE2 d_1_##TYPE2[2] = {0};                                                  
\
+  test_1_##TYPE1_##TYPE2 (f_1_##TYPE1, d_1_##TYPE2, x_1_##TYPE1, x2_1_##TYPE1, 
\
+                         y_1_##TYPE2, n_1_##TYPE1_##TYPE2);                   \
+  for (int i = 0; i < n_1_##TYPE1_##TYPE2; ++i)                                
\
+    {                                                                          
\
+      if (f_1_##TYPE1[i * 2 + 0] != x_1_##TYPE1)                               
\
+       __builtin_abort ();                                                    \
+      if (f_1_##TYPE1[i * 2 + 1] != x2_1_##TYPE1)                              
\
+       __builtin_abort ();                                                    \
+      if (d_1_##TYPE2[i] != y_1_##TYPE2)                                       
\
+       __builtin_abort ();                                                    \
+    }                                                                          
\
+  for (int i = n_1_##TYPE1_##TYPE2; i < n_1_##TYPE1_##TYPE2 + 1; ++i)          
\
+    {                                                                          
\
+      if (f_1_##TYPE1[i * 2 + 0] != 0)                                         
\
+       __builtin_abort ();                                                    \
+      if (f_1_##TYPE1[i * 2 + 1] != 0)                                         
\
+       __builtin_abort ();                                                    \
+      if (d_1_##TYPE2[i] != 0)                                                 
\
+       __builtin_abort ();                                                    \
+    }
+
+#define run_2(TYPE1, TYPE2)                                                    
\
+  int n_2_##TYPE1_##TYPE2 = 17;                                                
\
+  TYPE1 x_2_##TYPE1 = 133;                                                     
\
+  TYPE1 x2_2_##TYPE1 = 94;                                                     
\
+  TYPE2 y_2_##TYPE2 = 8672;                                                    
\
+  TYPE1 f_2_##TYPE1[18 * 2 + 1] = {0};                                         
\
+  TYPE2 d_2_##TYPE2[18] = {0};                                                 
\
+  test_1_##TYPE1_##TYPE2 (f_2_##TYPE1, d_2_##TYPE2, x_2_##TYPE1, x2_2_##TYPE1, 
\
+                         y_2_##TYPE2, n_2_##TYPE1_##TYPE2);                   \
+  for (int i = 0; i < n_2_##TYPE1_##TYPE2; ++i)                                
\
+    {                                                                          
\
+      if (f_2_##TYPE1[i * 2 + 0] != x_2_##TYPE1)                               
\
+       __builtin_abort ();                                                    \
+      if (f_2_##TYPE1[i * 2 + 1] != x2_2_##TYPE1)                              
\
+       __builtin_abort ();                                                    \
+      if (d_2_##TYPE2[i] != y_2_##TYPE2)                                       
\
+       __builtin_abort ();                                                    \
+    }                                                                          
\
+  for (int i = n_2_##TYPE1_##TYPE2; i < n_2_##TYPE1_##TYPE2 + 1; ++i)          
\
+    {                                                                          
\
+      if (f_2_##TYPE1[i * 2 + 0] != 0)                                         
\
+       __builtin_abort ();                                                    \
+      if (f_2_##TYPE1[i * 2 + 1] != 0)                                         
\
+       __builtin_abort ();                                                    \
+      if (d_2_##TYPE2[i] != 0)                                                 
\
+       __builtin_abort ();                                                    \
+    }
+
+#define run_3(TYPE1, TYPE2)                                                    
\
+  int n_3_##TYPE1_##TYPE2 = 32;                                                
\
+  TYPE1 x_3_##TYPE1 = 233;                                                     
\
+  TYPE1 x2_3_##TYPE1 = 78;                                                     
\
+  TYPE2 y_3_##TYPE2 = 1234;                                                    
\
+  TYPE1 f_3_##TYPE1[33 * 2 + 1] = {0};                                         
\
+  TYPE2 d_3_##TYPE2[33] = {0};                                                 
\
+  test_1_##TYPE1_##TYPE2 (f_3_##TYPE1, d_3_##TYPE2, x_3_##TYPE1, x2_3_##TYPE1, 
\
+                         y_3_##TYPE2, n_3_##TYPE1_##TYPE2);                   \
+  for (int i = 0; i < n_3_##TYPE1_##TYPE2; ++i)                                
\
+    {                                                                          
\
+      if (f_3_##TYPE1[i * 2 + 0] != x_3_##TYPE1)                               
\
+       __builtin_abort ();                                                    \
+      if (f_3_##TYPE1[i * 2 + 1] != x2_3_##TYPE1)                              
\
+       __builtin_abort ();                                                    \
+      if (d_3_##TYPE2[i] != y_3_##TYPE2)                                       
\
+       __builtin_abort ();                                                    \
+    }                                                                          
\
+  for (int i = n_3_##TYPE1_##TYPE2; i < n_3_##TYPE1_##TYPE2 + 1; ++i)          
\
+    {                                                                          
\
+      if (f_3_##TYPE1[i * 2 + 0] != 0)                                         
\
+       __builtin_abort ();                                                    \
+      if (f_3_##TYPE1[i * 2 + 1] != 0)                                         
\
+       __builtin_abort ();                                                    \
+      if (d_3_##TYPE2[i] != 0)                                                 
\
+       __builtin_abort ();                                                    \
+    }
+
+#define run_4(TYPE1, TYPE2)                                                    
\
+  int n_4_##TYPE1_##TYPE2 = 128;                                               
\
+  TYPE1 x_4_##TYPE1 = 222;                                                     
\
+  TYPE1 x2_4_##TYPE1 = 59;                                                     
\
+  TYPE2 y_4_##TYPE2 = 4321;                                                    
\
+  TYPE1 f_4_##TYPE1[129 * 2 + 1] = {0};                                        
\
+  TYPE2 d_4_##TYPE2[129] = {0};                                                
\
+  test_1_##TYPE1_##TYPE2 (f_4_##TYPE1, d_4_##TYPE2, x_4_##TYPE1, x2_4_##TYPE1, 
\
+                         y_4_##TYPE2, n_4_##TYPE1_##TYPE2);                   \
+  for (int i = 0; i < n_4_##TYPE1_##TYPE2; ++i)                                
\
+    {                                                                          
\
+      if (f_4_##TYPE1[i * 2 + 0] != x_4_##TYPE1)                               
\
+       __builtin_abort ();                                                    \
+      if (f_4_##TYPE1[i * 2 + 1] != x2_4_##TYPE1)                              
\
+       __builtin_abort ();                                                    \
+      if (d_4_##TYPE2[i] != y_4_##TYPE2)                                       
\
+       __builtin_abort ();                                                    \
+    }                                                                          
\
+  for (int i = n_4_##TYPE1_##TYPE2; i < n_4_##TYPE1_##TYPE2 + 1; ++i)          
\
+    {                                                                          
\
+      if (f_4_##TYPE1[i * 2 + 0] != 0)                                         
\
+       __builtin_abort ();                                                    \
+      if (f_4_##TYPE1[i * 2 + 1] != 0)                                         
\
+       __builtin_abort ();                                                    \
+      if (d_4_##TYPE2[i] != 0)                                                 
\
+       __builtin_abort ();                                                    \
+    }
+
+#define run_5(TYPE1, TYPE2)                                                    
\
+  int n_5_##TYPE1_##TYPE2 = 177;                                               
\
+  TYPE1 x_5_##TYPE1 = 111;                                                     
\
+  TYPE1 x2_5_##TYPE1 = 189;                                                    
\
+  TYPE2 y_5_##TYPE2 = 5555;                                                    
\
+  TYPE1 f_5_##TYPE1[178 * 2 + 1] = {0};                                        
\
+  TYPE2 d_5_##TYPE2[178] = {0};                                                
\
+  test_1_##TYPE1_##TYPE2 (f_5_##TYPE1, d_5_##TYPE2, x_5_##TYPE1, x2_5_##TYPE1, 
\
+                         y_5_##TYPE2, n_5_##TYPE1_##TYPE2);                   \
+  for (int i = 0; i < n_5_##TYPE1_##TYPE2; ++i)                                
\
+    {                                                                          
\
+      if (f_5_##TYPE1[i * 2 + 0] != x_5_##TYPE1)                               
\
+       __builtin_abort ();                                                    \
+      if (f_5_##TYPE1[i * 2 + 1] != x2_5_##TYPE1)                              
\
+       __builtin_abort ();                                                    \
+      if (d_5_##TYPE2[i] != y_5_##TYPE2)                                       
\
+       __builtin_abort ();                                                    \
+    }                                                                          
\
+  for (int i = n_5_##TYPE1_##TYPE2; i < n_5_##TYPE1_##TYPE2 + 1; ++i)          
\
+    {                                                                          
\
+      if (f_5_##TYPE1[i * 2 + 0] != 0)                                         
\
+       __builtin_abort ();                                                    \
+      if (f_5_##TYPE1[i * 2 + 1] != 0)                                         
\
+       __builtin_abort ();                                                    \
+      if (d_5_##TYPE2[i] != 0)                                                 
\
+       __builtin_abort ();                                                    \
+    }
+
+#define run_6(TYPE1, TYPE2)                                                    
\
+  int n_6_##TYPE1_##TYPE2 = 255;                                               
\
+  TYPE1 x_6_##TYPE1 = 123;                                                     
\
+  TYPE1 x2_6_##TYPE1 = 132;                                                    
\
+  TYPE2 y_6_##TYPE2 = 6655;                                                    
\
+  TYPE1 f_6_##TYPE1[256 * 2 + 1] = {0};                                        
\
+  TYPE2 d_6_##TYPE2[256] = {0};                                                
\
+  test_1_##TYPE1_##TYPE2 (f_6_##TYPE1, d_6_##TYPE2, x_6_##TYPE1, x2_6_##TYPE1, 
\
+                         y_6_##TYPE2, n_6_##TYPE1_##TYPE2);                   \
+  for (int i = 0; i < n_6_##TYPE1_##TYPE2; ++i)                                
\
+    {                                                                          
\
+      if (f_6_##TYPE1[i * 2 + 0] != x_6_##TYPE1)                               
\
+       __builtin_abort ();                                                    \
+      if (f_6_##TYPE1[i * 2 + 1] != x2_6_##TYPE1)                              
\
+       __builtin_abort ();                                                    \
+      if (d_6_##TYPE2[i] != y_6_##TYPE2)                                       
\
+       __builtin_abort ();                                                    \
+    }                                                                          
\
+  for (int i = n_6_##TYPE1_##TYPE2; i < n_6_##TYPE1_##TYPE2 + 1; ++i)          
\
+    {                                                                          
\
+      if (f_6_##TYPE1[i * 2 + 0] != 0)                                         
\
+       __builtin_abort ();                                                    \
+      if (f_6_##TYPE1[i * 2 + 1] != 0)                                         
\
+       __builtin_abort ();                                                    \
+      if (d_6_##TYPE2[i] != 0)                                                 
\
+       __builtin_abort ();                                                    \
+    }
+
+#define run_7(TYPE1, TYPE2)                                                    
\
+  int n_7_##TYPE1_##TYPE2 = 333;                                               
\
+  TYPE1 x_7_##TYPE1 = 39;                                                      
\
+  TYPE1 x2_7_##TYPE1 = 59;                                                     
\
+  TYPE2 y_7_##TYPE2 = 5968;                                                    
\
+  TYPE1 f_7_##TYPE1[334 * 2 + 1] = {0};                                        
\
+  TYPE2 d_7_##TYPE2[334] = {0};                                                
\
+  test_1_##TYPE1_##TYPE2 (f_7_##TYPE1, d_7_##TYPE2, x_7_##TYPE1, x2_7_##TYPE1, 
\
+                         y_7_##TYPE2, n_7_##TYPE1_##TYPE2);                   \
+  for (int i = 0; i < n_7_##TYPE1_##TYPE2; ++i)                                
\
+    {                                                                          
\
+      if (f_7_##TYPE1[i * 2 + 0] != x_7_##TYPE1)                               
\
+       __builtin_abort ();                                                    \
+      if (f_7_##TYPE1[i * 2 + 1] != x2_7_##TYPE1)                              
\
+       __builtin_abort ();                                                    \
+      if (d_7_##TYPE2[i] != y_7_##TYPE2)                                       
\
+       __builtin_abort ();                                                    \
+    }                                                                          
\
+  for (int i = n_7_##TYPE1_##TYPE2; i < n_7_##TYPE1_##TYPE2 + 1; ++i)          
\
+    {                                                                          
\
+      if (f_7_##TYPE1[i * 2 + 0] != 0)                                         
\
+       __builtin_abort ();                                                    \
+      if (f_7_##TYPE1[i * 2 + 1] != 0)                                         
\
+       __builtin_abort ();                                                    \
+      if (d_7_##TYPE2[i] != 0)                                                 
\
+       __builtin_abort ();                                                    \
+    }
+
+#define run_8(TYPE1, TYPE2)                                                    
\
+  int n_8_##TYPE1_##TYPE2 = 512;                                               
\
+  TYPE1 x_8_##TYPE1 = 71;                                                      
\
+  TYPE1 x2_8_##TYPE1 = 255;                                                    
\
+  TYPE2 y_8_##TYPE2 = 3366;                                                    
\
+  TYPE1 f_8_##TYPE1[513 * 2 + 1] = {0};                                        
\
+  TYPE2 d_8_##TYPE2[513] = {0};                                                
\
+  test_1_##TYPE1_##TYPE2 (f_8_##TYPE1, d_8_##TYPE2, x_8_##TYPE1, x2_8_##TYPE1, 
\
+                         y_8_##TYPE2, n_8_##TYPE1_##TYPE2);                   \
+  for (int i = 0; i < n_8_##TYPE1_##TYPE2; ++i)                                
\
+    {                                                                          
\
+      if (f_8_##TYPE1[i * 2 + 0] != x_8_##TYPE1)                               
\
+       __builtin_abort ();                                                    \
+      if (f_8_##TYPE1[i * 2 + 1] != x2_8_##TYPE1)                              
\
+       __builtin_abort ();                                                    \
+      if (d_8_##TYPE2[i] != y_8_##TYPE2)                                       
\
+       __builtin_abort ();                                                    \
+    }                                                                          
\
+  for (int i = n_8_##TYPE1_##TYPE2; i < n_8_##TYPE1_##TYPE2 + 1; ++i)          
\
+    {                                                                          
\
+      if (f_8_##TYPE1[i * 2 + 0] != 0)                                         
\
+       __builtin_abort ();                                                    \
+      if (f_8_##TYPE1[i * 2 + 1] != 0)                                         
\
+       __builtin_abort ();                                                    \
+      if (d_8_##TYPE2[i] != 0)                                                 
\
+       __builtin_abort ();                                                    \
+    }
+
+#define run_9(TYPE1, TYPE2)                                                    
\
+  int n_9_##TYPE1_##TYPE2 = 637;                                               
\
+  TYPE1 x_9_##TYPE1 = 157;                                                     
\
+  TYPE1 x2_9_##TYPE1 = 89;                                                     
\
+  TYPE2 y_9_##TYPE2 = 5511;                                                    
\
+  TYPE1 f_9_##TYPE1[638 * 2 + 1] = {0};                                        
\
+  TYPE2 d_9_##TYPE2[638] = {0};                                                
\
+  test_1_##TYPE1_##TYPE2 (f_9_##TYPE1, d_9_##TYPE2, x_9_##TYPE1, x2_9_##TYPE1, 
\
+                         y_9_##TYPE2, n_9_##TYPE1_##TYPE2);                   \
+  for (int i = 0; i < n_9_##TYPE1_##TYPE2; ++i)                                
\
+    {                                                                          
\
+      if (f_9_##TYPE1[i * 2 + 0] != x_9_##TYPE1)                               
\
+       __builtin_abort ();                                                    \
+      if (f_9_##TYPE1[i * 2 + 1] != x2_9_##TYPE1)                              
\
+       __builtin_abort ();                                                    \
+      if (d_9_##TYPE2[i] != y_9_##TYPE2)                                       
\
+       __builtin_abort ();                                                    \
+    }                                                                          
\
+  for (int i = n_9_##TYPE1_##TYPE2; i < n_9_##TYPE1_##TYPE2 + 1; ++i)          
\
+    {                                                                          
\
+      if (f_9_##TYPE1[i * 2 + 0] != 0)                                         
\
+       __builtin_abort ();                                                    \
+      if (f_9_##TYPE1[i * 2 + 1] != 0)                                         
\
+       __builtin_abort ();                                                    \
+      if (d_9_##TYPE2[i] != 0)                                                 
\
+       __builtin_abort ();                                                    \
+    }
+
+#define run_10(TYPE1, TYPE2)                                                   
\
+  int n_10_##TYPE1_##TYPE2 = 777;                                              
\
+  TYPE1 x_10_##TYPE1 = 203;                                                    
\
+  TYPE1 x2_10_##TYPE1 = 200;                                                   
\
+  TYPE2 y_10_##TYPE2 = 2023;                                                   
\
+  TYPE1 f_10_##TYPE1[778 * 2 + 1] = {0};                                       
\
+  TYPE2 d_10_##TYPE2[778] = {0};                                               
\
+  test_1_##TYPE1_##TYPE2 (f_10_##TYPE1, d_10_##TYPE2, x_10_##TYPE1,            
\
+                         x2_10_##TYPE1, y_10_##TYPE2, n_10_##TYPE1_##TYPE2);  \
+  for (int i = 0; i < n_10_##TYPE1_##TYPE2; ++i)                               
\
+    {                                                                          
\
+      if (f_10_##TYPE1[i * 2 + 0] != x_10_##TYPE1)                             
\
+       __builtin_abort ();                                                    \
+      if (f_10_##TYPE1[i * 2 + 1] != x2_10_##TYPE1)                            
\
+       __builtin_abort ();                                                    \
+      if (d_10_##TYPE2[i] != y_10_##TYPE2)                                     
\
+       __builtin_abort ();                                                    \
+    }                                                                          
\
+  for (int i = n_10_##TYPE1_##TYPE2; i < n_10_##TYPE1_##TYPE2 + 1; ++i)        
\
+    {                                                                          
\
+      if (f_10_##TYPE1[i * 2 + 0] != 0)                                        
\
+       __builtin_abort ();                                                    \
+      if (f_10_##TYPE1[i * 2 + 1] != 0)                                        
\
+       __builtin_abort ();                                                    \
+      if (d_10_##TYPE2[i] != 0)                                                
\
+       __builtin_abort ();                                                    \
+    }
+
+#define TEST_ALL(T)                                                            
\
+  T (int8_t, int16_t)                                                          
\
+  T (uint8_t, uint16_t)                                                        
\
+  T (int16_t, int32_t)                                                         
\
+  T (uint16_t, uint32_t)                                                       
\
+  T (int32_t, int64_t)                                                         
\
+  T (uint32_t, uint64_t)                                                       
\
+  T (float, double)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.c
new file mode 100644
index 00000000000..d1c41907547
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "multiple_rgroup-2.h"
+
+TEST_ALL (test_1)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h
new file mode 100644
index 00000000000..045a76de45f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h
@@ -0,0 +1,546 @@
+#include <stddef.h>
+#include <stdint-gcc.h>
+
+#define test_1(TYPE1, TYPE2, TYPE3) /* Loop storing to f, d and e.  */        \
+  void __attribute__ ((noinline, noclone))                                    \
+  test_1_##TYPE1_##TYPE2 (TYPE1 *__restrict f, TYPE2 *__restrict d,           \
+                         TYPE3 *__restrict e, TYPE1 x, TYPE1 x2, TYPE1 x3,    \
+                         TYPE1 x4, TYPE2 y, TYPE2 y2, TYPE3 z, int n)         \
+  { /* "TYPE1_" is no parameter, so the name is test_1_TYPE1_<TYPE2>.  */     \
+    for (int i = 0; i < n; ++i)                                               \
+      { /* Per i: four stores to f, two to d, one to e.  */                   \
+       f[i * 4 + 0] = x;                                                      \
+       f[i * 4 + 1] = x2;                                                     \
+       f[i * 4 + 2] = x3;                                                     \
+       f[i * 4 + 3] = x4;                                                     \
+       d[i * 2 + 0] = y;                                                      \
+       d[i * 2 + 1] = y2;                                                     \
+       e[i] = z;                                                              \
+      }                                                                       \
+  }
+
+#define run_1(TYPE1, TYPE2, TYPE3) /* Check test_1 with n = 1.  */            \
+  int n_1_##TYPE1_##TYPE2_##TYPE3 = 1; /* i.e. n_1_TYPE1_TYPE2_<TYPE3> */     \
+  TYPE1 x_1_##TYPE1 = 117;                                                    \
+  TYPE1 x2_1_##TYPE1 = 232;                                                   \
+  TYPE1 x3_1_##TYPE1 = 127;                                                   \
+  TYPE1 x4_1_##TYPE1 = 11;                                                    \
+  TYPE2 y_1_##TYPE2 = 9762;                                                   \
+  TYPE2 y2_1_##TYPE2 = 6279;                                                  \
+  TYPE3 z_1_##TYPE3 = 5891663;                                                \
+  TYPE1 f_1_##TYPE1[2 * 4 + 1] = {0};                                         \
+  TYPE2 d_1_##TYPE2[2 * 2 + 1] = {0};                                         \
+  TYPE3 e_1_##TYPE3[2] = {0};                                                 \
+  test_1_##TYPE1_##TYPE2 (f_1_##TYPE1, d_1_##TYPE2, e_1_##TYPE3, x_1_##TYPE1, \
+                         x2_1_##TYPE1, x3_1_##TYPE1, x4_1_##TYPE1,            \
+                         y_1_##TYPE2, y2_1_##TYPE2, z_1_##TYPE3,              \
+                         n_1_##TYPE1_##TYPE2_##TYPE3);                        \
+  for (int i = 0; i < n_1_##TYPE1_##TYPE2_##TYPE3; ++i)                       \
+    { /* Every written element must equal its source value.  */               \
+      if (f_1_##TYPE1[i * 4 + 0] != x_1_##TYPE1)                              \
+       __builtin_abort ();                                                    \
+      if (f_1_##TYPE1[i * 4 + 1] != x2_1_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (f_1_##TYPE1[i * 4 + 2] != x3_1_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (f_1_##TYPE1[i * 4 + 3] != x4_1_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (d_1_##TYPE2[i * 2 + 0] != y_1_##TYPE2)                              \
+       __builtin_abort ();                                                    \
+      if (d_1_##TYPE2[i * 2 + 1] != y2_1_##TYPE2)                             \
+       __builtin_abort ();                                                    \
+      if (e_1_##TYPE3[i] != z_1_##TYPE3)                                      \
+       __builtin_abort ();                                                    \
+    }                                                                         \
+  for (int i = n_1_##TYPE1_##TYPE2_##TYPE3;                                   \
+       i < n_1_##TYPE1_##TYPE2_##TYPE3 + 1; ++i)                              \
+    { /* The element group just past n must still be zero.  */                \
+      if (f_1_##TYPE1[i * 4 + 0] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_1_##TYPE1[i * 4 + 1] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_1_##TYPE1[i * 4 + 2] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_1_##TYPE1[i * 4 + 3] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (d_1_##TYPE2[i * 2 + 0] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (d_1_##TYPE2[i * 2 + 1] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (e_1_##TYPE3[i] != 0)                                                \
+       __builtin_abort ();                                                    \
+    }
+
+#define run_2(TYPE1, TYPE2, TYPE3) /* Check test_1 with n = 17.  */           \
+  int n_2_##TYPE1_##TYPE2_##TYPE3 = 17; /* i.e. n_2_TYPE1_TYPE2_<TYPE3> */    \
+  TYPE1 x_2_##TYPE1 = 107;                                                    \
+  TYPE1 x2_2_##TYPE1 = 202;                                                   \
+  TYPE1 x3_2_##TYPE1 = 17;                                                    \
+  TYPE1 x4_2_##TYPE1 = 53;                                                    \
+  TYPE2 y_2_##TYPE2 = 5566;                                                   \
+  TYPE2 y2_2_##TYPE2 = 7926;                                                  \
+  TYPE3 z_2_##TYPE3 = 781545971;                                              \
+  TYPE1 f_2_##TYPE1[18 * 4 + 1] = {0};                                        \
+  TYPE2 d_2_##TYPE2[18 * 2 + 1] = {0};                                        \
+  TYPE3 e_2_##TYPE3[18] = {0};                                                \
+  test_1_##TYPE1_##TYPE2 (f_2_##TYPE1, d_2_##TYPE2, e_2_##TYPE3, x_2_##TYPE1, \
+                         x2_2_##TYPE1, x3_2_##TYPE1, x4_2_##TYPE1,            \
+                         y_2_##TYPE2, y2_2_##TYPE2, z_2_##TYPE3,              \
+                         n_2_##TYPE1_##TYPE2_##TYPE3);                        \
+  for (int i = 0; i < n_2_##TYPE1_##TYPE2_##TYPE3; ++i)                       \
+    { /* Every written element must equal its source value.  */               \
+      if (f_2_##TYPE1[i * 4 + 0] != x_2_##TYPE1)                              \
+       __builtin_abort ();                                                    \
+      if (f_2_##TYPE1[i * 4 + 1] != x2_2_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (f_2_##TYPE1[i * 4 + 2] != x3_2_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (f_2_##TYPE1[i * 4 + 3] != x4_2_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (d_2_##TYPE2[i * 2 + 0] != y_2_##TYPE2)                              \
+       __builtin_abort ();                                                    \
+      if (d_2_##TYPE2[i * 2 + 1] != y2_2_##TYPE2)                             \
+       __builtin_abort ();                                                    \
+      if (e_2_##TYPE3[i] != z_2_##TYPE3)                                      \
+       __builtin_abort ();                                                    \
+    }                                                                         \
+  for (int i = n_2_##TYPE1_##TYPE2_##TYPE3;                                   \
+       i < n_2_##TYPE1_##TYPE2_##TYPE3 + 1; ++i)                              \
+    { /* The element group just past n must still be zero.  */                \
+      if (f_2_##TYPE1[i * 4 + 0] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_2_##TYPE1[i * 4 + 1] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_2_##TYPE1[i * 4 + 2] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_2_##TYPE1[i * 4 + 3] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (d_2_##TYPE2[i * 2 + 0] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (d_2_##TYPE2[i * 2 + 1] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (e_2_##TYPE3[i] != 0)                                                \
+       __builtin_abort ();                                                    \
+    }
+
+#define run_3(TYPE1, TYPE2, TYPE3) /* Check test_1 with n = 32.  */           \
+  int n_3_##TYPE1_##TYPE2_##TYPE3 = 32; /* i.e. n_3_TYPE1_TYPE2_<TYPE3> */    \
+  TYPE1 x_3_##TYPE1 = 109;                                                    \
+  TYPE1 x2_3_##TYPE1 = 239;                                                   \
+  TYPE1 x3_3_##TYPE1 = 151;                                                   \
+  TYPE1 x4_3_##TYPE1 = 3;                                                     \
+  TYPE2 y_3_##TYPE2 = 1234;                                                   \
+  TYPE2 y2_3_##TYPE2 = 4321;                                                  \
+  TYPE3 z_3_##TYPE3 = 145615615;                                              \
+  TYPE1 f_3_##TYPE1[33 * 4 + 1] = {0};                                        \
+  TYPE2 d_3_##TYPE2[33 * 2 + 1] = {0};                                        \
+  TYPE3 e_3_##TYPE3[33] = {0};                                                \
+  test_1_##TYPE1_##TYPE2 (f_3_##TYPE1, d_3_##TYPE2, e_3_##TYPE3, x_3_##TYPE1, \
+                         x2_3_##TYPE1, x3_3_##TYPE1, x4_3_##TYPE1,            \
+                         y_3_##TYPE2, y2_3_##TYPE2, z_3_##TYPE3,              \
+                         n_3_##TYPE1_##TYPE2_##TYPE3);                        \
+  for (int i = 0; i < n_3_##TYPE1_##TYPE2_##TYPE3; ++i)                       \
+    { /* Every written element must equal its source value.  */               \
+      if (f_3_##TYPE1[i * 4 + 0] != x_3_##TYPE1)                              \
+       __builtin_abort ();                                                    \
+      if (f_3_##TYPE1[i * 4 + 1] != x2_3_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (f_3_##TYPE1[i * 4 + 2] != x3_3_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (f_3_##TYPE1[i * 4 + 3] != x4_3_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (d_3_##TYPE2[i * 2 + 0] != y_3_##TYPE2)                              \
+       __builtin_abort ();                                                    \
+      if (d_3_##TYPE2[i * 2 + 1] != y2_3_##TYPE2)                             \
+       __builtin_abort ();                                                    \
+      if (e_3_##TYPE3[i] != z_3_##TYPE3)                                      \
+       __builtin_abort ();                                                    \
+    }                                                                         \
+  for (int i = n_3_##TYPE1_##TYPE2_##TYPE3;                                   \
+       i < n_3_##TYPE1_##TYPE2_##TYPE3 + 1; ++i)                              \
+    { /* The element group just past n must still be zero.  */                \
+      if (f_3_##TYPE1[i * 4 + 0] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_3_##TYPE1[i * 4 + 1] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_3_##TYPE1[i * 4 + 2] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_3_##TYPE1[i * 4 + 3] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (d_3_##TYPE2[i * 2 + 0] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (d_3_##TYPE2[i * 2 + 1] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (e_3_##TYPE3[i] != 0)                                                \
+       __builtin_abort ();                                                    \
+    }
+
+#define run_4(TYPE1, TYPE2, TYPE3) /* Check test_1 with n = 128.  */          \
+  int n_4_##TYPE1_##TYPE2_##TYPE3 = 128; /* i.e. n_4_TYPE1_TYPE2_<TYPE3> */   \
+  TYPE1 x_4_##TYPE1 = 239;                                                    \
+  TYPE1 x2_4_##TYPE1 = 132;                                                   \
+  TYPE1 x3_4_##TYPE1 = 39;                                                    \
+  TYPE1 x4_4_##TYPE1 = 48;                                                    \
+  TYPE2 y_4_##TYPE2 = 1036;                                                   \
+  TYPE2 y2_4_##TYPE2 = 3665;                                                  \
+  TYPE3 z_4_##TYPE3 = 5145656;                                                \
+  TYPE1 f_4_##TYPE1[129 * 4 + 1] = {0};                                       \
+  TYPE2 d_4_##TYPE2[129 * 2 + 1] = {0};                                       \
+  TYPE3 e_4_##TYPE3[129] = {0};                                               \
+  test_1_##TYPE1_##TYPE2 (f_4_##TYPE1, d_4_##TYPE2, e_4_##TYPE3, x_4_##TYPE1, \
+                         x2_4_##TYPE1, x3_4_##TYPE1, x4_4_##TYPE1,            \
+                         y_4_##TYPE2, y2_4_##TYPE2, z_4_##TYPE3,              \
+                         n_4_##TYPE1_##TYPE2_##TYPE3);                        \
+  for (int i = 0; i < n_4_##TYPE1_##TYPE2_##TYPE3; ++i)                       \
+    { /* Every written element must equal its source value.  */               \
+      if (f_4_##TYPE1[i * 4 + 0] != x_4_##TYPE1)                              \
+       __builtin_abort ();                                                    \
+      if (f_4_##TYPE1[i * 4 + 1] != x2_4_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (f_4_##TYPE1[i * 4 + 2] != x3_4_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (f_4_##TYPE1[i * 4 + 3] != x4_4_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (d_4_##TYPE2[i * 2 + 0] != y_4_##TYPE2)                              \
+       __builtin_abort ();                                                    \
+      if (d_4_##TYPE2[i * 2 + 1] != y2_4_##TYPE2)                             \
+       __builtin_abort ();                                                    \
+      if (e_4_##TYPE3[i] != z_4_##TYPE3)                                      \
+       __builtin_abort ();                                                    \
+    }                                                                         \
+  for (int i = n_4_##TYPE1_##TYPE2_##TYPE3;                                   \
+       i < n_4_##TYPE1_##TYPE2_##TYPE3 + 1; ++i)                              \
+    { /* The element group just past n must still be zero.  */                \
+      if (f_4_##TYPE1[i * 4 + 0] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_4_##TYPE1[i * 4 + 1] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_4_##TYPE1[i * 4 + 2] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_4_##TYPE1[i * 4 + 3] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (d_4_##TYPE2[i * 2 + 0] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (d_4_##TYPE2[i * 2 + 1] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (e_4_##TYPE3[i] != 0)                                                \
+       __builtin_abort ();                                                    \
+    }
+
+#define run_5(TYPE1, TYPE2, TYPE3) /* Check test_1 with n = 177.  */          \
+  int n_5_##TYPE1_##TYPE2_##TYPE3 = 177; /* i.e. n_5_TYPE1_TYPE2_<TYPE3> */   \
+  TYPE1 x_5_##TYPE1 = 239;                                                    \
+  TYPE1 x2_5_##TYPE1 = 132;                                                   \
+  TYPE1 x3_5_##TYPE1 = 39;                                                    \
+  TYPE1 x4_5_##TYPE1 = 48;                                                    \
+  TYPE2 y_5_##TYPE2 = 1036;                                                   \
+  TYPE2 y2_5_##TYPE2 = 3665;                                                  \
+  TYPE3 z_5_##TYPE3 = 5145656;                                                \
+  TYPE1 f_5_##TYPE1[178 * 4 + 1] = {0};                                       \
+  TYPE2 d_5_##TYPE2[178 * 2 + 1] = {0};                                       \
+  TYPE3 e_5_##TYPE3[178] = {0};                                               \
+  test_1_##TYPE1_##TYPE2 (f_5_##TYPE1, d_5_##TYPE2, e_5_##TYPE3, x_5_##TYPE1, \
+                         x2_5_##TYPE1, x3_5_##TYPE1, x4_5_##TYPE1,            \
+                         y_5_##TYPE2, y2_5_##TYPE2, z_5_##TYPE3,              \
+                         n_5_##TYPE1_##TYPE2_##TYPE3);                        \
+  for (int i = 0; i < n_5_##TYPE1_##TYPE2_##TYPE3; ++i)                       \
+    { /* Every written element must equal its source value.  */               \
+      if (f_5_##TYPE1[i * 4 + 0] != x_5_##TYPE1)                              \
+       __builtin_abort ();                                                    \
+      if (f_5_##TYPE1[i * 4 + 1] != x2_5_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (f_5_##TYPE1[i * 4 + 2] != x3_5_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (f_5_##TYPE1[i * 4 + 3] != x4_5_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (d_5_##TYPE2[i * 2 + 0] != y_5_##TYPE2)                              \
+       __builtin_abort ();                                                    \
+      if (d_5_##TYPE2[i * 2 + 1] != y2_5_##TYPE2)                             \
+       __builtin_abort ();                                                    \
+      if (e_5_##TYPE3[i] != z_5_##TYPE3)                                      \
+       __builtin_abort ();                                                    \
+    }                                                                         \
+  for (int i = n_5_##TYPE1_##TYPE2_##TYPE3;                                   \
+       i < n_5_##TYPE1_##TYPE2_##TYPE3 + 1; ++i)                              \
+    { /* The element group just past n must still be zero.  */                \
+      if (f_5_##TYPE1[i * 4 + 0] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_5_##TYPE1[i * 4 + 1] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_5_##TYPE1[i * 4 + 2] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_5_##TYPE1[i * 4 + 3] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (d_5_##TYPE2[i * 2 + 0] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (d_5_##TYPE2[i * 2 + 1] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (e_5_##TYPE3[i] != 0)                                                \
+       __builtin_abort ();                                                    \
+    }
+
+#define run_6(TYPE1, TYPE2, TYPE3) /* Check test_1 with n = 255.  */          \
+  int n_6_##TYPE1_##TYPE2_##TYPE3 = 255; /* i.e. n_6_TYPE1_TYPE2_<TYPE3> */   \
+  TYPE1 x_6_##TYPE1 = 239;                                                    \
+  TYPE1 x2_6_##TYPE1 = 132;                                                   \
+  TYPE1 x3_6_##TYPE1 = 39;                                                    \
+  TYPE1 x4_6_##TYPE1 = 48;                                                    \
+  TYPE2 y_6_##TYPE2 = 1036;                                                   \
+  TYPE2 y2_6_##TYPE2 = 3665;                                                  \
+  TYPE3 z_6_##TYPE3 = 5145656;                                                \
+  TYPE1 f_6_##TYPE1[256 * 4 + 1] = {0};                                       \
+  TYPE2 d_6_##TYPE2[256 * 2 + 1] = {0};                                       \
+  TYPE3 e_6_##TYPE3[256] = {0};                                               \
+  test_1_##TYPE1_##TYPE2 (f_6_##TYPE1, d_6_##TYPE2, e_6_##TYPE3, x_6_##TYPE1, \
+                         x2_6_##TYPE1, x3_6_##TYPE1, x4_6_##TYPE1,            \
+                         y_6_##TYPE2, y2_6_##TYPE2, z_6_##TYPE3,              \
+                         n_6_##TYPE1_##TYPE2_##TYPE3);                        \
+  for (int i = 0; i < n_6_##TYPE1_##TYPE2_##TYPE3; ++i)                       \
+    { /* Every written element must equal its source value.  */               \
+      if (f_6_##TYPE1[i * 4 + 0] != x_6_##TYPE1)                              \
+       __builtin_abort ();                                                    \
+      if (f_6_##TYPE1[i * 4 + 1] != x2_6_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (f_6_##TYPE1[i * 4 + 2] != x3_6_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (f_6_##TYPE1[i * 4 + 3] != x4_6_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (d_6_##TYPE2[i * 2 + 0] != y_6_##TYPE2)                              \
+       __builtin_abort ();                                                    \
+      if (d_6_##TYPE2[i * 2 + 1] != y2_6_##TYPE2)                             \
+       __builtin_abort ();                                                    \
+      if (e_6_##TYPE3[i] != z_6_##TYPE3)                                      \
+       __builtin_abort ();                                                    \
+    }                                                                         \
+  for (int i = n_6_##TYPE1_##TYPE2_##TYPE3;                                   \
+       i < n_6_##TYPE1_##TYPE2_##TYPE3 + 1; ++i)                              \
+    { /* The element group just past n must still be zero.  */                \
+      if (f_6_##TYPE1[i * 4 + 0] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_6_##TYPE1[i * 4 + 1] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_6_##TYPE1[i * 4 + 2] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_6_##TYPE1[i * 4 + 3] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (d_6_##TYPE2[i * 2 + 0] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (d_6_##TYPE2[i * 2 + 1] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (e_6_##TYPE3[i] != 0)                                                \
+       __builtin_abort ();                                                    \
+    }
+
+#define run_7(TYPE1, TYPE2, TYPE3) /* Check test_1 with n = 333.  */          \
+  int n_7_##TYPE1_##TYPE2_##TYPE3 = 333; /* i.e. n_7_TYPE1_TYPE2_<TYPE3> */   \
+  TYPE1 x_7_##TYPE1 = 239;                                                    \
+  TYPE1 x2_7_##TYPE1 = 132;                                                   \
+  TYPE1 x3_7_##TYPE1 = 39;                                                    \
+  TYPE1 x4_7_##TYPE1 = 48;                                                    \
+  TYPE2 y_7_##TYPE2 = 1036;                                                   \
+  TYPE2 y2_7_##TYPE2 = 3665;                                                  \
+  TYPE3 z_7_##TYPE3 = 5145656;                                                \
+  TYPE1 f_7_##TYPE1[334 * 4 + 1] = {0};                                       \
+  TYPE2 d_7_##TYPE2[334 * 2 + 1] = {0};                                       \
+  TYPE3 e_7_##TYPE3[334] = {0};                                               \
+  test_1_##TYPE1_##TYPE2 (f_7_##TYPE1, d_7_##TYPE2, e_7_##TYPE3, x_7_##TYPE1, \
+                         x2_7_##TYPE1, x3_7_##TYPE1, x4_7_##TYPE1,            \
+                         y_7_##TYPE2, y2_7_##TYPE2, z_7_##TYPE3,              \
+                         n_7_##TYPE1_##TYPE2_##TYPE3);                        \
+  for (int i = 0; i < n_7_##TYPE1_##TYPE2_##TYPE3; ++i)                       \
+    { /* Every written element must equal its source value.  */               \
+      if (f_7_##TYPE1[i * 4 + 0] != x_7_##TYPE1)                              \
+       __builtin_abort ();                                                    \
+      if (f_7_##TYPE1[i * 4 + 1] != x2_7_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (f_7_##TYPE1[i * 4 + 2] != x3_7_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (f_7_##TYPE1[i * 4 + 3] != x4_7_##TYPE1)                             \
+       __builtin_abort ();                                                    \
+      if (d_7_##TYPE2[i * 2 + 0] != y_7_##TYPE2)                              \
+       __builtin_abort ();                                                    \
+      if (d_7_##TYPE2[i * 2 + 1] != y2_7_##TYPE2)                             \
+       __builtin_abort ();                                                    \
+      if (e_7_##TYPE3[i] != z_7_##TYPE3)                                      \
+       __builtin_abort ();                                                    \
+    }                                                                         \
+  for (int i = n_7_##TYPE1_##TYPE2_##TYPE3;                                   \
+       i < n_7_##TYPE1_##TYPE2_##TYPE3 + 1; ++i)                              \
+    { /* The element group just past n must still be zero.  */                \
+      if (f_7_##TYPE1[i * 4 + 0] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_7_##TYPE1[i * 4 + 1] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_7_##TYPE1[i * 4 + 2] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (f_7_##TYPE1[i * 4 + 3] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (d_7_##TYPE2[i * 2 + 0] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (d_7_##TYPE2[i * 2 + 1] != 0)                                        \
+       __builtin_abort ();                                                    \
+      if (e_7_##TYPE3[i] != 0)                                                \
+       __builtin_abort ();                                                    \
+    }
+
+#define run_8(TYPE1, TYPE2, TYPE3) /* Call the test_1 variant with n = 512, then verify.  */ \
+  int n_8_##TYPE1_##TYPE2_##TYPE3 = 512; \
+  TYPE1 x_8_##TYPE1 = 239; \
+  TYPE1 x2_8_##TYPE1 = 132; \
+  TYPE1 x3_8_##TYPE1 = 39; \
+  TYPE1 x4_8_##TYPE1 = 48; \
+  TYPE2 y_8_##TYPE2 = 1036; \
+  TYPE2 y2_8_##TYPE2 = 3665; \
+  TYPE3 z_8_##TYPE3 = 5145656; \
+  TYPE1 f_8_##TYPE1[513 * 4 + 1] = {0}; \
+  TYPE2 d_8_##TYPE2[513 * 2 + 1] = {0}; \
+  TYPE3 e_8_##TYPE3[513] = {0}; \
+  test_1_##TYPE1_##TYPE2 (f_8_##TYPE1, d_8_##TYPE2, e_8_##TYPE3, x_8_##TYPE1, \
+                          x2_8_##TYPE1, x3_8_##TYPE1, x4_8_##TYPE1, \
+                          y_8_##TYPE2, y2_8_##TYPE2, z_8_##TYPE3, \
+                          n_8_##TYPE1_##TYPE2_##TYPE3); \
+  for (int i = 0; i < n_8_##TYPE1_##TYPE2_##TYPE3; ++i) /* Groups below n must hold the passed values.  */ \
+    { \
+      if (f_8_##TYPE1[i * 4 + 0] != x_8_##TYPE1) \
+        __builtin_abort (); \
+      if (f_8_##TYPE1[i * 4 + 1] != x2_8_##TYPE1) \
+        __builtin_abort (); \
+      if (f_8_##TYPE1[i * 4 + 2] != x3_8_##TYPE1) \
+        __builtin_abort (); \
+      if (f_8_##TYPE1[i * 4 + 3] != x4_8_##TYPE1) \
+        __builtin_abort (); \
+      if (d_8_##TYPE2[i * 2 + 0] != y_8_##TYPE2) \
+        __builtin_abort (); \
+      if (d_8_##TYPE2[i * 2 + 1] != y2_8_##TYPE2) \
+        __builtin_abort (); \
+      if (e_8_##TYPE3[i] != z_8_##TYPE3) \
+        __builtin_abort (); \
+    } \
+  for (int i = n_8_##TYPE1_##TYPE2_##TYPE3; /* The group at index n must still be zero.  */ \
+       i < n_8_##TYPE1_##TYPE2_##TYPE3 + 1; ++i) \
+    { \
+      if (f_8_##TYPE1[i * 4 + 0] != 0) \
+        __builtin_abort (); \
+      if (f_8_##TYPE1[i * 4 + 1] != 0) \
+        __builtin_abort (); \
+      if (f_8_##TYPE1[i * 4 + 2] != 0) \
+        __builtin_abort (); \
+      if (f_8_##TYPE1[i * 4 + 3] != 0) \
+        __builtin_abort (); \
+      if (d_8_##TYPE2[i * 2 + 0] != 0) \
+        __builtin_abort (); \
+      if (d_8_##TYPE2[i * 2 + 1] != 0) \
+        __builtin_abort (); \
+      if (e_8_##TYPE3[i] != 0) \
+        __builtin_abort (); \
+    }
+
+#define run_9(TYPE1, TYPE2, TYPE3) /* Call the test_1 variant with n = 637, then verify.  */ \
+  int n_9_##TYPE1_##TYPE2_##TYPE3 = 637; \
+  TYPE1 x_9_##TYPE1 = 222; \
+  TYPE1 x2_9_##TYPE1 = 111; \
+  TYPE1 x3_9_##TYPE1 = 11; \
+  TYPE1 x4_9_##TYPE1 = 7; \
+  TYPE2 y_9_##TYPE2 = 2034; \
+  TYPE2 y2_9_##TYPE2 = 6987; \
+  TYPE3 z_9_##TYPE3 = 1564616; \
+  TYPE1 f_9_##TYPE1[638 * 4 + 1] = {0}; \
+  TYPE2 d_9_##TYPE2[638 * 2 + 1] = {0}; \
+  TYPE3 e_9_##TYPE3[638] = {0}; \
+  test_1_##TYPE1_##TYPE2 (f_9_##TYPE1, d_9_##TYPE2, e_9_##TYPE3, x_9_##TYPE1, \
+                          x2_9_##TYPE1, x3_9_##TYPE1, x4_9_##TYPE1, \
+                          y_9_##TYPE2, y2_9_##TYPE2, z_9_##TYPE3, \
+                          n_9_##TYPE1_##TYPE2_##TYPE3); \
+  for (int i = 0; i < n_9_##TYPE1_##TYPE2_##TYPE3; ++i) /* Groups below n must hold the passed values.  */ \
+    { \
+      if (f_9_##TYPE1[i * 4 + 0] != x_9_##TYPE1) \
+        __builtin_abort (); \
+      if (f_9_##TYPE1[i * 4 + 1] != x2_9_##TYPE1) \
+        __builtin_abort (); \
+      if (f_9_##TYPE1[i * 4 + 2] != x3_9_##TYPE1) \
+        __builtin_abort (); \
+      if (f_9_##TYPE1[i * 4 + 3] != x4_9_##TYPE1) \
+        __builtin_abort (); \
+      if (d_9_##TYPE2[i * 2 + 0] != y_9_##TYPE2) \
+        __builtin_abort (); \
+      if (d_9_##TYPE2[i * 2 + 1] != y2_9_##TYPE2) \
+        __builtin_abort (); \
+      if (e_9_##TYPE3[i] != z_9_##TYPE3) \
+        __builtin_abort (); \
+    } \
+  for (int i = n_9_##TYPE1_##TYPE2_##TYPE3; /* The group at index n must still be zero.  */ \
+       i < n_9_##TYPE1_##TYPE2_##TYPE3 + 1; ++i) \
+    { \
+      if (f_9_##TYPE1[i * 4 + 0] != 0) \
+        __builtin_abort (); \
+      if (f_9_##TYPE1[i * 4 + 1] != 0) \
+        __builtin_abort (); \
+      if (f_9_##TYPE1[i * 4 + 2] != 0) \
+        __builtin_abort (); \
+      if (f_9_##TYPE1[i * 4 + 3] != 0) \
+        __builtin_abort (); \
+      if (d_9_##TYPE2[i * 2 + 0] != 0) \
+        __builtin_abort (); \
+      if (d_9_##TYPE2[i * 2 + 1] != 0) \
+        __builtin_abort (); \
+      if (e_9_##TYPE3[i] != 0) \
+        __builtin_abort (); \
+    }
+
+#define run_10(TYPE1, TYPE2, TYPE3) /* Call the test_1 variant with n = 777, then verify.  */ \
+  int n_10_##TYPE1_##TYPE2_##TYPE3 = 777; \
+  TYPE1 x_10_##TYPE1 = 222; \
+  TYPE1 x2_10_##TYPE1 = 111; \
+  TYPE1 x3_10_##TYPE1 = 11; \
+  TYPE1 x4_10_##TYPE1 = 7; \
+  TYPE2 y_10_##TYPE2 = 2034; \
+  TYPE2 y2_10_##TYPE2 = 6987; \
+  TYPE3 z_10_##TYPE3 = 1564616; \
+  TYPE1 f_10_##TYPE1[778 * 4 + 1] = {0}; \
+  TYPE2 d_10_##TYPE2[778 * 2 + 1] = {0}; \
+  TYPE3 e_10_##TYPE3[778] = {0}; \
+  test_1_##TYPE1_##TYPE2 (f_10_##TYPE1, d_10_##TYPE2, e_10_##TYPE3, x_10_##TYPE1, \
+                          x2_10_##TYPE1, x3_10_##TYPE1, x4_10_##TYPE1, \
+                          y_10_##TYPE2, y2_10_##TYPE2, z_10_##TYPE3, \
+                          n_10_##TYPE1_##TYPE2_##TYPE3); \
+  for (int i = 0; i < n_10_##TYPE1_##TYPE2_##TYPE3; ++i) /* Groups below n must hold the passed values.  */ \
+    { \
+      if (f_10_##TYPE1[i * 4 + 0] != x_10_##TYPE1) \
+        __builtin_abort (); \
+      if (f_10_##TYPE1[i * 4 + 1] != x2_10_##TYPE1) \
+        __builtin_abort (); \
+      if (f_10_##TYPE1[i * 4 + 2] != x3_10_##TYPE1) \
+        __builtin_abort (); \
+      if (f_10_##TYPE1[i * 4 + 3] != x4_10_##TYPE1) \
+        __builtin_abort (); \
+      if (d_10_##TYPE2[i * 2 + 0] != y_10_##TYPE2) \
+        __builtin_abort (); \
+      if (d_10_##TYPE2[i * 2 + 1] != y2_10_##TYPE2) \
+        __builtin_abort (); \
+      if (e_10_##TYPE3[i] != z_10_##TYPE3) \
+        __builtin_abort (); \
+    } \
+  for (int i = n_10_##TYPE1_##TYPE2_##TYPE3; /* The group at index n must still be zero.  */ \
+       i < n_10_##TYPE1_##TYPE2_##TYPE3 + 1; ++i) \
+    { \
+      if (f_10_##TYPE1[i * 4 + 0] != 0) \
+        __builtin_abort (); \
+      if (f_10_##TYPE1[i * 4 + 1] != 0) \
+        __builtin_abort (); \
+      if (f_10_##TYPE1[i * 4 + 2] != 0) \
+        __builtin_abort (); \
+      if (f_10_##TYPE1[i * 4 + 3] != 0) \
+        __builtin_abort (); \
+      if (d_10_##TYPE2[i * 2 + 0] != 0) \
+        __builtin_abort (); \
+      if (d_10_##TYPE2[i * 2 + 1] != 0) \
+        __builtin_abort (); \
+      if (e_10_##TYPE3[i] != 0) \
+        __builtin_abort (); \
+    }
+
+#define TEST_ALL(T) /* Expand T once for each of the four element-type triples.  */ \
+  T (int8_t, int16_t, int32_t) \
+  T (uint8_t, uint16_t, uint32_t) \
+  T (int16_t, int32_t, int64_t) \
+  T (uint16_t, uint32_t, uint64_t)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-1.c
new file mode 100644
index 00000000000..d3e187eae68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-1.c
@@ -0,0 +1,19 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "multiple_rgroup-1.c"
+
+int main (void) /* Expand run_1 .. run_10 through TEST_ALL (both provided via the #include above).  */
+{
+  TEST_ALL (run_1)
+  TEST_ALL (run_2)
+  TEST_ALL (run_3)
+  TEST_ALL (run_4)
+  TEST_ALL (run_5)
+  TEST_ALL (run_6)
+  TEST_ALL (run_7)
+  TEST_ALL (run_8)
+  TEST_ALL (run_9)
+  TEST_ALL (run_10)
+  return 0; /* Falls through to here when none of the checks aborted.  */
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c
new file mode 100644
index 00000000000..5166c9e35a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c
@@ -0,0 +1,19 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "multiple_rgroup-2.c"
+
+int main (void) /* Expand run_1 .. run_10 through TEST_ALL (both provided via the #include above).  */
+{
+  TEST_ALL (run_1)
+  TEST_ALL (run_2)
+  TEST_ALL (run_3)
+  TEST_ALL (run_4)
+  TEST_ALL (run_5)
+  TEST_ALL (run_6)
+  TEST_ALL (run_7)
+  TEST_ALL (run_8)
+  TEST_ALL (run_9)
+  TEST_ALL (run_10)
+  return 0; /* Falls through to here when none of the checks aborted.  */
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 905145ae97b..a13d6f5e898 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10364,8 +10364,9 @@ vect_record_loop_len (loop_vec_info loop_vinfo, vec_loop_lens *lens,
    rgroup that operates on NVECTORS vectors, where 0 <= INDEX < NVECTORS.  */
 
 tree
-vect_get_loop_len (loop_vec_info loop_vinfo, vec_loop_lens *lens,
-                  unsigned int nvectors, unsigned int index)
+vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
+                  vec_loop_lens *lens, unsigned int nvectors, tree vectype,
+                  unsigned int index, unsigned int factor)
 {
   rgroup_controls *rgl = &(*lens)[nvectors - 1];
   bool use_bias_adjusted_len =
@@ -10400,6 +10401,27 @@ vect_get_loop_len (loop_vec_info loop_vinfo, vec_loop_lens *lens,
 
   if (use_bias_adjusted_len)
     return rgl->bias_adjusted_ctrl;
+  else if (rgl->factor == 1 && factor == 1) /* Neither length is scaled by a unit-size factor.  */
+    {
+      tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
+      tree loop_len = rgl->controls[index];
+      poly_int64 nunits1 = TYPE_VECTOR_SUBPARTS (rgl->type);
+      poly_int64 nunits2 = TYPE_VECTOR_SUBPARTS (vectype);
+      if (maybe_ne (nunits1, nunits2))
+       {
+         /* A loop len for data type X can be reused for data type Y
+            if X has N times more elements than Y and if Y's elements
+            are N times bigger than X's.  IV_TYPE is integral, so divide with TRUNC_DIV_EXPR.  */
+         gcc_assert (multiple_p (nunits1, nunits2));
+         factor = exact_div (nunits1, nunits2).to_constant ();
+         gimple_seq seq = NULL;
+         loop_len = gimple_build (&seq, TRUNC_DIV_EXPR, iv_type, loop_len,
+                                  build_int_cst (iv_type, factor));
+         if (seq)
+           gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+       }
+      return loop_len;
+    }
   else
     return rgl->controls[index];
 }
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 821a8c3c238..0022b878767 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8794,14 +8794,17 @@ vectorizable_store (vec_info *vinfo,
                }
              else if (loop_lens)
                {
-                 tree final_len
-                   = vect_get_loop_len (loop_vinfo, loop_lens,
-                                        vec_num * ncopies, vec_num * j + i);
-                 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
                  machine_mode vmode = TYPE_MODE (vectype);
                  opt_machine_mode new_ovmode
                    = get_len_load_store_mode (vmode, false);
                  machine_mode new_vmode = new_ovmode.require ();
+                 unsigned factor
+                   = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
+                 tree final_len
+                   = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+                                        vec_num * ncopies, vectype,
+                                        vec_num * j + i, factor);
+                 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
                  /* Need conversion if it's wrapped with VnQI.  */
                  if (vmode != new_vmode)
                    {
@@ -10150,17 +10153,20 @@ vectorizable_load (vec_info *vinfo,
                      }
                    else if (loop_lens && memory_access_type != VMAT_INVARIANT)
                      {
-                       tree final_len
-                         = vect_get_loop_len (loop_vinfo, loop_lens,
-                                              vec_num * ncopies,
-                                              vec_num * j + i);
-                       tree ptr = build_int_cst (ref_type,
-                                                 align * BITS_PER_UNIT);
-
                        machine_mode vmode = TYPE_MODE (vectype);
                        opt_machine_mode new_ovmode
                          = get_len_load_store_mode (vmode, true);
                        machine_mode new_vmode = new_ovmode.require ();
+                       unsigned factor = (new_ovmode == vmode)
+                                           ? 1
+                                           : GET_MODE_UNIT_SIZE (vmode);
+                       tree final_len
+                         = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+                                              vec_num * ncopies, vectype,
+                                              vec_num * j + i, factor);
+                       tree ptr
+                         = build_int_cst (ref_type, align * BITS_PER_UNIT);
+
                        tree qi_type = unsigned_intQI_type_node;
 
                        signed char biasval =
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 9cf2fb23fe3..02d2ad6fba1 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2293,8 +2293,9 @@ extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *,
                                unsigned int, tree, unsigned int);
 extern void vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
                                  tree, unsigned int);
-extern tree vect_get_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
-                              unsigned int);
+extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *,
+                              vec_loop_lens *, unsigned int, tree,
+                              unsigned int, unsigned int);
 extern gimple_seq vect_gen_len (tree, tree, tree, tree);
 extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info);
 extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *);
-- 
2.36.3

Reply via email to