Hi Chen,
Thanks for your comments. I've changed `strlen(name)` to 5: the length
of "list" including the null-terminator. Note that we need to include
the null-terminator in the length here to avoid also matching e.g.
`--cpuid lista` (with trailing characters).
Please find revised patch below and attached.
Thanks,
George
-- >8 --
Rather than needing to guess what architecture features are available to
be tested, simply allow the user to query the testbench to discover the
available features. Since the list of architectures is now a global
variable, adjust the naming to camelCase instead of snake_case to match
the rest of the file.
Also add the relevant explanation line to --help text.
Also take this opportunity to reorder SVE/SVE2 in the Arm architecture
list such that they occur below the group of Neon extensions.
---
source/test/testbench.cpp | 92 ++++++++++++++++++++++-----------------
1 file changed, 53 insertions(+), 39 deletions(-)
diff --git a/source/test/testbench.cpp b/source/test/testbench.cpp
index b8ef760f2..f651dc51b 100644
--- a/source/test/testbench.cpp
+++ b/source/test/testbench.cpp
@@ -80,13 +80,52 @@ const char* const* chromaPartStr[X265_CSP_COUNT] =
lumaPartStr
};
+struct test_arch_t
+{
+ char name[13];
+ int flag;
+} testArch[] =
+{
+#if X265_ARCH_X86
+ { "SSE2", X265_CPU_SSE2 },
+ { "SSE3", X265_CPU_SSE3 },
+ { "SSSE3", X265_CPU_SSSE3 },
+ { "SSE4", X265_CPU_SSE4 },
+ { "AVX", X265_CPU_AVX },
+ { "XOP", X265_CPU_XOP },
+ { "AVX2", X265_CPU_AVX2 },
+ { "BMI2", X265_CPU_AVX2 | X265_CPU_BMI1 | X265_CPU_BMI2 },
+ { "AVX512", X265_CPU_AVX512 },
+#else
+ { "ARMv6", X265_CPU_ARMV6 },
+ { "NEON", X265_CPU_NEON },
+ { "Neon_DotProd", X265_CPU_NEON_DOTPROD },
+ { "Neon_I8MM", X265_CPU_NEON_I8MM },
+ { "SVE", X265_CPU_SVE },
+ { "SVE2", X265_CPU_SVE2 },
+ { "FastNeonMRC", X265_CPU_FAST_NEON_MRC },
+#endif
+ { "", 0 },
+};
+
+void do_cpuid_list(int cpuid)
+{
+ printf("x265 detected --cpuid architectures:\n");
+ for (int i = 0; testArch[i].flag; i++)
+ {
+ if ((testArch[i].flag & cpuid) == testArch[i].flag)
+ printf(" %s\n", testArch[i].name);
+ }
+}
+
void do_help()
{
printf("x265 optimized primitive testbench\n\n");
printf("usage: TestBench [--cpuid CPU] [--testbench BENCH] [--nobench]
[--help]\n\n");
- printf(" CPU is comma separated SIMD arch list, example:
SSE4,AVX\n");
+ printf(" CPU is comma separated SIMD architecture list, for example:
SSE4,AVX\n");
+ printf(" Use `--cpuid list` to print a list of detected SIMD
architectures\n\n");
printf(" BENCH is one of (pixel,transforms,interp,intrapred)\n\n");
- printf(" --nobench disables running benchmarks, only run correctness
tests\n\n");
+ printf(" `--nobench` disables running benchmarks, only run
correctness tests\n\n");
printf("By default, the test bench will test all benches on detected CPU
architectures\n");
printf("Options and testbench name may be truncated.\n");
}
@@ -120,6 +159,11 @@ int main(int argc, char *argv[])
}
else if (!strncmp(name, "cpuid", strlen(name)))
{
+ if (!strncmp(value, "list", 5))
+ {
+ do_cpuid_list(cpuid);
+ return 0;
+ }
int cpu_detect_cpuid = cpuid;
bool bError = false;
cpuid = parseCpuName(value, bError, enableavx512);
@@ -173,48 +217,18 @@ int main(int argc, char *argv[])
setupCPrimitives(cprim);
setupAliasPrimitives(cprim);
- struct test_arch_t
- {
- char name[13];
- int flag;
- } test_arch[] =
+ for (int i = 0; testArch[i].flag; i++)
{
-#if X265_ARCH_X86
- { "SSE2", X265_CPU_SSE2 },
- { "SSE3", X265_CPU_SSE3 },
- { "SSSE3", X265_CPU_SSSE3 },
- { "SSE4", X265_CPU_SSE4 },
- { "AVX", X265_CPU_AVX },
- { "XOP", X265_CPU_XOP },
- { "AVX2", X265_CPU_AVX2 },
- { "BMI2", X265_CPU_AVX2 | X265_CPU_BMI1 | X265_CPU_BMI2 },
- { "AVX512", X265_CPU_AVX512 },
-#else
- { "ARMv6", X265_CPU_ARMV6 },
- { "NEON", X265_CPU_NEON },
- { "SVE2", X265_CPU_SVE2 },
- { "SVE", X265_CPU_SVE },
- { "Neon_DotProd", X265_CPU_NEON_DOTPROD },
- { "Neon_I8MM", X265_CPU_NEON_I8MM },
- { "FastNeonMRC", X265_CPU_FAST_NEON_MRC },
-#endif
- { "", 0 },
- };
-
- for (int i = 0; test_arch[i].flag; i++)
- {
- if ((test_arch[i].flag & cpuid) == test_arch[i].flag)
- {
- printf("Testing primitives: %s\n", test_arch[i].name);
- fflush(stdout);
- }
- else
+ if ((testArch[i].flag & cpuid) != testArch[i].flag)
continue;
+ printf("Testing primitives: %s\n", testArch[i].name);
+ fflush(stdout);
+
#if defined(X265_ARCH_X86) || defined(X265_ARCH_ARM64)
EncoderPrimitives vecprim;
memset(&vecprim, 0, sizeof(vecprim));
- setupIntrinsicPrimitives(vecprim, test_arch[i].flag);
+ setupIntrinsicPrimitives(vecprim, testArch[i].flag);
setupAliasPrimitives(vecprim);
for (size_t h = 0; h < sizeof(harness) / sizeof(TestHarness*); h++)
{
@@ -232,7 +246,7 @@ int main(int argc, char *argv[])
EncoderPrimitives asmprim;
memset(&asmprim, 0, sizeof(asmprim));
- setupAssemblyPrimitives(asmprim, test_arch[i].flag);
+ setupAssemblyPrimitives(asmprim, testArch[i].flag);
setupAliasPrimitives(asmprim);
memcpy(&primitives, &asmprim, sizeof(EncoderPrimitives));
for (size_t h = 0; h < sizeof(harness) / sizeof(TestHarness*); h++)
--
2.34.1
>From affffd6b789d92fab84212d8c902bfebed7da546 Mon Sep 17 00:00:00 2001
From: George Steed <[email protected]>
Date: Mon, 16 Dec 2024 15:43:14 +0000
Subject: [PATCH v2] testbench.cpp: Add --cpuid list to list valid SIMD
architectures
Rather than needing to guess what architecture features are available to
be tested, simply allow the user to query the testbench to discover the
available features. Since the list of architectures is now a global
variable, adjust the naming to camelCase instead of snake_case to match
the rest of the file.
Also add the relevant explanation line to --help text.
Also take this opportunity to reorder SVE/SVE2 in the Arm architecture
list such that they occur below the group of Neon extensions.
---
source/test/testbench.cpp | 92 ++++++++++++++++++++++-----------------
1 file changed, 53 insertions(+), 39 deletions(-)
diff --git a/source/test/testbench.cpp b/source/test/testbench.cpp
index b8ef760f2..f651dc51b 100644
--- a/source/test/testbench.cpp
+++ b/source/test/testbench.cpp
@@ -80,13 +80,52 @@ const char* const* chromaPartStr[X265_CSP_COUNT] =
lumaPartStr
};
+struct test_arch_t
+{
+ char name[13];
+ int flag;
+} testArch[] =
+{
+#if X265_ARCH_X86
+ { "SSE2", X265_CPU_SSE2 },
+ { "SSE3", X265_CPU_SSE3 },
+ { "SSSE3", X265_CPU_SSSE3 },
+ { "SSE4", X265_CPU_SSE4 },
+ { "AVX", X265_CPU_AVX },
+ { "XOP", X265_CPU_XOP },
+ { "AVX2", X265_CPU_AVX2 },
+ { "BMI2", X265_CPU_AVX2 | X265_CPU_BMI1 | X265_CPU_BMI2 },
+ { "AVX512", X265_CPU_AVX512 },
+#else
+ { "ARMv6", X265_CPU_ARMV6 },
+ { "NEON", X265_CPU_NEON },
+ { "Neon_DotProd", X265_CPU_NEON_DOTPROD },
+ { "Neon_I8MM", X265_CPU_NEON_I8MM },
+ { "SVE", X265_CPU_SVE },
+ { "SVE2", X265_CPU_SVE2 },
+ { "FastNeonMRC", X265_CPU_FAST_NEON_MRC },
+#endif
+ { "", 0 },
+};
+
+void do_cpuid_list(int cpuid)
+{
+ printf("x265 detected --cpuid architectures:\n");
+ for (int i = 0; testArch[i].flag; i++)
+ {
+ if ((testArch[i].flag & cpuid) == testArch[i].flag)
+ printf(" %s\n", testArch[i].name);
+ }
+}
+
void do_help()
{
printf("x265 optimized primitive testbench\n\n");
printf("usage: TestBench [--cpuid CPU] [--testbench BENCH] [--nobench] [--help]\n\n");
- printf(" CPU is comma separated SIMD arch list, example: SSE4,AVX\n");
+ printf(" CPU is comma separated SIMD architecture list, for example: SSE4,AVX\n");
+ printf(" Use `--cpuid list` to print a list of detected SIMD architectures\n\n");
printf(" BENCH is one of (pixel,transforms,interp,intrapred)\n\n");
- printf(" --nobench disables running benchmarks, only run correctness tests\n\n");
+ printf(" `--nobench` disables running benchmarks, only run correctness tests\n\n");
printf("By default, the test bench will test all benches on detected CPU architectures\n");
printf("Options and testbench name may be truncated.\n");
}
@@ -120,6 +159,11 @@ int main(int argc, char *argv[])
}
else if (!strncmp(name, "cpuid", strlen(name)))
{
+ if (!strncmp(value, "list", 5))
+ {
+ do_cpuid_list(cpuid);
+ return 0;
+ }
int cpu_detect_cpuid = cpuid;
bool bError = false;
cpuid = parseCpuName(value, bError, enableavx512);
@@ -173,48 +217,18 @@ int main(int argc, char *argv[])
setupCPrimitives(cprim);
setupAliasPrimitives(cprim);
- struct test_arch_t
- {
- char name[13];
- int flag;
- } test_arch[] =
+ for (int i = 0; testArch[i].flag; i++)
{
-#if X265_ARCH_X86
- { "SSE2", X265_CPU_SSE2 },
- { "SSE3", X265_CPU_SSE3 },
- { "SSSE3", X265_CPU_SSSE3 },
- { "SSE4", X265_CPU_SSE4 },
- { "AVX", X265_CPU_AVX },
- { "XOP", X265_CPU_XOP },
- { "AVX2", X265_CPU_AVX2 },
- { "BMI2", X265_CPU_AVX2 | X265_CPU_BMI1 | X265_CPU_BMI2 },
- { "AVX512", X265_CPU_AVX512 },
-#else
- { "ARMv6", X265_CPU_ARMV6 },
- { "NEON", X265_CPU_NEON },
- { "SVE2", X265_CPU_SVE2 },
- { "SVE", X265_CPU_SVE },
- { "Neon_DotProd", X265_CPU_NEON_DOTPROD },
- { "Neon_I8MM", X265_CPU_NEON_I8MM },
- { "FastNeonMRC", X265_CPU_FAST_NEON_MRC },
-#endif
- { "", 0 },
- };
-
- for (int i = 0; test_arch[i].flag; i++)
- {
- if ((test_arch[i].flag & cpuid) == test_arch[i].flag)
- {
- printf("Testing primitives: %s\n", test_arch[i].name);
- fflush(stdout);
- }
- else
+ if ((testArch[i].flag & cpuid) != testArch[i].flag)
continue;
+ printf("Testing primitives: %s\n", testArch[i].name);
+ fflush(stdout);
+
#if defined(X265_ARCH_X86) || defined(X265_ARCH_ARM64)
EncoderPrimitives vecprim;
memset(&vecprim, 0, sizeof(vecprim));
- setupIntrinsicPrimitives(vecprim, test_arch[i].flag);
+ setupIntrinsicPrimitives(vecprim, testArch[i].flag);
setupAliasPrimitives(vecprim);
for (size_t h = 0; h < sizeof(harness) / sizeof(TestHarness*); h++)
{
@@ -232,7 +246,7 @@ int main(int argc, char *argv[])
EncoderPrimitives asmprim;
memset(&asmprim, 0, sizeof(asmprim));
- setupAssemblyPrimitives(asmprim, test_arch[i].flag);
+ setupAssemblyPrimitives(asmprim, testArch[i].flag);
setupAliasPrimitives(asmprim);
memcpy(&primitives, &asmprim, sizeof(EncoderPrimitives));
for (size_t h = 0; h < sizeof(harness) / sizeof(TestHarness*); h++)
--
2.34.1
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel