Em Wed, Jun 14, 2017 at 10:53:40AM +0800, Jin Yao escreveu:
> Macro fusion merges two instructions into a single micro-op. Intel
> Core platforms perform this hardware optimization under limited
> circumstances. For example, CMP + JCC can be "fused" and
> executed/retired together. With sampling, this can result in the
> sample sometimes being on the JCC and sometimes on the CMP.
> So the two instructions of a fused pair should be considered
> together.

Doing it as a weak function that will be overridden by the host arch
doesn't work, as we also support cross-annotation. So you have to take
into account perf_evsel__env_arch(evsel), etc.

Please search for perf_evsel__env_arch(evsel) in the annotation source
files to see how it is used.

- Arnaldo
 
> In general, the fused instruction pairs are:
> 
> cmp/test/add/sub/and/inc/dec + jcc.
> 
> This patch adds a new function which checks if 2 x86 instructions
> are in a "fused" pair. For non-x86 arch, the function just returns
> false.
> 
> Signed-off-by: Jin Yao <yao....@linux.intel.com>
> ---
>  tools/perf/arch/x86/util/Build   |  1 +
>  tools/perf/arch/x86/util/fused.c | 20 ++++++++++++++++++++
>  tools/perf/util/Build            |  1 +
>  tools/perf/util/fused.c          | 11 +++++++++++
>  tools/perf/util/fused.h          |  8 ++++++++
>  5 files changed, 41 insertions(+)
>  create mode 100644 tools/perf/arch/x86/util/fused.c
>  create mode 100644 tools/perf/util/fused.c
>  create mode 100644 tools/perf/util/fused.h
> 
> diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
> index f95e6f4..3809348 100644
> --- a/tools/perf/arch/x86/util/Build
> +++ b/tools/perf/arch/x86/util/Build
> @@ -4,6 +4,7 @@ libperf-y += pmu.o
>  libperf-y += kvm-stat.o
>  libperf-y += perf_regs.o
>  libperf-y += group.o
> +libperf-y += fused.o
>  
>  libperf-$(CONFIG_DWARF) += dwarf-regs.o
>  libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
> diff --git a/tools/perf/arch/x86/util/fused.c b/tools/perf/arch/x86/util/fused.c
> new file mode 100644
> index 0000000..be28d22
> --- /dev/null
> +++ b/tools/perf/arch/x86/util/fused.c
> @@ -0,0 +1,20 @@
> +#include <string.h>
> +#include "../../util/fused.h"
> +
> +bool fused_insn_pair(const char *insn1, const char *insn2)
> +{
> +     if (strstr(insn2, "jmp"))
> +             return false;
> +
> +     if ((strstr(insn1, "cmp") && !strstr(insn1, "xchg")) ||
> +         strstr(insn1, "test") ||
> +         strstr(insn1, "add") ||
> +         strstr(insn1, "sub") ||
> +         strstr(insn1, "and") ||
> +         strstr(insn1, "inc") ||
> +         strstr(insn1, "dec")) {
> +             return true;
> +     }
> +
> +     return false;
> +}
> diff --git a/tools/perf/util/Build b/tools/perf/util/Build
> index 79dea95..b83757d 100644
> --- a/tools/perf/util/Build
> +++ b/tools/perf/util/Build
> @@ -93,6 +93,7 @@ libperf-y += drv_configs.o
>  libperf-y += units.o
>  libperf-y += time-utils.o
>  libperf-y += expr-bison.o
> +libperf-y += fused.o
>  
>  libperf-$(CONFIG_LIBBPF) += bpf-loader.o
>  libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
> diff --git a/tools/perf/util/fused.c b/tools/perf/util/fused.c
> new file mode 100644
> index 0000000..2cf56fa
> --- /dev/null
> +++ b/tools/perf/util/fused.c
> @@ -0,0 +1,11 @@
> +#include <linux/compiler.h>
> +#include <linux/types.h>
> +#include <string.h>
> +
> +#include "fused.h"
> +
> +bool __weak fused_insn_pair(const char *insn1 __maybe_unused,
> +                         const char *insn2 __maybe_unused)
> +{
> +     return false;
> +}
> diff --git a/tools/perf/util/fused.h b/tools/perf/util/fused.h
> new file mode 100644
> index 0000000..fa26714
> --- /dev/null
> +++ b/tools/perf/util/fused.h
> @@ -0,0 +1,8 @@
> +#ifndef __PERF_FUSED_H
> +#define __PERF_FUSED_H
> +
> +#include <linux/types.h>
> +
> +bool fused_insn_pair(const char *insn1, const char *insn2);
> +
> +#endif       /* __PERF_FUSED_H */
> -- 
> 2.7.4

Reply via email to