Em Thu, Apr 30, 2015 at 05:37:37PM +0300, Adrian Hunter escreveu:
> Add support for decoding an Intel Processor Trace.

Thanks for the function comments in kerneldoc style, we need more of that!

Some issues below:

- Arnaldo
 
> Signed-off-by: Adrian Hunter <adrian.hun...@intel.com>
> ---
>  tools/perf/util/intel-pt-decoder/Build             |    2 +-
>  .../perf/util/intel-pt-decoder/intel-pt-decoder.c  | 1738 
> ++++++++++++++++++++
>  .../perf/util/intel-pt-decoder/intel-pt-decoder.h  |   89 +
>  3 files changed, 1828 insertions(+), 1 deletion(-)
>  create mode 100644 tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
>  create mode 100644 tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
> 
> diff --git a/tools/perf/util/intel-pt-decoder/Build 
> b/tools/perf/util/intel-pt-decoder/Build
> index 587321a..fa12eac 100644
> --- a/tools/perf/util/intel-pt-decoder/Build
> +++ b/tools/perf/util/intel-pt-decoder/Build
> @@ -1,4 +1,4 @@
> -libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o 
> intel-pt-log.o
> +libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o 
> intel-pt-log.o intel-pt-decoder.o
>  
>  inat_tables_script = ../../arch/x86/tools/gen-insn-attr-x86.awk
>  inat_tables_maps = ../../arch/x86/lib/x86-opcode-map.txt
> diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c 
> b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
> new file mode 100644
> index 0000000..435b61b
> --- /dev/null
> +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
> @@ -0,0 +1,1738 @@
> +/*
> + * intel_pt_decoder.c: Intel Processor Trace support
> + * Copyright (c) 2013-2014, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + */
> +
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +#include <stdlib.h>
> +#include <stdbool.h>
> +#include <string.h>
> +#include <errno.h>
> +#include <stdint.h>
> +#include <inttypes.h>
> +
> +#include "intel-pt-insn-decoder.h"
> +#include "intel-pt-pkt-decoder.h"
> +#include "intel-pt-decoder.h"
> +#include "intel-pt-log.h"
> +
> +#define INTEL_PT_BLK_SIZE 1024
> +
> +#define BIT63 (((uint64_t)1 << 63))
> +
> +#define INTEL_PT_RETURN 1
> +
> +struct intel_pt_blk {
> +     struct intel_pt_blk *prev;
> +     uint64_t ip[INTEL_PT_BLK_SIZE];
> +};
> +
> +struct intel_pt_stack {
> +     struct intel_pt_blk *blk;
> +     struct intel_pt_blk *spare;
> +     int pos;
> +};
> +
> +enum intel_pt_pkt_state {
> +     INTEL_PT_STATE_NO_PSB,
> +     INTEL_PT_STATE_NO_IP,
> +     INTEL_PT_STATE_ERR_RESYNC,
> +     INTEL_PT_STATE_IN_SYNC,
> +     INTEL_PT_STATE_TNT,
> +     INTEL_PT_STATE_TIP,
> +     INTEL_PT_STATE_TIP_PGD,
> +     INTEL_PT_STATE_FUP,
> +     INTEL_PT_STATE_FUP_NO_TIP,
> +};
> +
> +#ifdef INTEL_PT_STRICT
> +#define INTEL_PT_STATE_ERR1  INTEL_PT_STATE_NO_PSB
> +#define INTEL_PT_STATE_ERR2  INTEL_PT_STATE_NO_PSB
> +#define INTEL_PT_STATE_ERR3  INTEL_PT_STATE_NO_PSB
> +#define INTEL_PT_STATE_ERR4  INTEL_PT_STATE_NO_PSB
> +#else
> +#define INTEL_PT_STATE_ERR1  (decoder->pkt_state)
> +#define INTEL_PT_STATE_ERR2  INTEL_PT_STATE_NO_IP
> +#define INTEL_PT_STATE_ERR3  INTEL_PT_STATE_ERR_RESYNC
> +#define INTEL_PT_STATE_ERR4  INTEL_PT_STATE_IN_SYNC
> +#endif
> +
> +struct intel_pt_decoder {
> +     int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
> +     int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
> +                      uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
> +                      uint64_t max_insn_cnt, void *data);
> +     void *data;
> +     struct intel_pt_state state;
> +     const unsigned char *buf;
> +     size_t len;
> +     bool return_compression;
> +     bool pge;
> +     uint64_t pos;
> +     uint64_t last_ip;
> +     uint64_t ip;
> +     uint64_t cr3;
> +     uint64_t timestamp;
> +     uint64_t tsc_timestamp;
> +     uint64_t ref_timestamp;
> +     uint64_t ret_addr;
> +     struct intel_pt_stack stack;
> +     enum intel_pt_pkt_state pkt_state;
> +     struct intel_pt_pkt packet;
> +     struct intel_pt_pkt tnt;
> +     int pkt_step;
> +     int pkt_len;
> +     unsigned int cbr;
> +     int exec_mode;
> +     unsigned int insn_bytes;
> +     uint64_t sign_bit;
> +     uint64_t sign_bits;
> +     uint64_t period;
> +     enum intel_pt_period_type period_type;
> +     uint64_t period_insn_cnt;
> +     uint64_t period_mask;
> +     uint64_t period_ticks;
> +     uint64_t last_masked_timestamp;
> +     bool continuous_period;
> +     bool overflow;
> +     bool set_fup_tx_flags;
> +     unsigned int fup_tx_flags;
> +     unsigned int tx_flags;
> +     uint64_t timestamp_insn_cnt;
> +     const unsigned char *next_buf;
> +     size_t next_len;
> +     unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ];
> +};
> +
> +static uint64_t intel_pt_lower_power_of_2(uint64_t x)
> +{
> +     int i;
> +
> +     for (i = 0; x != 1; i++)
> +             x >>= 1;
> +
> +     return x << i;
> +}

We have in tools/perf/ the same function used in the kernel:

/**
 * rounddown_pow_of_two - round the given value down to nearest power of
 * two
 * @n - parameter
 *
 * round the given value down to the nearest power of two
 * - the result is undefined when n == 0
 * - this can be used to initialise global variables from constant data
 */
#define rounddown_pow_of_two(n)                 \
(                                               \
        __builtin_constant_p(n) ? (             \
                (1UL << ilog2(n))) :            \
        __rounddown_pow_of_two(n)               \
 )

> +
> +static void intel_pt_setup_period(struct intel_pt_decoder *decoder)
> +{
> +     if (decoder->period_type == INTEL_PT_PERIOD_TICKS) {
> +             uint64_t period;
> +
> +             period = intel_pt_lower_power_of_2(decoder->period);
> +             decoder->period_mask = ~(period - 1);
> +             decoder->period_ticks = period;
> +     }
> +}
> +
> +struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
> +{
> +     struct intel_pt_decoder *decoder;
> +
> +     if (!params->get_trace || !params->walk_insn)
> +             return NULL;
> +
> +     decoder = malloc(sizeof(struct intel_pt_decoder));
> +     if (!decoder)
> +             return NULL;
> +
> +     memset(decoder, 0, sizeof(struct intel_pt_decoder));


We have either zalloc or calloc for the above sequence of allocating +
zeroing.

> +
> +     decoder->get_trace = params->get_trace;
> +     decoder->walk_insn = params->walk_insn;
> +     decoder->data = params->data;
> +     decoder->return_compression = params->return_compression;

I am not strict about this, but it's common in reviews to ask for
aligning the =.

> +
> +     decoder->sign_bit = (uint64_t)1 << 47;
> +     decoder->sign_bits = ~(((uint64_t)1 << 48) - 1);
> +
> +     decoder->period = params->period;
> +     decoder->period_type = params->period_type;
> +
> +     intel_pt_setup_period(decoder);
> +
> +     return decoder;
> +}
> +
> +static void intel_pt_pop_blk(struct intel_pt_stack *stack)
> +{
> +     struct intel_pt_blk *blk;
> +
> +     blk = stack->blk;


Also not strict about this, but this makes the function shorter:

+       struct intel_pt_blk *blk = stack->blk;

> +     stack->blk = blk->prev;
> +     if (!stack->spare)
> +             stack->spare = blk;
> +     else
> +             free(blk);
> +}
> +
> +static uint64_t intel_pt_pop(struct intel_pt_stack *stack)
> +{
> +     if (!stack->pos) {
> +             if (!stack->blk)
> +                     return 0;
> +             intel_pt_pop_blk(stack);
> +             if (!stack->blk)
> +                     return 0;
> +             stack->pos = INTEL_PT_BLK_SIZE;
> +     }
> +     return stack->blk->ip[--stack->pos];
> +}
> +
> +static int intel_pt_alloc_blk(struct intel_pt_stack *stack)
> +{
> +     struct intel_pt_blk *blk;
> +
> +     if (stack->spare) {
> +             blk = stack->spare;
> +             stack->spare = NULL;
> +     } else {
> +             blk = malloc(sizeof(struct intel_pt_blk));
> +             if (!blk)
> +                     return -ENOMEM;
> +     }
> +
> +     blk->prev = stack->blk;
> +     stack->blk = blk;
> +     stack->pos = 0;
> +     return 0;
> +}
> +
> +static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip)
> +{
> +     int err;
> +
> +     if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) {
> +             err = intel_pt_alloc_blk(stack);
> +             if (err)
> +                     return err;
> +     }
> +
> +     stack->blk->ip[stack->pos++] = ip;
> +     return 0;
> +}


None of those routines are "intel_pt" specific at all, right?

> +
> +static void intel_pt_clear_stack(struct intel_pt_stack *stack)
> +{
> +     while (stack->blk)
> +             intel_pt_pop_blk(stack);
> +     stack->pos = 0;
> +}
> +
> +static void intel_pt_free_stack(struct intel_pt_stack *stack)
> +{
> +     intel_pt_clear_stack(stack);
> +     free(stack->blk);
> +     free(stack->spare);
> +}

zfree was introduced to zero out these variables, i.e.:

        zfree(&stack->blk);
        zfree(&stack->spare);

> +
> +void intel_pt_decoder_free(struct intel_pt_decoder *decoder)
> +{
> +     intel_pt_free_stack(&decoder->stack);
> +     free(decoder);
> +}
> +
> +const char *intel_pt_error_message(int code)
> +{
> +     switch (code) {
> +     case ENOMEM:
> +             return "Memory allocation failed";
> +     case ENOSYS:
> +             return "Internal error";
> +     case EBADMSG:
> +             return "Bad packet";
> +     case ENODATA:
> +             return "No more data";
> +     case EILSEQ:
> +             return "Failed to get instruction";
> +     case ENOENT:
> +             return "Trace doesn't match instruction";
> +     case EOVERFLOW:
> +             return "Overflow packet";
> +     case ESHUTDOWN:
> +             return "Trace stop packet";
> +     default:
> +             return "Unknown error!";
> +     }


Elsewhere the above idiom is named like intel_pt__strerror(int err) {}, i.e. a
function that maps an errno to a string is called "strerror"; see for instance:

[acme@zoo linux]$ grep __strerror tools/perf/*.c
tools/perf/builtin-kvm.c:               target__strerror(&kvm->opts.target, 
err, errbuf, BUFSIZ);
tools/perf/builtin-record.c:            target__strerror(&rec->opts.target, 
err, errbuf, BUFSIZ);
tools/perf/builtin-record.c:            target__strerror(&rec->opts.target, 
err, errbuf, BUFSIZ);
tools/perf/builtin-top.c:                               
dso__strerror_load(al.map->dso, serr, sizeof(serr));
tools/perf/builtin-top.c:               target__strerror(target, status, 
errbuf, BUFSIZ);
tools/perf/builtin-top.c:               target__strerror(target, status, 
errbuf, BUFSIZ);
tools/perf/builtin-trace.c:     debugfs__strerror_open_tp(errno, errbuf, 
sizeof(errbuf), "sched", "sched_stat_runtime");
tools/perf/builtin-trace.c:     debugfs__strerror_open_tp(errno, errbuf, 
sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
tools/perf/builtin-trace.c:     perf_evlist__strerror_mmap(evlist, errno, 
errbuf, sizeof(errbuf));
tools/perf/builtin-trace.c:     perf_evlist__strerror_open(evlist, errno, 
errbuf, sizeof(errbuf));
tools/perf/builtin-trace.c:             target__strerror(&trace.opts.target, 
err, bf, sizeof(bf));
tools/perf/builtin-trace.c:             target__strerror(&trace.opts.target, 
err, bf, sizeof(bf));
[acme@zoo linux]$ 

Also, we try to be consistent in separating the class name (intel_pt) from the
method (strerror).

> +}
> +
> +static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder,
> +                              const struct intel_pt_pkt *packet,
> +                              uint64_t last_ip)
> +{
> +     uint64_t ip;
> +
> +     switch (packet->count) {
> +     case 2:
> +             ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
> +                  packet->payload;
> +             break;
> +     case 4:
> +             ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
> +                  packet->payload;
> +             break;
> +     case 6:
> +             ip = packet->payload;
> +             break;
> +     default:
> +             return 0;
> +     }
> +
> +     if (ip & decoder->sign_bit)
> +             return ip | decoder->sign_bits;
> +
> +     return ip;
> +}
> +
> +static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
> +{
> +     decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet,
> +                                         decoder->last_ip);
> +}
> +
> +static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
> +{
> +     intel_pt_set_last_ip(decoder);
> +     decoder->ip = decoder->last_ip;
> +}
> +
> +static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder)
> +{
> +     intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos,
> +                         decoder->buf);
> +}
> +
> +static int intel_pt_bug(struct intel_pt_decoder *decoder)
> +{
> +     intel_pt_log("ERROR: Internal error\n");
> +     decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
> +     return -ENOSYS;
> +}
> +
> +static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder)
> +{
> +     decoder->tx_flags = 0;
> +}
> +
> +static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder)
> +{
> +     decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX;
> +}
> +
> +static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
> +{
> +     intel_pt_clear_tx_flags(decoder);
> +     decoder->pkt_len = 1;
> +     decoder->pkt_step = 1;
> +     intel_pt_decoder_log_packet(decoder);
> +     if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) {
> +             intel_pt_log("ERROR: Bad packet\n");


And all this intel_pt_log(), do we really need a separate logging facility?
What is wrong with using pr_err(), pr_warning(), pr_debug(), as tools/ tries to
do (we need to improve that more, but still), and as the kernel does as well?

> +             decoder->pkt_state = INTEL_PT_STATE_ERR1;
> +     }
> +     return -EBADMSG;
> +}
> +
> +static int intel_pt_get_data(struct intel_pt_decoder *decoder)
> +{
> +     struct intel_pt_buffer buffer = { .buf = 0, };
> +     int ret;
> +
> +     decoder->pkt_step = 0;
> +
> +     intel_pt_log("Getting more data\n");
> +     ret = decoder->get_trace(&buffer, decoder->data);
> +     if (ret)
> +             return ret;
> +     decoder->buf = buffer.buf;
> +     decoder->len = buffer.len;
> +     if (!decoder->len) {
> +             intel_pt_log("No more data\n");
> +             return -ENODATA;
> +     }
> +     if (!buffer.consecutive) {
> +             decoder->ip = 0;
> +             decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
> +             decoder->ref_timestamp = buffer.ref_timestamp;
> +             decoder->timestamp = 0;
> +             decoder->state.trace_nr = buffer.trace_nr;
> +             intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
> +                          decoder->ref_timestamp);
> +             return -ENOLINK;
> +     }
> +
> +     return 0;
> +}
> +
> +static int intel_pt_get_next_data(struct intel_pt_decoder *decoder)
> +{
> +     if (!decoder->next_buf)
> +             return intel_pt_get_data(decoder);
> +
> +     decoder->buf = decoder->next_buf;
> +     decoder->len = decoder->next_len;
> +     decoder->next_buf = 0;
> +     decoder->next_len = 0;
> +     return 0;
> +}
> +
> +static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
> +{
> +     unsigned char *buf = decoder->temp_buf;
> +     size_t old_len, len, n;
> +     int ret;
> +
> +     old_len = decoder->len;
> +     len = decoder->len;
> +     memcpy(buf, decoder->buf, len);
> +
> +     ret = intel_pt_get_data(decoder);
> +     if (ret) {
> +             decoder->pos += old_len;
> +             return ret < 0 ? ret : -EINVAL;
> +     }
> +
> +     n = INTEL_PT_PKT_MAX_SZ - len;
> +     if (n > decoder->len)
> +             n = decoder->len;
> +     memcpy(buf + len, decoder->buf, n);
> +     len += n;
> +
> +     ret = intel_pt_get_packet(buf, len, &decoder->packet);
> +     if (ret < (int)old_len) {
> +             decoder->next_buf = decoder->buf;
> +             decoder->next_len = decoder->len;
> +             decoder->buf = buf;
> +             decoder->len = old_len;
> +             return intel_pt_bad_packet(decoder);
> +     }
> +
> +     decoder->next_buf = decoder->buf + (ret - old_len);
> +     decoder->next_len = decoder->len - (ret - old_len);
> +
> +     decoder->buf = buf;
> +     decoder->len = ret;
> +
> +     return ret;
> +}
> +
> +static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
> +{
> +     int ret;
> +
> +     do {
> +             decoder->pos += decoder->pkt_step;
> +             decoder->buf += decoder->pkt_step;
> +             decoder->len -= decoder->pkt_step;
> +
> +             if (!decoder->len) {
> +                     ret = intel_pt_get_next_data(decoder);
> +                     if (ret)
> +                             return ret;
> +             }
> +
> +             ret = intel_pt_get_packet(decoder->buf, decoder->len,
> +                                       &decoder->packet);
> +             if (ret == INTEL_PT_NEED_MORE_BYTES &&
> +                 decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
> +                     ret = intel_pt_get_split_packet(decoder);
> +                     if (ret < 0)
> +                             return ret;
> +             }
> +             if (ret <= 0)
> +                     return intel_pt_bad_packet(decoder);
> +
> +             decoder->pkt_len = ret;
> +             decoder->pkt_step = ret;
> +             intel_pt_decoder_log_packet(decoder);
> +     } while (decoder->packet.type == INTEL_PT_PAD);
> +
> +     return 0;
> +}
> +
> +static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder)
> +{
> +     uint64_t timestamp, masked_timestamp;
> +
> +     timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
> +     masked_timestamp = timestamp & decoder->period_mask;
> +     if (decoder->continuous_period) {
> +             if (masked_timestamp != decoder->last_masked_timestamp)
> +                     return 1;
> +     } else {
> +             timestamp += 1;
> +             masked_timestamp = timestamp & decoder->period_mask;
> +             if (masked_timestamp != decoder->last_masked_timestamp) {
> +                     decoder->last_masked_timestamp = masked_timestamp;
> +                     decoder->continuous_period = true;
> +             }
> +     }
> +     return decoder->period_ticks - (timestamp - masked_timestamp);
> +}
> +
> +static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder)
> +{
> +     switch (decoder->period_type) {
> +     case INTEL_PT_PERIOD_INSTRUCTIONS:
> +             return decoder->period - decoder->period_insn_cnt;
> +     case INTEL_PT_PERIOD_TICKS:
> +             return intel_pt_next_period(decoder);
> +     case INTEL_PT_PERIOD_NONE:
> +     default:
> +             return 0;
> +     }
> +}
> +
> +static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
> +{
> +     uint64_t timestamp, masked_timestamp;
> +
> +     switch (decoder->period_type) {
> +     case INTEL_PT_PERIOD_INSTRUCTIONS:
> +             decoder->period_insn_cnt = 0;
> +             break;
> +     case INTEL_PT_PERIOD_TICKS:
> +             timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
> +             masked_timestamp = timestamp & decoder->period_mask;
> +             decoder->last_masked_timestamp = masked_timestamp;
> +             break;
> +     case INTEL_PT_PERIOD_NONE:
> +     default:
> +             break;
> +     }
> +
> +     decoder->state.type |= INTEL_PT_INSTRUCTION;
> +}
> +
> +static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
> +                           struct intel_pt_insn *intel_pt_insn, uint64_t ip)
> +{
> +     uint64_t max_insn_cnt, insn_cnt = 0;
> +     int err;
> +
> +     max_insn_cnt = intel_pt_next_sample(decoder);
> +
> +     err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip,
> +                              max_insn_cnt, decoder->data);
> +
> +     decoder->timestamp_insn_cnt += insn_cnt;
> +     decoder->period_insn_cnt += insn_cnt;
> +
> +     if (err) {
> +             decoder->pkt_state = INTEL_PT_STATE_ERR2;
> +             intel_pt_log_at("ERROR: Failed to get instruction",
> +                             decoder->ip);
> +             if (err == -ENOENT)
> +                     return -ENOLINK;
> +             return -EILSEQ;
> +     }
> +
> +     if (ip && decoder->ip == ip) {
> +             err = -EAGAIN;
> +             goto out;
> +     }
> +
> +     if (max_insn_cnt && insn_cnt >= max_insn_cnt)
> +             intel_pt_sample_insn(decoder);
> +
> +     if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) {
> +             decoder->state.type = INTEL_PT_INSTRUCTION;
> +             decoder->state.from_ip = decoder->ip;
> +             decoder->state.to_ip = 0;
> +             decoder->ip += intel_pt_insn->length;
> +             err = INTEL_PT_RETURN;
> +             goto out;
> +     }
> +
> +     if (intel_pt_insn->op == INTEL_PT_OP_CALL) {
> +             /* Zero-length calls are excluded */
> +             if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL ||
> +                 intel_pt_insn->rel) {
> +                     err = intel_pt_push(&decoder->stack, decoder->ip +
> +                                         intel_pt_insn->length);
> +                     if (err)
> +                             goto out;
> +             }
> +     } else if (intel_pt_insn->op == INTEL_PT_OP_RET) {
> +             decoder->ret_addr = intel_pt_pop(&decoder->stack);
> +     }
> +
> +     if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) {
> +             decoder->state.from_ip = decoder->ip;
> +             decoder->ip += intel_pt_insn->length +
> +                             intel_pt_insn->rel;
> +             decoder->state.to_ip = decoder->ip;
> +             err = INTEL_PT_RETURN;
> +     }
> +out:
> +     decoder->state.insn_op = intel_pt_insn->op;
> +     decoder->state.insn_len = intel_pt_insn->length;
> +
> +     if (decoder->tx_flags & INTEL_PT_IN_TX)
> +             decoder->state.flags |= INTEL_PT_IN_TX;
> +
> +     return err;
> +}
> +
> +static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
> +{
> +     struct intel_pt_insn intel_pt_insn;
> +     uint64_t ip;
> +     int err;
> +
> +     ip = decoder->last_ip;
> +
> +     while (1) {
> +             err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip);
> +             if (err == INTEL_PT_RETURN)
> +                     return 0;
> +             if (err == -EAGAIN) {
> +                     if (decoder->set_fup_tx_flags) {
> +                             decoder->set_fup_tx_flags = false;
> +                             decoder->tx_flags = decoder->fup_tx_flags;
> +                             decoder->state.type = INTEL_PT_TRANSACTION;
> +                             decoder->state.from_ip = decoder->ip;
> +                             decoder->state.to_ip = 0;
> +                             decoder->state.flags = decoder->fup_tx_flags;
> +                             return 0;
> +                     }
> +                     return err;
> +             }
> +             decoder->set_fup_tx_flags = false;
> +             if (err)
> +                     return err;
> +
> +             if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
> +                     intel_pt_log_at("ERROR: Unexpected indirect branch",
> +                                     decoder->ip);
> +                     decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
> +                     return -ENOENT;
> +             }
> +
> +             if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
> +                     intel_pt_log_at("ERROR: Unexpected conditional branch",
> +                                     decoder->ip);
> +                     decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
> +                     return -ENOENT;
> +             }
> +
> +             intel_pt_bug(decoder);
> +     }
> +}
> +
> +static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
> +{
> +     struct intel_pt_insn intel_pt_insn;
> +     int err;
> +
> +     err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
> +     if (err == INTEL_PT_RETURN)
> +             return 0;
> +     if (err)
> +             return err;
> +
> +     if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
> +             if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) {
> +                     decoder->pge = false;
> +                     decoder->continuous_period = false;
> +                     decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> +                     decoder->state.from_ip = decoder->ip;
> +                     decoder->state.to_ip = 0;
> +                     if (decoder->packet.count != 0)
> +                             decoder->ip = decoder->last_ip;
> +             } else {
> +                     decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> +                     decoder->state.from_ip = decoder->ip;
> +                     if (decoder->packet.count == 0) {
> +                             decoder->state.to_ip = 0;
> +                     } else {
> +                             decoder->state.to_ip = decoder->last_ip;
> +                             decoder->ip = decoder->last_ip;
> +                     }
> +             }
> +             return 0;
> +     }
> +
> +     if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
> +             intel_pt_log_at("ERROR: Conditional branch when expecting 
> indirect branch",
> +                             decoder->ip);
> +             decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
> +             return -ENOENT;
> +     }
> +
> +     return intel_pt_bug(decoder);
> +}
> +
> +static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
> +{
> +     struct intel_pt_insn intel_pt_insn;
> +     int err;
> +
> +     while (1) {
> +             err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
> +             if (err == INTEL_PT_RETURN)
> +                     return 0;
> +             if (err)
> +                     return err;
> +
> +             if (intel_pt_insn.op == INTEL_PT_OP_RET) {
> +                     if (!decoder->return_compression) {
> +                             intel_pt_log_at("ERROR: RET when expecting 
> conditional branch",
> +                                             decoder->ip);
> +                             decoder->pkt_state = INTEL_PT_STATE_ERR3;
> +                             return -ENOENT;
> +                     }
> +                     if (!decoder->ret_addr) {
> +                             intel_pt_log_at("ERROR: Bad RET compression 
> (stack empty)",
> +                                             decoder->ip);
> +                             decoder->pkt_state = INTEL_PT_STATE_ERR3;
> +                             return -ENOENT;
> +                     }
> +                     if (!(decoder->tnt.payload & BIT63)) {
> +                             intel_pt_log_at("ERROR: Bad RET compression 
> (TNT=N)",
> +                                             decoder->ip);
> +                             decoder->pkt_state = INTEL_PT_STATE_ERR3;
> +                             return -ENOENT;
> +                     }
> +                     decoder->tnt.count -= 1;
> +                     if (!decoder->tnt.count)
> +                             decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> +                     decoder->tnt.payload <<= 1;
> +                     decoder->state.from_ip = decoder->ip;
> +                     decoder->ip = decoder->ret_addr;
> +                     decoder->state.to_ip = decoder->ip;
> +                     return 0;
> +             }
> +
> +             if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
> +                     /* Handle deferred TIPs */
> +                     err = intel_pt_get_next_packet(decoder);
> +                     if (err)
> +                             return err;
> +                     if (decoder->packet.type != INTEL_PT_TIP ||
> +                         decoder->packet.count == 0) {
> +                             intel_pt_log_at("ERROR: Missing deferred TIP 
> for indirect branch",
> +                                             decoder->ip);
> +                             decoder->pkt_state = INTEL_PT_STATE_ERR3;
> +                             decoder->pkt_step = 0;
> +                             return -ENOENT;
> +                     }
> +                     intel_pt_set_last_ip(decoder);
> +                     decoder->state.from_ip = decoder->ip;
> +                     decoder->state.to_ip = decoder->last_ip;
> +                     decoder->ip = decoder->last_ip;
> +                     return 0;
> +             }
> +
> +             if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
> +                     decoder->tnt.count -= 1;
> +                     if (!decoder->tnt.count)
> +                             decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> +                     if (decoder->tnt.payload & BIT63) {
> +                             decoder->tnt.payload <<= 1;
> +                             decoder->state.from_ip = decoder->ip;
> +                             decoder->ip += intel_pt_insn.length +
> +                                            intel_pt_insn.rel;
> +                             decoder->state.to_ip = decoder->ip;
> +                             return 0;
> +                     }
> +                     /* Instruction sample for a non-taken branch */
> +                     if (decoder->state.type & INTEL_PT_INSTRUCTION) {
> +                             decoder->tnt.payload <<= 1;
> +                             decoder->state.type = INTEL_PT_INSTRUCTION;
> +                             decoder->state.from_ip = decoder->ip;
> +                             decoder->state.to_ip = 0;
> +                             decoder->ip += intel_pt_insn.length;
> +                             return 0;
> +                     }
> +                     decoder->ip += intel_pt_insn.length;
> +                     if (!decoder->tnt.count)
> +                             return -EAGAIN;
> +                     decoder->tnt.payload <<= 1;
> +                     continue;
> +             }
> +
> +             return intel_pt_bug(decoder);
> +     }
> +}
> +
> +static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
> +{
> +     unsigned int fup_tx_flags;
> +     int err;
> +
> +     fup_tx_flags = decoder->packet.payload &
> +                    (INTEL_PT_IN_TX | INTEL_PT_ABORT_TX);
> +     err = intel_pt_get_next_packet(decoder);
> +     if (err)
> +             return err;
> +     if (decoder->packet.type == INTEL_PT_FUP) {
> +             decoder->fup_tx_flags = fup_tx_flags;
> +             decoder->set_fup_tx_flags = true;
> +             if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX))
> +                     *no_tip = true;
> +     } else {
> +             intel_pt_log_at("ERROR: Missing FUP after MODE.TSX",
> +                             decoder->pos);
> +             intel_pt_update_in_tx(decoder);
> +     }
> +     return 0;
> +}
> +
> +static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
> +{
> +     uint64_t timestamp;
> +
> +     if (decoder->ref_timestamp) {
> +             timestamp = decoder->packet.payload |
> +                         (decoder->ref_timestamp & (0xffULL << 56));
> +             if (timestamp < decoder->ref_timestamp) {
> +                     if (decoder->ref_timestamp - timestamp > (1ULL << 55))
> +                             timestamp += (1ULL << 56);
> +             } else {
> +                     if (timestamp - decoder->ref_timestamp > (1ULL << 55))
> +                             timestamp -= (1ULL << 56);
> +             }
> +             decoder->tsc_timestamp = timestamp;
> +             decoder->timestamp = timestamp;
> +             decoder->ref_timestamp = 0;
> +             decoder->timestamp_insn_cnt = 0;
> +     } else if (decoder->timestamp) {
> +             timestamp = decoder->packet.payload |
> +                         (decoder->timestamp & (0xffULL << 56));
> +             if (timestamp < decoder->timestamp &&
> +                 decoder->timestamp - timestamp < 0x100) {
> +                     intel_pt_log_to("ERROR: Suppressing backwards timestamp",
> +                                     timestamp);
> +                     timestamp = decoder->timestamp;
> +             }
> +             while (timestamp < decoder->timestamp) {
> +                     intel_pt_log_to("Wraparound timestamp", timestamp);
> +                     timestamp += (1ULL << 56);
> +             }
> +             decoder->tsc_timestamp = timestamp;
> +             decoder->timestamp = timestamp;
> +             decoder->timestamp_insn_cnt = 0;
> +     }
> +
> +     intel_pt_log_to("Setting timestamp", decoder->timestamp);
> +}
> +
> +static int intel_pt_overflow(struct intel_pt_decoder *decoder)
> +{
> +     intel_pt_log("ERROR: Buffer overflow\n");
> +     intel_pt_clear_tx_flags(decoder);
> +     decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
> +     decoder->overflow = true;
> +     return -EOVERFLOW;
> +}
> +
> +/* Walk PSB+ packets when already in sync. */
> +static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
> +{
> +     int err;
> +
> +     while (1) {
> +             err = intel_pt_get_next_packet(decoder);
> +             if (err)
> +                     return err;
> +
> +             switch (decoder->packet.type) {
> +             case INTEL_PT_PSBEND:
> +                     return 0;
> +
> +             case INTEL_PT_TIP_PGD:
> +             case INTEL_PT_TIP_PGE:
> +             case INTEL_PT_TIP:
> +             case INTEL_PT_TNT:
> +             case INTEL_PT_BAD:
> +             case INTEL_PT_PSB:
> +                     intel_pt_log("ERROR: Unexpected packet\n");
> +                     return -EAGAIN;
> +
> +             case INTEL_PT_OVF:
> +                     return intel_pt_overflow(decoder);
> +
> +             case INTEL_PT_TSC:
> +                     intel_pt_calc_tsc_timestamp(decoder);
> +                     break;
> +
> +             case INTEL_PT_CBR:
> +                     decoder->cbr = decoder->packet.payload;
> +                     break;
> +
> +             case INTEL_PT_MODE_EXEC:
> +                     decoder->exec_mode = decoder->packet.payload;
> +                     break;
> +
> +             case INTEL_PT_PIP:
> +                     decoder->cr3 = decoder->packet.payload;
> +                     break;
> +
> +             case INTEL_PT_FUP:
> +                     decoder->pge = true;
> +                     break;
> +
> +             case INTEL_PT_MODE_TSX:
> +                     intel_pt_update_in_tx(decoder);
> +                     break;
> +
> +             case INTEL_PT_PAD:
> +             default:
> +                     break;
> +             }
> +     }
> +}
> +
> +static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
> +{
> +     int err;
> +
> +     if (decoder->tx_flags & INTEL_PT_ABORT_TX) {
> +             decoder->tx_flags = 0;
> +             decoder->state.flags &= ~INTEL_PT_IN_TX;
> +             decoder->state.flags |= INTEL_PT_ABORT_TX;
> +     } else {
> +             decoder->state.flags |= INTEL_PT_ASYNC;
> +     }
> +
> +     while (1) {
> +             err = intel_pt_get_next_packet(decoder);
> +             if (err)
> +                     return err;
> +
> +             switch (decoder->packet.type) {
> +             case INTEL_PT_TNT:
> +             case INTEL_PT_FUP:
> +             case INTEL_PT_PSB:
> +             case INTEL_PT_TSC:
> +             case INTEL_PT_CBR:
> +             case INTEL_PT_MODE_TSX:
> +             case INTEL_PT_BAD:
> +             case INTEL_PT_PSBEND:
> +                     intel_pt_log("ERROR: Missing TIP after FUP\n");
> +                     decoder->pkt_state = INTEL_PT_STATE_ERR3;
> +                     return -ENOENT;
> +
> +             case INTEL_PT_OVF:
> +                     return intel_pt_overflow(decoder);
> +
> +             case INTEL_PT_TIP_PGD:
> +                     decoder->state.from_ip = decoder->ip;
> +                     decoder->state.to_ip = 0;
> +                     if (decoder->packet.count != 0) {
> +                             intel_pt_set_ip(decoder);
> +                             intel_pt_log("Omitting PGD ip " x64_fmt "\n",
> +                                          decoder->ip);
> +                     }
> +                     decoder->pge = false;
> +                     decoder->continuous_period = false;
> +                     return 0;
> +
> +             case INTEL_PT_TIP_PGE:
> +                     decoder->pge = true;
> +                     intel_pt_log("Omitting PGE ip " x64_fmt "\n",
> +                                  decoder->ip);
> +                     decoder->state.from_ip = 0;
> +                     if (decoder->packet.count == 0) {
> +                             decoder->state.to_ip = 0;
> +                     } else {
> +                             intel_pt_set_ip(decoder);
> +                             decoder->state.to_ip = decoder->ip;
> +                     }
> +                     return 0;
> +
> +             case INTEL_PT_TIP:
> +                     decoder->state.from_ip = decoder->ip;
> +                     if (decoder->packet.count == 0) {
> +                             decoder->state.to_ip = 0;
> +                     } else {
> +                             intel_pt_set_ip(decoder);
> +                             decoder->state.to_ip = decoder->ip;
> +                     }
> +                     return 0;
> +
> +             case INTEL_PT_PIP:
> +                     decoder->cr3 = decoder->packet.payload;
> +                     break;
> +
> +             case INTEL_PT_MODE_EXEC:
> +                     decoder->exec_mode = decoder->packet.payload;
> +                     break;
> +
> +             case INTEL_PT_PAD:
> +                     break;
> +
> +             default:
> +                     return intel_pt_bug(decoder);
> +             }
> +     }
> +}
> +
> +static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
> +{
> +     bool no_tip = false;
> +     int err;
> +
> +     while (1) {
> +             err = intel_pt_get_next_packet(decoder);
> +             if (err)
> +                     return err;
> +next:
> +             switch (decoder->packet.type) {
> +             case INTEL_PT_TNT:
> +                     if (!decoder->packet.count)
> +                             break;
> +                     decoder->tnt = decoder->packet;
> +                     decoder->pkt_state = INTEL_PT_STATE_TNT;
> +                     err = intel_pt_walk_tnt(decoder);
> +                     if (err == -EAGAIN)
> +                             break;
> +                     return err;
> +
> +             case INTEL_PT_TIP_PGD:
> +                     if (decoder->packet.count != 0)
> +                             intel_pt_set_last_ip(decoder);
> +                     decoder->pkt_state = INTEL_PT_STATE_TIP_PGD;
> +                     return intel_pt_walk_tip(decoder);
> +
> +             case INTEL_PT_TIP_PGE: {
> +                     decoder->pge = true;
> +                     if (decoder->packet.count == 0) {
> +                             intel_pt_log_at("Skipping zero TIP.PGE",
> +                                             decoder->pos);
> +                             break;
> +                     }
> +                     intel_pt_set_ip(decoder);
> +                     decoder->state.from_ip = 0;
> +                     decoder->state.to_ip = decoder->ip;
> +                     return 0;
> +             }
> +
> +             case INTEL_PT_OVF:
> +                     return intel_pt_overflow(decoder);
> +
> +             case INTEL_PT_TIP:
> +                     if (decoder->packet.count != 0)
> +                             intel_pt_set_last_ip(decoder);
> +                     decoder->pkt_state = INTEL_PT_STATE_TIP;
> +                     return intel_pt_walk_tip(decoder);
> +
> +             case INTEL_PT_FUP:
> +                     if (decoder->packet.count == 0) {
> +                             intel_pt_log_at("Skipping zero FUP",
> +                                             decoder->pos);
> +                             no_tip = false;
> +                             break;
> +                     }
> +                     intel_pt_set_last_ip(decoder);
> +                     err = intel_pt_walk_fup(decoder);
> +                     if (err != -EAGAIN) {
> +                             if (err)
> +                                     return err;
> +                             if (no_tip)
> +                                     decoder->pkt_state =
> +                                             INTEL_PT_STATE_FUP_NO_TIP;
> +                             else
> +                                     decoder->pkt_state = INTEL_PT_STATE_FUP;
> +                             return 0;
> +                     }
> +                     if (no_tip) {
> +                             no_tip = false;
> +                             break;
> +                     }
> +                     return intel_pt_walk_fup_tip(decoder);
> +
> +             case INTEL_PT_PSB:
> +                     intel_pt_clear_stack(&decoder->stack);
> +                     err = intel_pt_walk_psbend(decoder);
> +                     if (err == -EAGAIN)
> +                             goto next;
> +                     if (err)
> +                             return err;
> +                     break;
> +
> +             case INTEL_PT_PIP:
> +                     decoder->cr3 = decoder->packet.payload;
> +                     break;
> +
> +             case INTEL_PT_TSC:
> +                     intel_pt_calc_tsc_timestamp(decoder);
> +                     break;
> +
> +             case INTEL_PT_CBR:
> +                     decoder->cbr = decoder->packet.payload;
> +                     break;
> +
> +             case INTEL_PT_MODE_EXEC:
> +                     decoder->exec_mode = decoder->packet.payload;
> +                     break;
> +
> +             case INTEL_PT_MODE_TSX:
> +                     /* MODE_TSX need not be followed by FUP */
> +                     if (!decoder->pge) {
> +                             intel_pt_update_in_tx(decoder);
> +                             break;
> +                     }
> +                     err = intel_pt_mode_tsx(decoder, &no_tip);
> +                     if (err)
> +                             return err;
> +                     goto next;
> +
> +             case INTEL_PT_BAD: /* Does not happen */
> +                     return intel_pt_bug(decoder);
> +
> +             case INTEL_PT_PSBEND:
> +             case INTEL_PT_PAD:
> +                     break;
> +
> +             default:
> +                     return intel_pt_bug(decoder);
> +             }
> +     }
> +}
> +
> +/* Walk PSB+ packets to get in sync. */
> +static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
> +{
> +     int err;
> +
> +     while (1) {
> +             err = intel_pt_get_next_packet(decoder);
> +             if (err)
> +                     return err;
> +
> +             switch (decoder->packet.type) {
> +             case INTEL_PT_TIP_PGD:
> +                     decoder->continuous_period = false;
> +             case INTEL_PT_TIP_PGE:
> +             case INTEL_PT_TIP:
> +                     intel_pt_log("ERROR: Unexpected packet\n");
> +                     return -ENOENT;
> +
> +             case INTEL_PT_FUP:
> +                     decoder->pge = true;
> +                     if (decoder->last_ip || decoder->packet.count == 6 ||
> +                         decoder->packet.count == 0) {
> +                             uint64_t current_ip = decoder->ip;
> +
> +                             intel_pt_set_ip(decoder);
> +                             if (current_ip)
> +                                     intel_pt_log_to("Setting IP",
> +                                                     decoder->ip);
> +                     }
> +                     break;
> +
> +             case INTEL_PT_TSC:
> +                     intel_pt_calc_tsc_timestamp(decoder);
> +                     break;
> +
> +             case INTEL_PT_CBR:
> +                     decoder->cbr = decoder->packet.payload;
> +                     break;
> +
> +             case INTEL_PT_PIP:
> +                     decoder->cr3 = decoder->packet.payload;
> +                     break;
> +
> +             case INTEL_PT_MODE_EXEC:
> +                     decoder->exec_mode = decoder->packet.payload;
> +                     break;
> +
> +             case INTEL_PT_MODE_TSX:
> +                     intel_pt_update_in_tx(decoder);
> +                     break;
> +
> +             case INTEL_PT_TNT:
> +                     intel_pt_log("ERROR: Unexpected packet\n");
> +                     if (decoder->ip)
> +                             decoder->pkt_state = INTEL_PT_STATE_ERR4;
> +                     else
> +                             decoder->pkt_state = INTEL_PT_STATE_ERR3;
> +                     return -ENOENT;
> +
> +             case INTEL_PT_BAD: /* Does not happen */
> +                     return intel_pt_bug(decoder);
> +
> +             case INTEL_PT_OVF:
> +                     return intel_pt_overflow(decoder);
> +
> +             case INTEL_PT_PSBEND:
> +                     return 0;
> +
> +             case INTEL_PT_PSB:
> +             case INTEL_PT_PAD:
> +             default:
> +                     break;
> +             }
> +     }
> +}
> +
> +static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
> +{
> +     int err;
> +
> +     while (1) {
> +             err = intel_pt_get_next_packet(decoder);
> +             if (err)
> +                     return err;
> +
> +             switch (decoder->packet.type) {
> +             case INTEL_PT_TIP_PGD:
> +                     decoder->continuous_period = false;
> +             case INTEL_PT_TIP_PGE:
> +             case INTEL_PT_TIP:
> +                     decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD;
> +                     if (decoder->last_ip || decoder->packet.count == 6 ||
> +                         decoder->packet.count == 0)
> +                             intel_pt_set_ip(decoder);
> +                     if (decoder->ip)
> +                             return 0;
> +                     break;
> +
> +             case INTEL_PT_FUP:
> +                     if (decoder->overflow) {
> +                             if (decoder->last_ip ||
> +                                 decoder->packet.count == 6 ||
> +                                 decoder->packet.count == 0)
> +                                     intel_pt_set_ip(decoder);
> +                             if (decoder->ip)
> +                                     return 0;
> +                     }
> +                     if (decoder->packet.count)
> +                             intel_pt_set_last_ip(decoder);
> +                     break;
> +
> +             case INTEL_PT_TSC:
> +                     intel_pt_calc_tsc_timestamp(decoder);
> +                     break;
> +
> +             case INTEL_PT_CBR:
> +                     decoder->cbr = decoder->packet.payload;
> +                     break;
> +
> +             case INTEL_PT_PIP:
> +                     decoder->cr3 = decoder->packet.payload;
> +                     break;
> +
> +             case INTEL_PT_MODE_EXEC:
> +                     decoder->exec_mode = decoder->packet.payload;
> +                     break;
> +
> +             case INTEL_PT_MODE_TSX:
> +                     intel_pt_update_in_tx(decoder);
> +                     break;
> +
> +             case INTEL_PT_OVF:
> +                     return intel_pt_overflow(decoder);
> +
> +             case INTEL_PT_BAD: /* Does not happen */
> +                     return intel_pt_bug(decoder);
> +
> +             case INTEL_PT_PSB:
> +                     err = intel_pt_walk_psb(decoder);
> +                     if (err)
> +                             return err;
> +                     if (decoder->ip) {
> +                             /* Do not have a sample */
> +                             decoder->state.type = 0;
> +                             return 0;
> +                     }
> +                     break;
> +
> +             case INTEL_PT_TNT:
> +             case INTEL_PT_PSBEND:
> +             case INTEL_PT_PAD:
> +             default:
> +                     break;
> +             }
> +     }
> +}
> +
> +static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
> +{
> +     int err;
> +
> +     intel_pt_log("Scanning for full IP\n");
> +     err = intel_pt_walk_to_ip(decoder);
> +     if (err)
> +             return err;
> +
> +     decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> +     decoder->overflow = false;
> +
> +     decoder->state.from_ip = 0;
> +     decoder->state.to_ip = decoder->ip;
> +     intel_pt_log_to("Setting IP", decoder->ip);
> +
> +     return 0;
> +}
> +
> +static int intel_pt_part_psb(struct intel_pt_decoder *decoder)
> +{
> +     const unsigned char *end = decoder->buf + decoder->len;
> +     size_t i;
> +
> +     for (i = INTEL_PT_PSB_LEN - 1; i; i--) {
> +             if (i > decoder->len)
> +                     continue;
> +             if (!memcmp(end - i, INTEL_PT_PSB_STR, i))
> +                     return i;
> +     }
> +     return 0;
> +}
> +
> +static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb)
> +{
> +     size_t rest_psb = INTEL_PT_PSB_LEN - part_psb;
> +     const char *psb = INTEL_PT_PSB_STR;
> +
> +     if (rest_psb > decoder->len ||
> +         memcmp(decoder->buf, psb + part_psb, rest_psb))
> +             return 0;
> +
> +     return rest_psb;
> +}
> +
> +static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder,
> +                               int part_psb)
> +{
> +     int rest_psb, ret;
> +
> +     decoder->pos += decoder->len;
> +     decoder->len = 0;
> +
> +     ret = intel_pt_get_next_data(decoder);
> +     if (ret)
> +             return ret;
> +
> +     rest_psb = intel_pt_rest_psb(decoder, part_psb);
> +     if (!rest_psb)
> +             return 0;
> +
> +     decoder->pos -= part_psb;
> +     decoder->next_buf = decoder->buf + rest_psb;
> +     decoder->next_len = decoder->len - rest_psb;
> +     memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
> +     decoder->buf = decoder->temp_buf;
> +     decoder->len = INTEL_PT_PSB_LEN;
> +
> +     return 0;
> +}
> +
> +static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder)
> +{
> +     unsigned char *next;
> +     int ret;
> +
> +     intel_pt_log("Scanning for PSB\n");
> +     while (1) {
> +             if (!decoder->len) {
> +                     ret = intel_pt_get_next_data(decoder);
> +                     if (ret)
> +                             return ret;
> +             }
> +
> +             next = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR,
> +                           INTEL_PT_PSB_LEN);
> +             if (!next) {
> +                     int part_psb;
> +
> +                     part_psb = intel_pt_part_psb(decoder);
> +                     if (part_psb) {
> +                             ret = intel_pt_get_split_psb(decoder, part_psb);
> +                             if (ret)
> +                                     return ret;
> +                     } else {
> +                             decoder->pos += decoder->len;
> +                             decoder->len = 0;
> +                     }
> +                     continue;
> +             }
> +
> +             decoder->pkt_step = next - decoder->buf;
> +             return intel_pt_get_next_packet(decoder);
> +     }
> +}
> +
> +static int intel_pt_sync(struct intel_pt_decoder *decoder)
> +{
> +     int err;
> +
> +     decoder->pge = false;
> +     decoder->continuous_period = false;
> +     decoder->last_ip = 0;
> +     decoder->ip = 0;
> +     intel_pt_clear_stack(&decoder->stack);
> +
> +     err = intel_pt_scan_for_psb(decoder);
> +     if (err)
> +             return err;
> +
> +     decoder->pkt_state = INTEL_PT_STATE_NO_IP;
> +
> +     err = intel_pt_walk_psb(decoder);
> +     if (err)
> +             return err;
> +
> +     if (decoder->ip) {
> +             decoder->state.type = 0; /* Do not have a sample */
> +             decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> +     } else {
> +             return intel_pt_sync_ip(decoder);
> +     }
> +
> +     return 0;
> +}
> +
> +const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
> +{
> +     int err;
> +
> +     do {
> +             decoder->state.type = INTEL_PT_BRANCH;
> +             decoder->state.flags = 0;
> +
> +             switch (decoder->pkt_state) {
> +             case INTEL_PT_STATE_NO_PSB:
> +                     err = intel_pt_sync(decoder);
> +                     break;
> +             case INTEL_PT_STATE_NO_IP:
> +                     decoder->last_ip = 0;
> +                     /* Fall through */
> +             case INTEL_PT_STATE_ERR_RESYNC:
> +                     err = intel_pt_sync_ip(decoder);
> +                     break;
> +             case INTEL_PT_STATE_IN_SYNC:
> +                     err = intel_pt_walk_trace(decoder);
> +                     break;
> +             case INTEL_PT_STATE_TNT:
> +                     err = intel_pt_walk_tnt(decoder);
> +                     if (err == -EAGAIN)
> +                             err = intel_pt_walk_trace(decoder);
> +                     break;
> +             case INTEL_PT_STATE_TIP:
> +             case INTEL_PT_STATE_TIP_PGD:
> +                     err = intel_pt_walk_tip(decoder);
> +                     break;
> +             case INTEL_PT_STATE_FUP:
> +                     decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> +                     err = intel_pt_walk_fup(decoder);
> +                     if (err == -EAGAIN)
> +                             err = intel_pt_walk_fup_tip(decoder);
> +                     else if (!err)
> +                             decoder->pkt_state = INTEL_PT_STATE_FUP;
> +                     break;
> +             case INTEL_PT_STATE_FUP_NO_TIP:
> +                     decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> +                     err = intel_pt_walk_fup(decoder);
> +                     if (err == -EAGAIN)
> +                             err = intel_pt_walk_trace(decoder);
> +                     break;
> +             default:
> +                     err = intel_pt_bug(decoder);
> +                     break;
> +             }
> +     } while (err == -ENOLINK);
> +
> +     decoder->state.err = err;
> +     decoder->state.timestamp = decoder->timestamp;
> +     decoder->state.est_timestamp = decoder->timestamp +
> +                                    (decoder->timestamp_insn_cnt << 1);
> +     decoder->state.cr3 = decoder->cr3;
> +
> +     if (err)
> +             decoder->state.from_ip = decoder->ip;
> +
> +     return &decoder->state;
> +}
> +
> +static bool intel_pt_at_psb(unsigned char *buf, size_t len)
> +{
> +     if (len < INTEL_PT_PSB_LEN)
> +             return false;
> +     return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR,
> +                   INTEL_PT_PSB_LEN);
> +}
> +
> +/**
> + * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet.
> + * @buf: pointer to buffer pointer
> + * @len: size of buffer
> + *
> + * Updates the buffer pointer to point to the start of the next PSB packet if
> + * there is one, otherwise the buffer pointer is unchanged.  If @buf is updated,
> + * @len is adjusted accordingly.
> + *
> + * Return: %true if a PSB packet is found, %false otherwise.
> + */
> +static bool intel_pt_next_psb(unsigned char **buf, size_t *len)
> +{
> +     unsigned char *next;
> +
> +     next = memmem(*buf, *len, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
> +     if (next) {
> +             *len -= next - *buf;
> +             *buf = next;
> +             return true;
> +     }
> +     return false;
> +}
> +
> +/**
> + * intel_pt_step_psb - move buffer pointer to the start of the following PSB
> + *                     packet.
> + * @buf: pointer to buffer pointer
> + * @len: size of buffer
> + *
> + * Updates the buffer pointer to point to the start of the following PSB packet
> + * (skipping the PSB at @buf itself) if there is one, otherwise the buffer
> + * pointer is unchanged.  If @buf is updated, @len is adjusted accordingly.
> + *
> + * Return: %true if a PSB packet is found, %false otherwise.
> + */
> +static bool intel_pt_step_psb(unsigned char **buf, size_t *len)
> +{
> +     unsigned char *next;
> +
> +     if (!*len)
> +             return false;
> +
> +     next = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
> +     if (next) {
> +             *len -= next - *buf;
> +             *buf = next;
> +             return true;
> +     }
> +     return false;
> +}
> +
> +/**
> + * intel_pt_last_psb - find the last PSB packet in a buffer.
> + * @buf: buffer
> + * @len: size of buffer
> + *
> + * This function finds the last PSB in a buffer.
> + *
> + * Return: A pointer to the last PSB in @buf if found, %NULL otherwise.
> + */
> +static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
> +{
> +     const char *n = INTEL_PT_PSB_STR;
> +     unsigned char *p;
> +     size_t k;
> +
> +     if (len < INTEL_PT_PSB_LEN)
> +             return NULL;
> +
> +     k = len - INTEL_PT_PSB_LEN + 1;
> +     while (1) {
> +             p = memrchr(buf, n[0], k);
> +             if (!p)
> +                     return NULL;
> +             if (!memcmp(p + 1, n + 1, INTEL_PT_PSB_LEN - 1))
> +                     return p;
> +             k = p - buf;
> +             if (!k)
> +                     return NULL;
> +     }
> +}
> +
> +/**
> + * intel_pt_next_tsc - find and return next TSC.
> + * @buf: buffer
> + * @len: size of buffer
> + * @tsc: TSC value returned
> + *
> + * Find a TSC packet in @buf and return the TSC value.  This function assumes
> + * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a
> + * PSBEND packet is found.
> + *
> + * Return: %true if TSC is found, false otherwise.
> + */
> +static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc)
> +{
> +     struct intel_pt_pkt packet;
> +     int ret;
> +
> +     while (len) {
> +             ret = intel_pt_get_packet(buf, len, &packet);
> +             if (ret <= 0)
> +                     return false;
> +             if (packet.type == INTEL_PT_TSC) {
> +                     *tsc = packet.payload;
> +                     return true;
> +             }
> +             if (packet.type == INTEL_PT_PSBEND)
> +                     return false;
> +             buf += ret;
> +             len -= ret;
> +     }
> +     return false;
> +}
> +
> +/**
> + * intel_pt_tsc_cmp - compare 7-byte TSCs.
> + * @tsc1: first TSC to compare
> + * @tsc2: second TSC to compare
> + *
> + * This function compares 7-byte TSC values allowing for the possibility that
> + * TSC wrapped around.  Generally it is not possible to know if TSC has wrapped
> + * around so for that purpose this function assumes the absolute difference is
> + * less than half the maximum difference.
> + *
> + * Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is
> + * after @tsc2.
> + */
> +static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
> +{
> +     const uint64_t halfway = (1ULL << 55);
> +
> +     if (tsc1 == tsc2)
> +             return 0;
> +
> +     if (tsc1 < tsc2) {
> +             if (tsc2 - tsc1 < halfway)
> +                     return -1;
> +             else
> +                     return 1;
> +     } else {
> +             if (tsc1 - tsc2 < halfway)
> +                     return 1;
> +             else
> +                     return -1;
> +     }
> +}
> +
> +/**
> + * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data
> + *                             using TSC.
> + * @buf_a: first buffer
> + * @len_a: size of first buffer
> + * @buf_b: second buffer
> + * @len_b: size of second buffer
> + *
> + * If the trace contains TSC we can look at the last TSC of @buf_a and the
> + * first TSC of @buf_b in order to determine if the buffers overlap, and then
> + * walk forward in @buf_b until a later TSC is found.  A precondition is that
> + * @buf_a and @buf_b are positioned at a PSB.
> + *
> + * Return: A pointer into @buf_b from where non-overlapped data starts, or
> + * @buf_b + @len_b if there is no non-overlapped data.
> + */
> +static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
> +                                             size_t len_a,
> +                                             unsigned char *buf_b,
> +                                             size_t len_b)
> +{
> +     uint64_t tsc_a, tsc_b;
> +     unsigned char *p;
> +     size_t len;
> +
> +     p = intel_pt_last_psb(buf_a, len_a);
> +     if (!p)
> +             return buf_b; /* No PSB in buf_a => no overlap */
> +
> +     len = len_a - (p - buf_a);
> +     if (!intel_pt_next_tsc(p, len, &tsc_a)) {
> +             /* The last PSB+ in buf_a is incomplete, so go back one more */
> +             len_a -= len;
> +             p = intel_pt_last_psb(buf_a, len_a);
> +             if (!p)
> +                     return buf_b; /* No full PSB+ => assume no overlap */
> +             len = len_a - (p - buf_a);
> +             if (!intel_pt_next_tsc(p, len, &tsc_a))
> +                     return buf_b; /* No TSC in buf_a => assume no overlap */
> +     }
> +
> +     while (1) {
> +             /* Ignore PSB+ with no TSC */
> +             if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) &&
> +                 intel_pt_tsc_cmp(tsc_a, tsc_b) < 0)
> +                     return buf_b; /* tsc_a < tsc_b => no overlap */
> +
> +             if (!intel_pt_step_psb(&buf_b, &len_b))
> +                     return buf_b + len_b; /* No PSB in buf_b => no data */
> +     }
> +}
> +
> +/**
> + * intel_pt_find_overlap - determine start of non-overlapped trace data.
> + * @buf_a: first buffer
> + * @len_a: size of first buffer
> + * @buf_b: second buffer
> + * @len_b: size of second buffer
> + * @have_tsc: can use TSC packets to detect overlap
> + *
> + * When trace samples or snapshots are recorded there is the possibility that
> + * the data overlaps.  Note that, for the purposes of decoding, data is only
> + * useful if it begins with a PSB packet.
> + *
> + * Return: A pointer into @buf_b from where non-overlapped data starts, or
> + * @buf_b + @len_b if there is no non-overlapped data.
> + */
> +unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
> +                                  unsigned char *buf_b, size_t len_b,
> +                                  bool have_tsc)
> +{
> +     unsigned char *found;
> +
> +     /* Buffer 'b' must start at PSB so throw away everything before that */
> +     if (!intel_pt_next_psb(&buf_b, &len_b))
> +             return buf_b + len_b; /* No PSB */
> +
> +     if (!intel_pt_next_psb(&buf_a, &len_a))
> +             return buf_b; /* No overlap */
> +
> +     if (have_tsc) {
> +             found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b);
> +             if (found)
> +                     return found;
> +     }
> +
> +     /*
> +      * Buffer 'b' cannot end within buffer 'a' so, for comparison purposes,
> +      * we can ignore the first part of buffer 'a'.
> +      */
> +     while (len_b < len_a) {
> +             if (!intel_pt_step_psb(&buf_a, &len_a))
> +                     return buf_b; /* No overlap */
> +     }
> +
> +     /* Now len_b >= len_a */
> +     if (len_b > len_a) {
> +             /* The leftover buffer 'b' must start at a PSB */
> +             while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
> +                     if (!intel_pt_step_psb(&buf_a, &len_a))
> +                             return buf_b; /* No overlap */
> +             }
> +     }
> +
> +     while (1) {
> +             /* Potential overlap so check the bytes */
> +             found = memmem(buf_a, len_a, buf_b, len_a);
> +             if (found)
> +                     return buf_b + len_a;
> +
> +             /* Try again at next PSB in buffer 'a' */
> +             if (!intel_pt_step_psb(&buf_a, &len_a))
> +                     return buf_b; /* No overlap */
> +
> +             /* The leftover buffer 'b' must start at a PSB */
> +             while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
> +                     if (!intel_pt_step_psb(&buf_a, &len_a))
> +                             return buf_b; /* No overlap */
> +             }
> +     }
> +}
> diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
> new file mode 100644
> index 0000000..e55615a
> --- /dev/null
> +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
> @@ -0,0 +1,89 @@
> +/*
> + * intel_pt_decoder.h: Intel Processor Trace support
> + * Copyright (c) 2013-2014, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + */
> +
> +#ifndef INCLUDE__INTEL_PT_DECODER_H__
> +#define INCLUDE__INTEL_PT_DECODER_H__
> +
> +#include <stdint.h>
> +#include <stddef.h>
> +#include <stdbool.h>
> +
> +#include "intel-pt-insn-decoder.h"
> +
> +#define INTEL_PT_IN_TX               (1 << 0)
> +#define INTEL_PT_ABORT_TX    (1 << 1)
> +#define INTEL_PT_ASYNC               (1 << 2)
> +
> +enum intel_pt_sample_type {
> +     INTEL_PT_BRANCH         = 1 << 0,
> +     INTEL_PT_INSTRUCTION    = 1 << 1,
> +     INTEL_PT_TRANSACTION    = 1 << 2,
> +};
> +
> +enum intel_pt_period_type {
> +     INTEL_PT_PERIOD_NONE,
> +     INTEL_PT_PERIOD_INSTRUCTIONS,
> +     INTEL_PT_PERIOD_TICKS,
> +};
> +
> +struct intel_pt_state {
> +     enum intel_pt_sample_type type;
> +     int err;
> +     uint64_t from_ip;
> +     uint64_t to_ip;
> +     uint64_t cr3;
> +     uint64_t timestamp;
> +     uint64_t est_timestamp;
> +     uint64_t trace_nr;
> +     uint32_t flags;
> +     enum intel_pt_insn_op insn_op;
> +     int insn_len;
> +};
> +
> +struct intel_pt_insn;
> +
> +struct intel_pt_buffer {
> +     const unsigned char *buf;
> +     size_t len;
> +     bool consecutive;
> +     uint64_t ref_timestamp;
> +     uint64_t trace_nr;
> +};
> +
> +struct intel_pt_params {
> +     int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
> +     int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
> +                      uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
> +                      uint64_t max_insn_cnt, void *data);
> +     void *data;
> +     bool return_compression;
> +     uint64_t period;
> +     enum intel_pt_period_type period_type;
> +};
> +
> +struct intel_pt_decoder;
> +
> +struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params);
> +void intel_pt_decoder_free(struct intel_pt_decoder *decoder);
> +
> +const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
> +
> +unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
> +                                  unsigned char *buf_b, size_t len_b,
> +                                  bool have_tsc);
> +
> +const char *intel_pt_error_message(int code);
> +
> +#endif
> -- 
> 1.9.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to