Re: [Qemu-devel] [PATCH RFC 2/3] tcg/optimize: do copy propagation for memory locations

2017-11-23 Thread Kirill Batuzov
On Wed, 22 Nov 2017, Richard Henderson wrote:

> On 11/09/2017 03:41 PM, Kirill Batuzov wrote:
> > +typedef struct TCGMemLocation {
> > +/* Offset is relative to ENV. Only fields of CPUState are accounted.  
> > */
> > +tcg_target_ulong offset;
> > +tcg_target_ulong size;
> > +TCGType type;
> > +/* Pointer to a temp containing a valid copy of this memory location.  
> > */
> > +TCGTemp *copy;
> > +/* Pointer to the next memory location containing copy of the same
> > +   content.  */
> > +struct TCGMemLocation *next_copy;
> 
> Did you ever find copies of memories that weren't also copies within temps?
> I.e. you could have found this through copy->next_copy?

Yes. This happens when a temp was stored to multiple memory locations.

> 
> > +/* Double-linked list of all memory locations.  */
> > +struct TCGMemLocation *next;
> > +struct TCGMemLocation **prev_ptr;
> 
> Use QTAILQ_* for common double-linked-list manipulation.
> 
> > +struct TCGMemLocation *mem_locations;
> > +struct TCGMemLocation *free_mls;
> 
> These can't be globals anymore -- we're do multi-threaded code generation now.

Then they should be moved to TCGContext I assume?

> 
> > @@ -77,12 +125,27 @@ static void reset_ts(TCGTemp *ts)
> >  struct tcg_temp_info *pi = ts_info(ti->prev_copy);
> >  struct tcg_temp_info *ni = ts_info(ti->next_copy);
> >  
> > +if (ti->mem_loc && ts_is_copy(ts) && 0) {
> > +TCGMemLocation *ml, *nml;
> > +for (ml = ti->mem_loc; ml; ml = nml) {
> > +nml = ml->next_copy;
> > +ml->copy = ti->next_copy;
> > +ml->next_copy = ni->mem_loc;
> > +ni->mem_loc = ml;
> > +}
> > +} else {
> > +while (ti->mem_loc) {
> > +reset_ml(ti->mem_loc);
> > +}
> 
> Why would a single temp be associated with more than one memory?
>

Because it was stored to multiple memory locations. And when reading from
any of these locations we want to access the temp instead.

For example this happens when we translate ARM32 VDUP instruction. One
value is duplicated into all elements of the vector. When elements of the
vector are accessed later, we want to use this value instead of rereading
it from memory.

> > +static TCGOpcode ld_to_mov(TCGOpcode op)
> > +{
> > +#define LD_TO_EXT(sz, w) \
> > +case glue(glue(INDEX_op_ld, sz), glue(_i, w)):   \
> > +return glue(glue(INDEX_op_ext, sz), glue(_i, w))
> > +
> > +switch (op) {
> > +LD_TO_EXT(8s, 32);
> > +LD_TO_EXT(8u, 32);
> > +LD_TO_EXT(16s, 32);
> > +LD_TO_EXT(16u, 32);
> > +LD_TO_EXT(8s, 64);
> > +LD_TO_EXT(8u, 64);
> > +LD_TO_EXT(16s, 64);
> > +LD_TO_EXT(16u, 64);
> > +LD_TO_EXT(32s, 64);
> > +LD_TO_EXT(32u, 64);
> 
> How many extensions did you find?  Or is this Just In Case?
> 

One. It was in Aartch64 build of x264. So this is more Just in Case. But
it may become useful if we try to emulate some 8- or 16-bit architectures.

-- 
Kirill



Re: [Qemu-devel] [PATCH RFC 2/3] tcg/optimize: do copy propagation for memory locations

2017-11-22 Thread Richard Henderson
On 11/09/2017 03:41 PM, Kirill Batuzov wrote:
> +typedef struct TCGMemLocation {
> +/* Offset is relative to ENV. Only fields of CPUState are accounted.  */
> +tcg_target_ulong offset;
> +tcg_target_ulong size;
> +TCGType type;
> +/* Pointer to a temp containing a valid copy of this memory location.  */
> +TCGTemp *copy;
> +/* Pointer to the next memory location containing copy of the same
> +   content.  */
> +struct TCGMemLocation *next_copy;

Did you ever find copies of memories that weren't also copies within temps?
I.e. you could have found this through copy->next_copy?

> +/* Double-linked list of all memory locations.  */
> +struct TCGMemLocation *next;
> +struct TCGMemLocation **prev_ptr;

Use QTAILQ_* for common double-linked-list manipulation.

> +struct TCGMemLocation *mem_locations;
> +struct TCGMemLocation *free_mls;

These can't be globals anymore -- we're do multi-threaded code generation now.

> @@ -77,12 +125,27 @@ static void reset_ts(TCGTemp *ts)
>  struct tcg_temp_info *pi = ts_info(ti->prev_copy);
>  struct tcg_temp_info *ni = ts_info(ti->next_copy);
>  
> +if (ti->mem_loc && ts_is_copy(ts) && 0) {
> +TCGMemLocation *ml, *nml;
> +for (ml = ti->mem_loc; ml; ml = nml) {
> +nml = ml->next_copy;
> +ml->copy = ti->next_copy;
> +ml->next_copy = ni->mem_loc;
> +ni->mem_loc = ml;
> +}
> +} else {
> +while (ti->mem_loc) {
> +reset_ml(ti->mem_loc);
> +}

Why would a single temp be associated with more than one memory?

> +static TCGOpcode ld_to_mov(TCGOpcode op)
> +{
> +#define LD_TO_EXT(sz, w) \
> +case glue(glue(INDEX_op_ld, sz), glue(_i, w)):   \
> +return glue(glue(INDEX_op_ext, sz), glue(_i, w))
> +
> +switch (op) {
> +LD_TO_EXT(8s, 32);
> +LD_TO_EXT(8u, 32);
> +LD_TO_EXT(16s, 32);
> +LD_TO_EXT(16u, 32);
> +LD_TO_EXT(8s, 64);
> +LD_TO_EXT(8u, 64);
> +LD_TO_EXT(16s, 64);
> +LD_TO_EXT(16u, 64);
> +LD_TO_EXT(32s, 64);
> +LD_TO_EXT(32u, 64);

How many extensions did you find?  Or is this Just In Case?

Otherwise this looks quite reasonable.


r~



[Qemu-devel] [PATCH RFC 2/3] tcg/optimize: do copy propagation for memory locations

2017-11-09 Thread Kirill Batuzov
During copy propagation phase, keep track of memory locations that store value
of a known live variable. Only memory locations that are addressed relative to
ENV are accounted. Any other access types are handled conservatively.

When a load is encountered, source memory location is checked against list of
known memory locations. If its content is a copy of some variable, then MOV or
EXT from this variable is issued instead of load. This allows us to keep values
of some CPUState fields that are not represented by global variables on host
registers during computations involving them.

Signed-off-by: Kirill Batuzov 
---
 tcg/optimize.c | 266 +
 1 file changed, 266 insertions(+)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 847dfa44c9..da7f069444 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -38,8 +38,28 @@ struct tcg_temp_info {
 TCGTemp *next_copy;
 tcg_target_ulong val;
 tcg_target_ulong mask;
+struct TCGMemLocation *mem_loc;
 };
 
+typedef struct TCGMemLocation {
+/* Offset is relative to ENV. Only fields of CPUState are accounted.  */
+tcg_target_ulong offset;
+tcg_target_ulong size;
+TCGType type;
+/* Pointer to a temp containing a valid copy of this memory location.  */
+TCGTemp *copy;
+/* Pointer to the next memory location containing copy of the same
+   content.  */
+struct TCGMemLocation *next_copy;
+
+/* Double-linked list of all memory locations.  */
+struct TCGMemLocation *next;
+struct TCGMemLocation **prev_ptr;
+} TCGMemLocation;
+
+struct TCGMemLocation *mem_locations;
+struct TCGMemLocation *free_mls;
+
 static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
 {
 return ts->state_ptr;
@@ -70,6 +90,34 @@ static inline bool arg_is_copy(TCGArg arg)
 return ts_is_copy(arg_temp(arg));
 }
 
+/* Reset MEMORY LOCATION state. */
+static void reset_ml(TCGMemLocation *ml)
+{
+if (!ml) {
+return ;
+}
+if (ml->copy) {
+TCGMemLocation **prev_ptr = _info(ml->copy)->mem_loc;
+TCGMemLocation *cur_ptr = ts_info(ml->copy)->mem_loc;
+while (cur_ptr && cur_ptr != ml) {
+prev_ptr = _ptr->next_copy;
+cur_ptr = cur_ptr->next_copy;
+}
+*prev_ptr = ml->next_copy;
+if (ts_info(ml->copy)->mem_loc == ml) {
+ts_info(ml->copy)->mem_loc = ml->next_copy;
+}
+}
+
+*ml->prev_ptr = ml->next;
+if (ml->next) {
+ml->next->prev_ptr = ml->prev_ptr;
+}
+ml->prev_ptr = NULL;
+ml->next = free_mls;
+free_mls = ml;
+}
+
 /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
 static void reset_ts(TCGTemp *ts)
 {
@@ -77,12 +125,27 @@ static void reset_ts(TCGTemp *ts)
 struct tcg_temp_info *pi = ts_info(ti->prev_copy);
 struct tcg_temp_info *ni = ts_info(ti->next_copy);
 
+if (ti->mem_loc && ts_is_copy(ts) && 0) {
+TCGMemLocation *ml, *nml;
+for (ml = ti->mem_loc; ml; ml = nml) {
+nml = ml->next_copy;
+ml->copy = ti->next_copy;
+ml->next_copy = ni->mem_loc;
+ni->mem_loc = ml;
+}
+} else {
+while (ti->mem_loc) {
+reset_ml(ti->mem_loc);
+}
+}
+
 ni->prev_copy = ti->prev_copy;
 pi->next_copy = ti->next_copy;
 ti->next_copy = ts;
 ti->prev_copy = ts;
 ti->is_const = false;
 ti->mask = -1;
+ti->mem_loc = NULL;
 }
 
 static void reset_temp(TCGArg arg)
@@ -103,6 +166,7 @@ static void init_ts_info(struct tcg_temp_info *infos,
 ti->prev_copy = ts;
 ti->is_const = false;
 ti->mask = -1;
+ti->mem_loc = NULL;
 set_bit(idx, temps_used->l);
 }
 }
@@ -119,6 +183,92 @@ static int op_bits(TCGOpcode op)
 return def->flags & TCG_OPF_64BIT ? 64 : 32;
 }
 
+/* Allocate a new MEMORY LOCATION of reuse a free one.  */
+static TCGMemLocation *alloc_ml(void)
+{
+if (free_mls) {
+TCGMemLocation *ml = free_mls;
+free_mls = free_mls->next;
+return ml;
+}
+return tcg_malloc(sizeof(TCGMemLocation));
+}
+
+/* Allocate and initialize MEMORY LOCATION.  */
+static TCGMemLocation *new_ml(tcg_target_ulong off, tcg_target_ulong sz,
+  TCGTemp *copy)
+{
+TCGMemLocation *ml = alloc_ml();
+
+ml->offset = off;
+ml->size = sz;
+ml->copy = copy;
+if (copy) {
+ml->type = copy->base_type;
+ml->next_copy = ts_info(copy)->mem_loc;
+ts_info(copy)->mem_loc = ml;
+} else {
+tcg_abort();
+}
+ml->next = mem_locations;
+if (ml->next) {
+ml->next->prev_ptr = >next;
+}
+ml->prev_ptr = _locations;
+mem_locations = ml;
+return ml;
+}
+
+static TCGMemLocation *find_ml(tcg_target_ulong off, tcg_target_ulong sz,
+   TCGType type)
+{
+TCGMemLocation *mi;
+for (mi = mem_locations; mi; mi =