Re: [Mesa-dev] [PATCH] mesa: fix display list 8-byte alignment issue

2015-01-30 Thread Brian Paul

On 01/30/2015 03:25 AM, Jose Fonseca wrote:

Looks good to me.


Just one minor suggestion: if we replaced

   sizeof(void *) == 8

with

   sizeof(void *) > sizeof(GLuint)


Yes, or more accurately, sizeof(Node).



we would avoid the magic number 8 and make the code correct for any
pointer size.


Thanks for the review.

-Brian




Jose



On 28/01/15 03:06, Brian Paul wrote:

The _mesa_dlist_alloc() function is only guaranteed to return a pointer
with 4-byte alignment.  On 64-bit systems which don't support unaligned
loads (e.g. SPARC or MIPS) this could lead to a bus error in the VBO
code.

The solution is to add a new  _mesa_dlist_alloc_aligned() function which
will return a pointer to an 8-byte aligned address on 64-bit systems.
This is accomplished by inserting a 4-byte NOP instruction in the display
list when needed.

The only place this actually matters is the VBO code where we need to
allocate a 'struct vbo_save_vertex_list' which needs to be 8-byte
aligned (just as if it were malloc'd).

The gears demo and others hit this bug.

Bugzilla:
https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D88662&d=AwIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=zfmBZnnVGHeYde45pMKNnVyzeaZbdIqVLprmZCM2zzE&m=DaIFyU2hnmYCL1EaelhvLHTsOdyZV8y6pEVRwRkcp8Q&s=g3pos-5bc_Uu5plvnuQvIjeEJXLDgTr5eOznhu6o-Fo&e=

Cc: "10.4" 
---
  src/mesa/main/dlist.c   | 72
+
  src/mesa/main/dlist.h   |  3 ++
  src/mesa/vbo/vbo_save_api.c |  5 +++-
  3 files changed, 73 insertions(+), 7 deletions(-)

diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index 138d360..dc6070b 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -487,6 +487,7 @@ typedef enum
 /* The following three are meta instructions */
 OPCODE_ERROR,/* raise compiled-in error */
 OPCODE_CONTINUE,
+   OPCODE_NOP,  /* No-op (used for 8-byte alignment */
 OPCODE_END_OF_LIST,
 OPCODE_EXT_0
  } OpCode;
@@ -1012,13 +1013,16 @@ memdup(const void *src, GLsizei bytes)
   * Allocate space for a display list instruction (opcode + payload
space).
   * \param opcode  the instruction opcode (OPCODE_* value)
   * \param bytes   instruction payload size (not counting opcode)
- * \return pointer to allocated memory (the opcode space)
+ * \param align8  does the payload need to be 8-byte aligned?
+ *This is only relevant in 64-bit environments.
+ * \return pointer to allocated memory (the payload will be at
pointer+1)
   */
  static Node *
-dlist_alloc(struct gl_context *ctx, OpCode opcode, GLuint bytes)
+dlist_alloc(struct gl_context *ctx, OpCode opcode, GLuint bytes, bool
align8)
  {
 const GLuint numNodes = 1 + (bytes + sizeof(Node) - 1) /
sizeof(Node);
 const GLuint contNodes = 1 + POINTER_DWORDS;  /* size of continue
info */
+   GLuint nopNode;
 Node *n;

 if (opcode < OPCODE_EXT_0) {
@@ -1032,7 +1036,19 @@ dlist_alloc(struct gl_context *ctx, OpCode
opcode, GLuint bytes)
}
 }

-   if (ctx->ListState.CurrentPos + numNodes + contNodes > BLOCK_SIZE) {
+   if (sizeof(void *) == 8 && align8 && ctx->ListState.CurrentPos % 2
== 0) {
+  /* The opcode would get placed at node[0] and the payload would
start
+   * at node[1].  But the payload needs to be at an even offset
(8-byte
+   * multiple).
+   */
+  nopNode = 1;
+   }
+   else {
+  nopNode = 0;
+   }
+
+   if (ctx->ListState.CurrentPos + nopNode + numNodes + contNodes
+   > BLOCK_SIZE) {
/* This block is full.  Allocate a new block and chain to it */
Node *newblock;
n = ctx->ListState.CurrentBlock + ctx->ListState.CurrentPos;
@@ -1042,13 +1058,34 @@ dlist_alloc(struct gl_context *ctx, OpCode
opcode, GLuint bytes)
   _mesa_error(ctx, GL_OUT_OF_MEMORY, "Building display list");
   return NULL;
}
+
+  /* a fresh block should be 8-byte aligned on 64-bit systems */
+  assert(((GLintptr) newblock) % sizeof(void *) == 0);
+
save_pointer(&n[1], newblock);
ctx->ListState.CurrentBlock = newblock;
ctx->ListState.CurrentPos = 0;
+
+  /* Display list nodes are always 4 bytes.  If we need 8-byte
alignment
+   * we have to insert a NOP so that the payload of the real
opcode lands
+   * on an even location:
+   *   node[0] = OPCODE_NOP
+   *   node[1] = OPCODE_x;
+   *   node[2] = start of payload
+   */
+  nopNode = sizeof(void *) == 8 && align8;
 }

 n = ctx->ListState.CurrentBlock + ctx->ListState.CurrentPos;
-   ctx->ListState.CurrentPos += numNodes;
+   if (nopNode) {
+  assert(ctx->ListState.CurrentPos % 2 == 0); /* even value */
+  n[0].opcode = OPCODE_NOP;
+  n++;
+  /* The "real" opcode will now be at an odd location and the
payload
+   * will be at an even location.
+   */
+   }
+   ctx->ListState.CurrentPos += nopNode + numNodes;

 n[0].opcode = opcode;

@@ 

Re: [Mesa-dev] [PATCH] mesa: fix display list 8-byte alignment issue

2015-01-30 Thread Jose Fonseca

Looks good to me.


Just one minor suggestion: if we replaced

  sizeof(void *) == 8

with

  sizeof(void *) > sizeof(GLuint)

we would avoid the magic number 8 and make the code correct for any 
pointer size.


Jose



On 28/01/15 03:06, Brian Paul wrote:

The _mesa_dlist_alloc() function is only guaranteed to return a pointer
with 4-byte alignment.  On 64-bit systems which don't support unaligned
loads (e.g. SPARC or MIPS) this could lead to a bus error in the VBO code.

The solution is to add a new  _mesa_dlist_alloc_aligned() function which
will return a pointer to an 8-byte aligned address on 64-bit systems.
This is accomplished by inserting a 4-byte NOP instruction in the display
list when needed.

The only place this actually matters is the VBO code where we need to
allocate a 'struct vbo_save_vertex_list' which needs to be 8-byte
aligned (just as if it were malloc'd).

The gears demo and others hit this bug.

Bugzilla: 
https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D88662&d=AwIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=zfmBZnnVGHeYde45pMKNnVyzeaZbdIqVLprmZCM2zzE&m=DaIFyU2hnmYCL1EaelhvLHTsOdyZV8y6pEVRwRkcp8Q&s=g3pos-5bc_Uu5plvnuQvIjeEJXLDgTr5eOznhu6o-Fo&e=
Cc: "10.4" 
---
  src/mesa/main/dlist.c   | 72 +
  src/mesa/main/dlist.h   |  3 ++
  src/mesa/vbo/vbo_save_api.c |  5 +++-
  3 files changed, 73 insertions(+), 7 deletions(-)

diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index 138d360..dc6070b 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -487,6 +487,7 @@ typedef enum
 /* The following three are meta instructions */
 OPCODE_ERROR,/* raise compiled-in error */
 OPCODE_CONTINUE,
+   OPCODE_NOP,  /* No-op (used for 8-byte alignment */
 OPCODE_END_OF_LIST,
 OPCODE_EXT_0
  } OpCode;
@@ -1012,13 +1013,16 @@ memdup(const void *src, GLsizei bytes)
   * Allocate space for a display list instruction (opcode + payload space).
   * \param opcode  the instruction opcode (OPCODE_* value)
   * \param bytes   instruction payload size (not counting opcode)
- * \return pointer to allocated memory (the opcode space)
+ * \param align8  does the payload need to be 8-byte aligned?
+ *This is only relevant in 64-bit environments.
+ * \return pointer to allocated memory (the payload will be at pointer+1)
   */
  static Node *
-dlist_alloc(struct gl_context *ctx, OpCode opcode, GLuint bytes)
+dlist_alloc(struct gl_context *ctx, OpCode opcode, GLuint bytes, bool align8)
  {
 const GLuint numNodes = 1 + (bytes + sizeof(Node) - 1) / sizeof(Node);
 const GLuint contNodes = 1 + POINTER_DWORDS;  /* size of continue info */
+   GLuint nopNode;
 Node *n;

 if (opcode < OPCODE_EXT_0) {
@@ -1032,7 +1036,19 @@ dlist_alloc(struct gl_context *ctx, OpCode opcode, 
GLuint bytes)
}
 }

-   if (ctx->ListState.CurrentPos + numNodes + contNodes > BLOCK_SIZE) {
+   if (sizeof(void *) == 8 && align8 && ctx->ListState.CurrentPos % 2 == 0) {
+  /* The opcode would get placed at node[0] and the payload would start
+   * at node[1].  But the payload needs to be at an even offset (8-byte
+   * multiple).
+   */
+  nopNode = 1;
+   }
+   else {
+  nopNode = 0;
+   }
+
+   if (ctx->ListState.CurrentPos + nopNode + numNodes + contNodes
+   > BLOCK_SIZE) {
/* This block is full.  Allocate a new block and chain to it */
Node *newblock;
n = ctx->ListState.CurrentBlock + ctx->ListState.CurrentPos;
@@ -1042,13 +1058,34 @@ dlist_alloc(struct gl_context *ctx, OpCode opcode, 
GLuint bytes)
   _mesa_error(ctx, GL_OUT_OF_MEMORY, "Building display list");
   return NULL;
}
+
+  /* a fresh block should be 8-byte aligned on 64-bit systems */
+  assert(((GLintptr) newblock) % sizeof(void *) == 0);
+
save_pointer(&n[1], newblock);
ctx->ListState.CurrentBlock = newblock;
ctx->ListState.CurrentPos = 0;
+
+  /* Display list nodes are always 4 bytes.  If we need 8-byte alignment
+   * we have to insert a NOP so that the payload of the real opcode lands
+   * on an even location:
+   *   node[0] = OPCODE_NOP
+   *   node[1] = OPCODE_x;
+   *   node[2] = start of payload
+   */
+  nopNode = sizeof(void *) == 8 && align8;
 }

 n = ctx->ListState.CurrentBlock + ctx->ListState.CurrentPos;
-   ctx->ListState.CurrentPos += numNodes;
+   if (nopNode) {
+  assert(ctx->ListState.CurrentPos % 2 == 0); /* even value */
+  n[0].opcode = OPCODE_NOP;
+  n++;
+  /* The "real" opcode will now be at an odd location and the payload
+   * will be at an even location.
+   */
+   }
+   ctx->ListState.CurrentPos += nopNode + numNodes;

 n[0].opcode = opcode;

@@ -1069,7 +1106,22 @@ dlist_alloc(struct gl_context *ctx, OpCode opcode, 
GLuint bytes)
  void *
  _mesa_dlist_alloc(struct 

Re: [Mesa-dev] [PATCH] mesa: fix display list 8-byte alignment issue

2015-01-29 Thread Brian Paul

Ping.

I'll handle the cherry-pick to 10.4 since a small code change is needed.

-Brian

On 01/27/2015 08:06 PM, Brian Paul wrote:

The _mesa_dlist_alloc() function is only guaranteed to return a pointer
with 4-byte alignment.  On 64-bit systems which don't support unaligned
loads (e.g. SPARC or MIPS) this could lead to a bus error in the VBO code.

The solution is to add a new  _mesa_dlist_alloc_aligned() function which
will return a pointer to an 8-byte aligned address on 64-bit systems.
This is accomplished by inserting a 4-byte NOP instruction in the display
list when needed.

The only place this actually matters is the VBO code where we need to
allocate a 'struct vbo_save_vertex_list' which needs to be 8-byte
aligned (just as if it were malloc'd).

The gears demo and others hit this bug.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88662
Cc: "10.4" 
---
  src/mesa/main/dlist.c   | 72 +
  src/mesa/main/dlist.h   |  3 ++
  src/mesa/vbo/vbo_save_api.c |  5 +++-
  3 files changed, 73 insertions(+), 7 deletions(-)

diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index 138d360..dc6070b 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -487,6 +487,7 @@ typedef enum
 /* The following three are meta instructions */
 OPCODE_ERROR,/* raise compiled-in error */
 OPCODE_CONTINUE,
+   OPCODE_NOP,  /* No-op (used for 8-byte alignment */
 OPCODE_END_OF_LIST,
 OPCODE_EXT_0
  } OpCode;
@@ -1012,13 +1013,16 @@ memdup(const void *src, GLsizei bytes)
   * Allocate space for a display list instruction (opcode + payload space).
   * \param opcode  the instruction opcode (OPCODE_* value)
   * \param bytes   instruction payload size (not counting opcode)
- * \return pointer to allocated memory (the opcode space)
+ * \param align8  does the payload need to be 8-byte aligned?
+ *This is only relevant in 64-bit environments.
+ * \return pointer to allocated memory (the payload will be at pointer+1)
   */
  static Node *
-dlist_alloc(struct gl_context *ctx, OpCode opcode, GLuint bytes)
+dlist_alloc(struct gl_context *ctx, OpCode opcode, GLuint bytes, bool align8)
  {
 const GLuint numNodes = 1 + (bytes + sizeof(Node) - 1) / sizeof(Node);
 const GLuint contNodes = 1 + POINTER_DWORDS;  /* size of continue info */
+   GLuint nopNode;
 Node *n;

 if (opcode < OPCODE_EXT_0) {
@@ -1032,7 +1036,19 @@ dlist_alloc(struct gl_context *ctx, OpCode opcode, 
GLuint bytes)
}
 }

-   if (ctx->ListState.CurrentPos + numNodes + contNodes > BLOCK_SIZE) {
+   if (sizeof(void *) == 8 && align8 && ctx->ListState.CurrentPos % 2 == 0) {
+  /* The opcode would get placed at node[0] and the payload would start
+   * at node[1].  But the payload needs to be at an even offset (8-byte
+   * multiple).
+   */
+  nopNode = 1;
+   }
+   else {
+  nopNode = 0;
+   }
+
+   if (ctx->ListState.CurrentPos + nopNode + numNodes + contNodes
+   > BLOCK_SIZE) {
/* This block is full.  Allocate a new block and chain to it */
Node *newblock;
n = ctx->ListState.CurrentBlock + ctx->ListState.CurrentPos;
@@ -1042,13 +1058,34 @@ dlist_alloc(struct gl_context *ctx, OpCode opcode, 
GLuint bytes)
   _mesa_error(ctx, GL_OUT_OF_MEMORY, "Building display list");
   return NULL;
}
+
+  /* a fresh block should be 8-byte aligned on 64-bit systems */
+  assert(((GLintptr) newblock) % sizeof(void *) == 0);
+
save_pointer(&n[1], newblock);
ctx->ListState.CurrentBlock = newblock;
ctx->ListState.CurrentPos = 0;
+
+  /* Display list nodes are always 4 bytes.  If we need 8-byte alignment
+   * we have to insert a NOP so that the payload of the real opcode lands
+   * on an even location:
+   *   node[0] = OPCODE_NOP
+   *   node[1] = OPCODE_x;
+   *   node[2] = start of payload
+   */
+  nopNode = sizeof(void *) == 8 && align8;
 }

 n = ctx->ListState.CurrentBlock + ctx->ListState.CurrentPos;
-   ctx->ListState.CurrentPos += numNodes;
+   if (nopNode) {
+  assert(ctx->ListState.CurrentPos % 2 == 0); /* even value */
+  n[0].opcode = OPCODE_NOP;
+  n++;
+  /* The "real" opcode will now be at an odd location and the payload
+   * will be at an even location.
+   */
+   }
+   ctx->ListState.CurrentPos += nopNode + numNodes;

 n[0].opcode = opcode;

@@ -1069,7 +1106,22 @@ dlist_alloc(struct gl_context *ctx, OpCode opcode, 
GLuint bytes)
  void *
  _mesa_dlist_alloc(struct gl_context *ctx, GLuint opcode, GLuint bytes)
  {
-   Node *n = dlist_alloc(ctx, (OpCode) opcode, bytes);
+   Node *n = dlist_alloc(ctx, (OpCode) opcode, bytes, false);
+   if (n)
+  return n + 1;  /* return pointer to payload area, after opcode */
+   else
+  return NULL;
+}
+
+
+/**
+ * Same as _mesa_dlist_alloc(), but return a pointer which is 8-byte
+ * aligne

[Mesa-dev] [PATCH] mesa: fix display list 8-byte alignment issue

2015-01-27 Thread Brian Paul
The _mesa_dlist_alloc() function is only guaranteed to return a pointer
with 4-byte alignment.  On 64-bit systems which don't support unaligned
loads (e.g. SPARC or MIPS) this could lead to a bus error in the VBO code.

The solution is to add a new  _mesa_dlist_alloc_aligned() function which
will return a pointer to an 8-byte aligned address on 64-bit systems.
This is accomplished by inserting a 4-byte NOP instruction in the display
list when needed.

The only place this actually matters is the VBO code where we need to
allocate a 'struct vbo_save_vertex_list' which needs to be 8-byte
aligned (just as if it were malloc'd).

The gears demo and others hit this bug.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88662
Cc: "10.4" 
---
 src/mesa/main/dlist.c   | 72 +
 src/mesa/main/dlist.h   |  3 ++
 src/mesa/vbo/vbo_save_api.c |  5 +++-
 3 files changed, 73 insertions(+), 7 deletions(-)

diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index 138d360..dc6070b 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -487,6 +487,7 @@ typedef enum
/* The following three are meta instructions */
OPCODE_ERROR,/* raise compiled-in error */
OPCODE_CONTINUE,
+   OPCODE_NOP,  /* No-op (used for 8-byte alignment */
OPCODE_END_OF_LIST,
OPCODE_EXT_0
 } OpCode;
@@ -1012,13 +1013,16 @@ memdup(const void *src, GLsizei bytes)
  * Allocate space for a display list instruction (opcode + payload space).
  * \param opcode  the instruction opcode (OPCODE_* value)
  * \param bytes   instruction payload size (not counting opcode)
- * \return pointer to allocated memory (the opcode space)
+ * \param align8  does the payload need to be 8-byte aligned?
+ *This is only relevant in 64-bit environments.
+ * \return pointer to allocated memory (the payload will be at pointer+1)
  */
 static Node *
-dlist_alloc(struct gl_context *ctx, OpCode opcode, GLuint bytes)
+dlist_alloc(struct gl_context *ctx, OpCode opcode, GLuint bytes, bool align8)
 {
const GLuint numNodes = 1 + (bytes + sizeof(Node) - 1) / sizeof(Node);
const GLuint contNodes = 1 + POINTER_DWORDS;  /* size of continue info */
+   GLuint nopNode;
Node *n;
 
if (opcode < OPCODE_EXT_0) {
@@ -1032,7 +1036,19 @@ dlist_alloc(struct gl_context *ctx, OpCode opcode, 
GLuint bytes)
   }
}
 
-   if (ctx->ListState.CurrentPos + numNodes + contNodes > BLOCK_SIZE) {
+   if (sizeof(void *) == 8 && align8 && ctx->ListState.CurrentPos % 2 == 0) {
+  /* The opcode would get placed at node[0] and the payload would start
+   * at node[1].  But the payload needs to be at an even offset (8-byte
+   * multiple).
+   */
+  nopNode = 1;
+   }
+   else {
+  nopNode = 0;
+   }
+
+   if (ctx->ListState.CurrentPos + nopNode + numNodes + contNodes
+   > BLOCK_SIZE) {
   /* This block is full.  Allocate a new block and chain to it */
   Node *newblock;
   n = ctx->ListState.CurrentBlock + ctx->ListState.CurrentPos;
@@ -1042,13 +1058,34 @@ dlist_alloc(struct gl_context *ctx, OpCode opcode, 
GLuint bytes)
  _mesa_error(ctx, GL_OUT_OF_MEMORY, "Building display list");
  return NULL;
   }
+
+  /* a fresh block should be 8-byte aligned on 64-bit systems */
+  assert(((GLintptr) newblock) % sizeof(void *) == 0);
+
   save_pointer(&n[1], newblock);
   ctx->ListState.CurrentBlock = newblock;
   ctx->ListState.CurrentPos = 0;
+
+  /* Display list nodes are always 4 bytes.  If we need 8-byte alignment
+   * we have to insert a NOP so that the payload of the real opcode lands
+   * on an even location:
+   *   node[0] = OPCODE_NOP
+   *   node[1] = OPCODE_x;
+   *   node[2] = start of payload
+   */
+  nopNode = sizeof(void *) == 8 && align8;
}
 
n = ctx->ListState.CurrentBlock + ctx->ListState.CurrentPos;
-   ctx->ListState.CurrentPos += numNodes;
+   if (nopNode) {
+  assert(ctx->ListState.CurrentPos % 2 == 0); /* even value */
+  n[0].opcode = OPCODE_NOP;
+  n++;
+  /* The "real" opcode will now be at an odd location and the payload
+   * will be at an even location.
+   */
+   }
+   ctx->ListState.CurrentPos += nopNode + numNodes;
 
n[0].opcode = opcode;
 
@@ -1069,7 +1106,22 @@ dlist_alloc(struct gl_context *ctx, OpCode opcode, 
GLuint bytes)
 void *
 _mesa_dlist_alloc(struct gl_context *ctx, GLuint opcode, GLuint bytes)
 {
-   Node *n = dlist_alloc(ctx, (OpCode) opcode, bytes);
+   Node *n = dlist_alloc(ctx, (OpCode) opcode, bytes, false);
+   if (n)
+  return n + 1;  /* return pointer to payload area, after opcode */
+   else
+  return NULL;
+}
+
+
+/**
+ * Same as _mesa_dlist_alloc(), but return a pointer which is 8-byte
+ * aligned in 64-bit environments, 4-byte aligned otherwise.
+ */
+void *
+_mesa_dlist_alloc_aligned(struct gl_context *ctx, GLuint opcode, GLuint bytes)
+{
+   Node *n = dli