On Tue Feb 8, 2022 at 8:05 AM CET, luigi scarso wrote:
> Can you post a patch using the current luatex experimental repo at
> https://serveur-svn.lri.fr/svn/modhel/luatex/branches/experimental/
> (see http://www.luatex.org/download.html ) ?

Attached.

I don't know how basing one patch on top of another works with TLpatches
and its .original files. I don't even know if it's manual or not; I
constructed the TLpatch file manually.

From the version I shared last time there are three changes:

 - byte swapping when loaded byte code has different byte order
 - for simplicity I am wasting 8 bytes in each bytecode for detecting
   byte order and actually verifying that integers dump correctly
 - I changed the "version" byte (LUAC_FORMAT) from 0 to 'T' (84) so that
   it is explicit that other Lua 5.3.6 bytecodes are incompatible with
   LuaTeX

Neither the variable-length encoding nor the byte swapping is a
compile-time option. I don't really want to get into preprocessor macros.
This seemed the simplest thing that works, and it shouldn't have much
runtime cost for most runs.

Just for the record, I am not a proponent of these changes; it's a
demonstration that it can and has to be done on the Lua level and that
it seemingly works.

Also, LuaJIT bytecode is portable across all its architectures AFAIK, so
no problem with LuaJITTeX.

Sorry, I also didn't test the LuaTeX repository build; it's hard for me
to cross-compile and run binaries that exercise byte-order issues
and/or 16/32/64-bit architecture problems.

Michal
From 661ee0ca56f7fc09b3b497ad9fd2d72e2b0da0a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Vlas=C3=A1k?= <lahc...@gmail.com>
Date: Tue, 8 Feb 2022 18:27:17 +0100
Subject: [PATCH] Make Lua bytecode more portable

- backports variable length ints from Lua 5.4
- marks bytecode as different version, since it's incompatible with stock
  Lua 5.3.6
- print real error message in LuaTeX when bytecode fails to load
---
 .../lua53/TLpatches/patch-04-portable-dumping | 215 ++++++++++++++++++
 source/libs/lua53/lua53-src/src/ldump.c       |  33 ++-
 source/libs/lua53/lua53-src/src/lundump.c     |  75 ++++--
 source/libs/lua53/lua53-src/src/lundump.h     |   2 +-
 source/texk/web2c/luatexdir/lua/llualib.c     |   7 +-
 5 files changed, 302 insertions(+), 30 deletions(-)
 create mode 100644 source/libs/lua53/TLpatches/patch-04-portable-dumping

diff --git a/source/libs/lua53/TLpatches/patch-04-portable-dumping b/source/libs/lua53/TLpatches/patch-04-portable-dumping
new file mode 100644
index 000000000..f1d2c7034
--- /dev/null
+++ b/source/libs/lua53/TLpatches/patch-04-portable-dumping
@@ -0,0 +1,215 @@
+--- ldump.c.orig
++++ ldump.c
+@@ -55,8 +55,23 @@ static void DumpByte (int y, DumpState *D) {
+ }
+ 
+ 
++/* dumpInt Buff Size */
++#define DIBS    ((sizeof(size_t) * 8 / 7) + 1)
++
++static void DumpSize (size_t x, DumpState *D) {
++  lu_byte buff[DIBS];
++  int n = 0;
++  do {
++    buff[DIBS - (++n)] = x & 0x7f;  /* fill buffer in reverse order */
++    x >>= 7;
++  } while (x != 0);
++  buff[DIBS - 1] |= 0x80;  /* mark last byte */
++  DumpVector(buff + DIBS - n, n, D);
++}
++
++
+ static void DumpInt (int x, DumpState *D) {
+-  DumpVar(x, D);
++  DumpSize(x, D);
+ }
+ 
+ 
+@@ -72,17 +87,12 @@ static void DumpInteger (lua_Integer x, DumpState *D) {
+ 
+ static void DumpString (const TString *s, DumpState *D) {
+   if (s == NULL)
+-    DumpByte(0, D);
++    DumpSize(0, D);
+   else {
+-    size_t size = tsslen(s) + 1;  /* include trailing '\0' */
++    size_t size = tsslen(s);
+     const char *str = getstr(s);
+-    if (size < 0xFF)
+-      DumpByte(cast_int(size), D);
+-    else {
+-      DumpByte(0xFF, D);
+-      DumpVar(size, D);
+-    }
+-    DumpVector(str, size - 1, D);  /* no need to save '\0' */
++    DumpSize(size + 1, D);
++    DumpVector(str, size, D);
+   }
+ }
+ 
+@@ -186,12 +196,11 @@ static void DumpHeader (DumpState *D) {
+   DumpByte(LUAC_VERSION, D);
+   DumpByte(LUAC_FORMAT, D);
+   DumpLiteral(LUAC_DATA, D);
+-  DumpByte(sizeof(int), D);
+-  DumpByte(sizeof(size_t), D);
+   DumpByte(sizeof(Instruction), D);
+   DumpByte(sizeof(lua_Integer), D);
+   DumpByte(sizeof(lua_Number), D);
+   DumpInteger(LUAC_INT, D);
++  DumpInteger(LUAC_INT, D);
+   DumpNumber(LUAC_NUM, D);
+ }
+ 
+--- lundump.c.orig
++++ lundump.c
+@@ -10,6 +10,7 @@
+ #include "lprefix.h"
+ 
+ 
++#include <limits.h>
+ #include <string.h>
+ 
+ #include "lua.h"
+@@ -33,6 +34,7 @@ typedef struct {
+   lua_State *L;
+   ZIO *Z;
+   const char *name;
++  int swap;
+ } LoadState;
+ 
+ 
+@@ -46,11 +48,40 @@ static l_noret error(LoadState *S, const char *why) {
+ ** All high-level loads go through LoadVector; you can change it to
+ ** adapt to the endianness of the input
+ */
+-#define LoadVector(S,b,n)	LoadBlock(S,b,(n)*sizeof((b)[0]))
++#define LoadVector(S,b,n)	LoadBlock(S,b,n,sizeof((b)[0]))
+ 
+-static void LoadBlock (LoadState *S, void *b, size_t size) {
+-  if (luaZ_read(S->Z, b, size) != 0)
++static void LoadBlock (LoadState *S, void *b, size_t n, size_t size) {
++  size_t totalsize = n * size;
++
++  if (luaZ_read(S->Z, b, totalsize) != 0)
+     error(S, "truncated");
++
++  if (S->swap) {
++    unsigned char *buff = b;
++    unsigned char *buff_end = b + totalsize;
++    unsigned char tmp;
++    switch (size) {
++    case 1:
++      break;
++    case 4: // Instruction
++      for (; buff != buff_end; buff += 4) {
++        tmp = buff[0]; buff[0] = buff[3]; buff[3] = tmp;
++        tmp = buff[1]; buff[1] = buff[2]; buff[2] = tmp;
++      }
++      break;
++    case 8: // lua_Integer, lua_Number
++      for (; buff != buff_end; buff += 8) {
++        tmp = buff[0]; buff[0] = buff[7]; buff[7] = tmp;
++        tmp = buff[1]; buff[1] = buff[6]; buff[6] = tmp;
++        tmp = buff[2]; buff[2] = buff[5]; buff[5] = tmp;
++        tmp = buff[3]; buff[3] = buff[4]; buff[4] = tmp;
++      }
++      break;
++    default:
++      error(S, "invalid element size");
++      break;
++    }
++  }
+ }
+ 
+ 
+@@ -64,13 +95,30 @@ static lu_byte LoadByte (LoadState *S) {
+ }
+ 
+ 
+-static int LoadInt (LoadState *S) {
+-  int x;
+-  LoadVar(S, x);
++static size_t LoadUnsigned (LoadState *S, size_t limit) {
++  size_t x = 0;
++  int b;
++  limit >>= 7;
++  do {
++    b = LoadByte(S);
++    if (x >= limit)
++      error(S, "integer overflow");
++    x = (x << 7) | (b & 0x7f);
++  } while ((b & 0x80) == 0);
+   return x;
+ }
+ 
+ 
++static size_t LoadSize (LoadState *S) {
++  return LoadUnsigned(S, ~(size_t)0);
++}
++
++
++static int LoadInt (LoadState *S) {
++  return cast_int(LoadUnsigned(S, INT_MAX));
++}
++
++
+ static lua_Number LoadNumber (LoadState *S) {
+   lua_Number x;
+   LoadVar(S, x);
+@@ -87,10 +135,8 @@ static lua_Integer LoadInteger (LoadState *S) {
+ 
+ static TString *LoadString (LoadState *S, Proto *p) {
+   lua_State *L = S->L;
+-  size_t size = LoadByte(S);
++  size_t size = LoadSize(S);
+   TString *ts;
+-  if (size == 0xFF)
+-    LoadVar(S, size);
+   if (size == 0)
+     return NULL;
+   else if (--size <= LUAI_MAXSHORTLEN) {  /* short string? */
+@@ -241,19 +287,19 @@ static void fchecksize (LoadState *S, size_t size, const char *tname) {
+ #define checksize(S,t)	fchecksize(S,sizeof(t),#t)
+ 
+ static void checkHeader (LoadState *S) {
+-  checkliteral(S, LUA_SIGNATURE + 1, "not a");  /* 1st char already checked */
++  checkliteral(S, &LUA_SIGNATURE[1], "not a");  /* 1st char already checked */
+   if (LoadByte(S) != LUAC_VERSION)
+     error(S, "version mismatch in");
+   if (LoadByte(S) != LUAC_FORMAT)
+-    error(S, "format mismatch in");
++    error(S, "expected LuaTeX format");
+   checkliteral(S, LUAC_DATA, "corrupted");
+-  checksize(S, int);
+-  checksize(S, size_t);
+   checksize(S, Instruction);
+   checksize(S, lua_Integer);
+   checksize(S, lua_Number);
+   if (LoadInteger(S) != LUAC_INT)
+-    error(S, "endianness mismatch in");
++    S->swap = 1;
++  if (LoadInteger(S) != LUAC_INT)
++    error(S, "integer format mismatch");
+   if (LoadNumber(S) != LUAC_NUM)
+     error(S, "float format mismatch in");
+ }
+@@ -273,6 +319,7 @@ LClosure *luaU_undump(lua_State *L, ZIO *Z, const char *name) {
+     S.name = name;
+   S.L = L;
+   S.Z = Z;
++  S.swap = 0;
+   checkHeader(&S);
+   cl = luaF_newLclosure(L, LoadByte(&S));
+   setclLvalue(L, L->top, cl);
+--- lundump.h.orig
++++ lundump.h
+@@ -20,7 +20,7 @@
+ 
+ #define MYINT(s)	(s[0]-'0')
+ #define LUAC_VERSION	(MYINT(LUA_VERSION_MAJOR)*16+MYINT(LUA_VERSION_MINOR))
+-#define LUAC_FORMAT	0	/* this is the official format */
++#define LUAC_FORMAT	84	/* this is the LuaTeX format */
+ 
+ /* load one chunk; from lundump.c */
+ LUAI_FUNC LClosure* luaU_undump (lua_State* L, ZIO* Z, const char* name);
diff --git a/source/libs/lua53/lua53-src/src/ldump.c b/source/libs/lua53/lua53-src/src/ldump.c
index f025acac3..cf8d29c3e 100644
--- a/source/libs/lua53/lua53-src/src/ldump.c
+++ b/source/libs/lua53/lua53-src/src/ldump.c
@@ -55,8 +55,23 @@ static void DumpByte (int y, DumpState *D) {
 }
 
 
+/* dumpInt Buff Size */
+#define DIBS    ((sizeof(size_t) * 8 / 7) + 1)
+
+static void DumpSize (size_t x, DumpState *D) {
+  lu_byte buff[DIBS];
+  int n = 0;
+  do {
+    buff[DIBS - (++n)] = x & 0x7f;  /* fill buffer in reverse order */
+    x >>= 7;
+  } while (x != 0);
+  buff[DIBS - 1] |= 0x80;  /* mark last byte */
+  DumpVector(buff + DIBS - n, n, D);
+}
+
+
 static void DumpInt (int x, DumpState *D) {
-  DumpVar(x, D);
+  DumpSize(x, D);
 }
 
 
@@ -72,17 +87,12 @@ static void DumpInteger (lua_Integer x, DumpState *D) {
 
 static void DumpString (const TString *s, DumpState *D) {
   if (s == NULL)
-    DumpByte(0, D);
+    DumpSize(0, D);
   else {
-    size_t size = tsslen(s) + 1;  /* include trailing '\0' */
+    size_t size = tsslen(s);
     const char *str = getstr(s);
-    if (size < 0xFF)
-      DumpByte(cast_int(size), D);
-    else {
-      DumpByte(0xFF, D);
-      DumpVar(size, D);
-    }
-    DumpVector(str, size - 1, D);  /* no need to save '\0' */
+    DumpSize(size + 1, D);
+    DumpVector(str, size, D);
   }
 }
 
@@ -186,12 +196,11 @@ static void DumpHeader (DumpState *D) {
   DumpByte(LUAC_VERSION, D);
   DumpByte(LUAC_FORMAT, D);
   DumpLiteral(LUAC_DATA, D);
-  DumpByte(sizeof(int), D);
-  DumpByte(sizeof(size_t), D);
   DumpByte(sizeof(Instruction), D);
   DumpByte(sizeof(lua_Integer), D);
   DumpByte(sizeof(lua_Number), D);
   DumpInteger(LUAC_INT, D);
+  DumpInteger(LUAC_INT, D);
   DumpNumber(LUAC_NUM, D);
 }
 
diff --git a/source/libs/lua53/lua53-src/src/lundump.c b/source/libs/lua53/lua53-src/src/lundump.c
index edf9eb8d0..be000459e 100644
--- a/source/libs/lua53/lua53-src/src/lundump.c
+++ b/source/libs/lua53/lua53-src/src/lundump.c
@@ -10,6 +10,7 @@
 #include "lprefix.h"
 
 
+#include <limits.h>
 #include <string.h>
 
 #include "lua.h"
@@ -33,6 +34,7 @@ typedef struct {
   lua_State *L;
   ZIO *Z;
   const char *name;
+  int swap;
 } LoadState;
 
 
@@ -46,11 +48,40 @@ static l_noret error(LoadState *S, const char *why) {
 ** All high-level loads go through LoadVector; you can change it to
 ** adapt to the endianness of the input
 */
-#define LoadVector(S,b,n)	LoadBlock(S,b,(n)*sizeof((b)[0]))
+#define LoadVector(S,b,n)	LoadBlock(S,b,n,sizeof((b)[0]))
 
-static void LoadBlock (LoadState *S, void *b, size_t size) {
-  if (luaZ_read(S->Z, b, size) != 0)
+static void LoadBlock (LoadState *S, void *b, size_t n, size_t size) {
+  size_t totalsize = n * size;
+
+  if (luaZ_read(S->Z, b, totalsize) != 0)
     error(S, "truncated");
+
+  if (S->swap) {
+    unsigned char *buff = b;
+    unsigned char *buff_end = b + totalsize;
+    unsigned char tmp;
+    switch (size) {
+    case 1:
+      break;
+    case 4: // Instruction
+      for (; buff != buff_end; buff += 4) {
+        tmp = buff[0]; buff[0] = buff[3]; buff[3] = tmp;
+        tmp = buff[1]; buff[1] = buff[2]; buff[2] = tmp;
+      }
+      break;
+    case 8: // lua_Integer, lua_Number
+      for (; buff != buff_end; buff += 8) {
+        tmp = buff[0]; buff[0] = buff[7]; buff[7] = tmp;
+        tmp = buff[1]; buff[1] = buff[6]; buff[6] = tmp;
+        tmp = buff[2]; buff[2] = buff[5]; buff[5] = tmp;
+        tmp = buff[3]; buff[3] = buff[4]; buff[4] = tmp;
+      }
+      break;
+    default:
+      error(S, "invalid element size");
+      break;
+    }
+  }
 }
 
 
@@ -64,13 +95,30 @@ static lu_byte LoadByte (LoadState *S) {
 }
 
 
-static int LoadInt (LoadState *S) {
-  int x;
-  LoadVar(S, x);
+static size_t LoadUnsigned (LoadState *S, size_t limit) {
+  size_t x = 0;
+  int b;
+  limit >>= 7;
+  do {
+    b = LoadByte(S);
+    if (x >= limit)
+      error(S, "integer overflow");
+    x = (x << 7) | (b & 0x7f);
+  } while ((b & 0x80) == 0);
   return x;
 }
 
 
+static size_t LoadSize (LoadState *S) {
+  return LoadUnsigned(S, ~(size_t)0);
+}
+
+
+static int LoadInt (LoadState *S) {
+  return cast_int(LoadUnsigned(S, INT_MAX));
+}
+
+
 static lua_Number LoadNumber (LoadState *S) {
   lua_Number x;
   LoadVar(S, x);
@@ -87,10 +135,8 @@ static lua_Integer LoadInteger (LoadState *S) {
 
 static TString *LoadString (LoadState *S, Proto *p) {
   lua_State *L = S->L;
-  size_t size = LoadByte(S);
+  size_t size = LoadSize(S);
   TString *ts;
-  if (size == 0xFF)
-    LoadVar(S, size);
   if (size == 0)
     return NULL;
   else if (--size <= LUAI_MAXSHORTLEN) {  /* short string? */
@@ -241,19 +287,19 @@ static void fchecksize (LoadState *S, size_t size, const char *tname) {
 #define checksize(S,t)	fchecksize(S,sizeof(t),#t)
 
 static void checkHeader (LoadState *S) {
-  checkliteral(S, LUA_SIGNATURE + 1, "not a");  /* 1st char already checked */
+  checkliteral(S, &LUA_SIGNATURE[1], "not a");  /* 1st char already checked */
   if (LoadByte(S) != LUAC_VERSION)
     error(S, "version mismatch in");
   if (LoadByte(S) != LUAC_FORMAT)
-    error(S, "format mismatch in");
+    error(S, "expected LuaTeX format");
   checkliteral(S, LUAC_DATA, "corrupted");
-  checksize(S, int);
-  checksize(S, size_t);
   checksize(S, Instruction);
   checksize(S, lua_Integer);
   checksize(S, lua_Number);
   if (LoadInteger(S) != LUAC_INT)
-    error(S, "endianness mismatch in");
+    S->swap = 1;
+  if (LoadInteger(S) != LUAC_INT)
+    error(S, "integer format mismatch");
   if (LoadNumber(S) != LUAC_NUM)
     error(S, "float format mismatch in");
 }
@@ -273,6 +319,7 @@ LClosure *luaU_undump(lua_State *L, ZIO *Z, const char *name) {
     S.name = name;
   S.L = L;
   S.Z = Z;
+  S.swap = 0;
   checkHeader(&S);
   cl = luaF_newLclosure(L, LoadByte(&S));
   setclLvalue(L, L->top, cl);
diff --git a/source/libs/lua53/lua53-src/src/lundump.h b/source/libs/lua53/lua53-src/src/lundump.h
index f3e2e9061..8575d95a0 100644
--- a/source/libs/lua53/lua53-src/src/lundump.h
+++ b/source/libs/lua53/lua53-src/src/lundump.h
@@ -20,7 +20,7 @@
 
 #define MYINT(s)	(s[0]-'0')
 #define LUAC_VERSION	(MYINT(LUA_VERSION_MAJOR)*16+MYINT(LUA_VERSION_MINOR))
-#define LUAC_FORMAT	0	/* this is the official format */
+#define LUAC_FORMAT	84	/* this is the LuaTeX format */
 
 /* load one chunk; from lundump.c */
 LUAI_FUNC LClosure* luaU_undump (lua_State* L, ZIO* Z, const char* name);
diff --git a/source/texk/web2c/luatexdir/lua/llualib.c b/source/texk/web2c/luatexdir/lua/llualib.c
index 0586aba02..68ee678f0 100644
--- a/source/texk/web2c/luatexdir/lua/llualib.c
+++ b/source/texk/web2c/luatexdir/lua/llualib.c
@@ -195,11 +195,12 @@ static int get_bytecode(lua_State * L)
             if (lua_load
                 (L, reader, (void *) (lua_bytecode_registers + k),
 #ifdef LuajitTeX
-                 "bytecode")) {
+                 "bytecode") != LUA_OK) {
 #else
-                 "bytecode", NULL)) {
+                 "bytecode", NULL) != 0) {
 #endif
-                return luaL_error(L, "bad bytecode register");
+                // error message is on top of the stack
+                return lua_error(L);
             } else {
                 lua_pushvalue(L, -1);
                 bytecode_register_shadow_set(L, k);
-- 
2.35.1

Reply via email to