This proof of concept patch modifies GCC to have 32-bit pointers and longs on x86-64.
This makes it possible to create an "x86-32" architecture that takes advantage of the larger number of registers and support for 64-bit computation in x86-64 long mode while avoiding the disadvantage of increased memory usage due to 64-bit pointers and longs in structures. Thus, such a GCC version could be used to produce a GNU/Linux distribution with the performance of x86-64 and the reduced memory usage of i386. Furthermore, programs requiring "large data" could use special "64-bit pointer" attributes to use 64-bit pointers only to reference the relevant large arrays/structures, using 32-bit pointers for everything else. The current patch is just a hack and should obviously be made configurable and reimplemented properly. Just setting POINTER_SIZE to 32 mostly works, but more hacks are necessary to get PIC compilation working (note that the hacks are probably at least partially wrong, since I'm not an experienced GCC hacker). A patch to binutils is also required to stop it from complaining about 32-bit relocations in shared objects. Currently a simple "Hello world!" program will work using a standard x86-64 dynamic loader and libc. This is because the function call ABI is unchanged and thus anything that doesn't use structures containing pointers or longs should be binary compatible. I do not really intend to work on this personally: I did this initial work for fun and I'm posting these patches to possibly stimulate broader interest in this concept. A possible roadmap for this would be: 1. Make it configurable 2. Fix the LEA hacks and allow proper PIC compilation 3. Fix everything else that may not work properly (e.g. PIC, relocations, exception handling, TLS, debug info) 4. Add a "32-bit object" flag to x86-64 objects 5. Modify libc so that allocations are made in the lower 4GB space for x86-32 shared objects and modify x86-64 assembly code to work with 32-bit pointers 6. Compile a native x86-32 libc and compile and test a full Debian or Ubuntu distribution 7. 
Add support for loading x86-32 and x86-64 objects in the same address space, using a single modified 64-bit libc (that for compatibility actually generates pointers in the low 4GB) 7.1. Add __attribute__((pointer_size(XXX))) and #pragma pointer_size to allow 64-bit pointers in 32-bit mode and vice versa 7.2. Surround glibc headers with #pragma pointer_size 64 7.3. Modify the dynamic loader to support different namespaces and directories for x86-32 and x86-64. Symbols starting with "__64_" or "__32_" or similar could be mapped to the other namespace. Also support "multiarchitecture" objects that would be added to both. 7.4. Split malloc/mmap into __32_malloc, __32_mmap and similar in glibc. glibc itself would use 32-bit allocations and be loaded in the low 4GB. 7.5. Compile the result and use a modified libc/dynamic loader compiled in x86-64 mode flagged as multiarchitecture to load both x86-32 and x86-64 objects 8. Modify popular programs to explicitly use 64-bit allocations and pointers for potentially huge allocations (e.g. database caches, compression program data structures, P2P software file mappings) Patches are against GCC 4.2.2 and Binutils HEAD.
Index: bfd/elf64-x86-64.c =================================================================== RCS file: /cvs/src/src/bfd/elf64-x86-64.c,v retrieving revision 1.144 diff -u -r1.144 elf64-x86-64.c --- bfd/elf64-x86-64.c 18 Oct 2007 09:13:51 -0000 1.144 +++ bfd/elf64-x86-64.c 25 Nov 2007 14:19:17 -0000 @@ -1038,6 +1038,8 @@ case R_X86_64_TPOFF32: if (info->shared) { + if(0) + { (*_bfd_error_handler) (_("%B: relocation %s against `%s' can not be used when making a shared object; recompile with -fPIC"), abfd, @@ -1045,6 +1047,7 @@ (h) ? h->root.root.string : "a local symbol"); bfd_set_error (bfd_error_bad_value); return FALSE; + } } break; @@ -1198,6 +1201,8 @@ && (sec->flags & SEC_ALLOC) != 0 && (sec->flags & SEC_READONLY) != 0) { + if(0) + { (*_bfd_error_handler) (_("%B: relocation %s against `%s' can not be used when making a shared object; recompile with -fPIC"), abfd, @@ -1205,6 +1210,7 @@ (h) ? h->root.root.string : "a local symbol"); bfd_set_error (bfd_error_bad_value); return FALSE; + } } /* Fall through. */ @@ -2599,6 +2605,8 @@ || !is_32bit_relative_branch (contents, rel->r_offset))) { + if(0) + { if (h->def_regular && r_type == R_X86_64_PC32 && h->type == STT_FUNC @@ -2613,6 +2621,7 @@ h->root.root.string); bfd_set_error (bfd_error_bad_value); return FALSE; + } } /* Fall through. */
diff -ur g_orig/gcc-4.2.2/gcc/config/i386/i386.c gcc-4.2.2/gcc/config/i386/i386.c --- g_orig/gcc-4.2.2/gcc/config/i386/i386.c 2007-09-01 17:28:30.000000000 +0200 +++ gcc-4.2.2/gcc/config/i386/i386.c 2007-11-25 03:43:35.000000000 +0100 @@ -7126,7 +7128,11 @@ rtx temp = gen_reg_rtx (Pmode); rtx val = force_operand (XEXP (x, 1), temp); if (val != temp) + { + if(GET_MODE(val) != Pmode) + val = convert_to_mode (Pmode, val, 0); emit_move_insn (temp, val); + } XEXP (x, 1) = temp; return x; @@ -7137,7 +7143,11 @@ rtx temp = gen_reg_rtx (Pmode); rtx val = force_operand (XEXP (x, 0), temp); if (val != temp) + { + if(GET_MODE(val) != Pmode) + val = convert_to_mode (Pmode, val, 0); emit_move_insn (temp, val); + } XEXP (x, 0) = temp; return x; @@ -8967,7 +8978,7 @@ } } - if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode)) + if (flag_pic && symbolic_operand (op1, mode)) // && mode == Pmode && symbolic_operand (op1, Pmode)) { if (TARGET_MACHO && !TARGET_64BIT) { @@ -8991,9 +9002,13 @@ else { if (GET_CODE (op0) == MEM) - op1 = force_reg (Pmode, op1); + op1 = force_reg (mode, op1); else op1 = legitimize_address (op1, op1, Pmode); + if(mode != Pmode) + { + op1 = convert_to_mode (mode, op1, 0); + } } } else Only in gcc-4.2.2/gcc/config/i386: i386.c~ diff -ur g_orig/gcc-4.2.2/gcc/config/i386/i386.h gcc-4.2.2/gcc/config/i386/i386.h --- g_orig/gcc-4.2.2/gcc/config/i386/i386.h 2007-09-01 17:28:30.000000000 +0200 +++ gcc-4.2.2/gcc/config/i386/i386.h 2007-11-25 04:33:31.000000000 +0100 @@ -535,7 +535,7 @@ #define SHORT_TYPE_SIZE 16 #define INT_TYPE_SIZE 32 #define FLOAT_TYPE_SIZE 32 -#define LONG_TYPE_SIZE BITS_PER_WORD +#define LONG_TYPE_SIZE 32 #define DOUBLE_TYPE_SIZE 64 #define LONG_LONG_TYPE_SIZE 64 @@ -1804,6 +1804,8 @@ After generation of rtl, the compiler makes no further distinction between pointers and any other objects of this machine mode. */ #define Pmode (TARGET_64BIT ? 
DImode : SImode) +#define POINTER_SIZE 32 +#define POINTERS_EXTEND_UNSIGNED 1 /* A function address in a call instruction is a byte address (for indexing purposes) Only in gcc-4.2.2/gcc/config/i386: i386.h~ diff -ur g_orig/gcc-4.2.2/gcc/config/i386/i386.md gcc-4.2.2/gcc/config/i386/i386.md --- g_orig/gcc-4.2.2/gcc/config/i386/i386.md 2007-09-01 17:28:30.000000000 +0200 +++ gcc-4.2.2/gcc/config/i386/i386.md 2007-11-25 03:34:55.000000000 +0100 @@ -4962,7 +4962,7 @@ && GET_MODE (operands[0]) == GET_MODE (operands[1]) && GET_MODE (operands[0]) == GET_MODE (operands[2]) && (GET_MODE (operands[0]) == GET_MODE (operands[3]) - || GET_MODE (operands[3]) == VOIDmode)" + || GET_MODE (operands[3]) == VOIDmode) && !SYMBOLIC_CONST(operands[3])" "#" "&& reload_completed" [(const_int 0)] @@ -4988,7 +4988,7 @@ (plus:SI (plus:SI (match_operand:SI 1 "index_register_operand" "l") (match_operand:SI 2 "register_operand" "r")) (match_operand:SI 3 "immediate_operand" "i"))))] - "TARGET_64BIT" + "TARGET_64BIT && !SYMBOLIC_CONST(operands[3])" "#" "&& reload_completed" [(set (match_dup 0) @@ -5013,7 +5013,7 @@ && (!TARGET_PARTIAL_REG_STALL || optimize_size) && GET_MODE (operands[0]) == GET_MODE (operands[1]) && (GET_MODE (operands[0]) == GET_MODE (operands[3]) - || GET_MODE (operands[3]) == VOIDmode)" + || GET_MODE (operands[3]) == VOIDmode) && !SYMBOLIC_CONST(operands[3])" "#" "&& reload_completed" [(const_int 0)] @@ -5038,7 +5038,7 @@ (plus:SI (mult:SI (match_operand:SI 1 "index_register_operand" "l") (match_operand:SI 2 "const248_operand" "n")) (match_operand:SI 3 "nonmemory_operand" "ri"))))] - "TARGET_64BIT" + "TARGET_64BIT && !SYMBOLIC_CONST(operands[3])" "#" "&& reload_completed" [(set (match_dup 0) @@ -5062,7 +5062,7 @@ || (TARGET_64BIT && GET_MODE (operands[0]) == SImode)) && (!TARGET_PARTIAL_REG_STALL || optimize_size) && GET_MODE (operands[0]) == GET_MODE (operands[1]) - && GET_MODE (operands[0]) == GET_MODE (operands[3])" + && GET_MODE (operands[0]) == GET_MODE (operands[3]) && 
!SYMBOLIC_CONST(operands[3]) && !SYMBOLIC_CONST(operands[4])" "#" "&& reload_completed" [(const_int 0)] @@ -5093,7 +5093,7 @@ (match_operand:SI 2 "const248_operand" "n")) (match_operand:SI 3 "register_operand" "r")) (match_operand:SI 4 "immediate_operand" "i"))))] - "TARGET_64BIT" + "TARGET_64BIT && !SYMBOLIC_CONST(operands[3])" "#" "&& reload_completed" [(set (match_dup 0) @@ -5431,7 +5431,7 @@ (plus (match_operand 1 "register_operand" "") (match_operand 2 "nonmemory_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "reload_completed + "0 && reload_completed && true_regnum (operands[0]) != true_regnum (operands[1])" [(const_int 0)] { @@ -5467,7 +5467,7 @@ switch (get_attr_type (insn)) { case TYPE_LEA: - operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + operands[2] = XEXP(SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 0); return "lea{l}\t{%a2, %k0|%k0, %a2}"; case TYPE_INCDEC: @@ -5513,7 +5513,7 @@ (plus:SI (match_operand:SI 1 "register_operand" "") (match_operand:SI 2 "nonmemory_operand" "")))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && reload_completed + "0 && TARGET_64BIT && reload_completed && true_regnum (operands[0]) != true_regnum (operands[1])" [(set (match_dup 0) (zero_extend:DI (subreg:SI (plus:DI (match_dup 1) (match_dup 2)) 0)))] diff -ur g_orig/gcc-4.2.2/gcc/expr.c gcc-4.2.2/gcc/expr.c --- g_orig/gcc-4.2.2/gcc/expr.c 2007-09-01 17:28:30.000000000 +0200 +++ gcc-4.2.2/gcc/expr.c 2007-11-25 03:52:33.000000000 +0100 @@ -8138,6 +8138,8 @@ /* Check for a multiplication with matching signedness. */ else if (TREE_CODE (TREE_OPERAND (exp, 0)) == NOP_EXPR && TREE_CODE (type) == INTEGER_TYPE + && TREE_CODE (TREE_TYPE (TREE_OPERAND (exp, 0))) == INTEGER_TYPE + && TREE_CODE (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (exp, 0), 0))) == INTEGER_TYPE && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (exp, 0), 0))) < TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (exp, 0)))) && ((TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST