[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2018-05-10 Thread Strahinja Petrovic via Phabricator via cfe-commits
This revision was not accepted when it landed; it landed in state "Needs 
Review".
This revision was automatically updated to reflect the committed changes.
Closed by commit rL331979: This patch provides that bitfields are split even 
in case (authored by spetrovic, committed by ).
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D39053?vs=143918&id=146117#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D39053

Files:
  cfe/trunk/include/clang/Driver/Options.td
  cfe/trunk/lib/CodeGen/CGRecordLayoutBuilder.cpp
  cfe/trunk/test/CodeGenCXX/finegrain-bitfield-type.cpp

Index: cfe/trunk/include/clang/Driver/Options.td
===
--- cfe/trunk/include/clang/Driver/Options.td
+++ cfe/trunk/include/clang/Driver/Options.td
@@ -1156,7 +1156,7 @@
 
 def ffine_grained_bitfield_accesses : Flag<["-"],
   "ffine-grained-bitfield-accesses">, Group, Flags<[CC1Option]>,
-  HelpText<"Use separate accesses for bitfields with legal widths and alignments.">;
+  HelpText<"Use separate accesses for consecutive bitfield runs with legal widths and alignments.">;
 def fno_fine_grained_bitfield_accesses : Flag<["-"],
   "fno-fine-grained-bitfield-accesses">, Group, Flags<[CC1Option]>,
   HelpText<"Use large-integer access for consecutive bitfield runs.">;
Index: cfe/trunk/test/CodeGenCXX/finegrain-bitfield-type.cpp
===
--- cfe/trunk/test/CodeGenCXX/finegrain-bitfield-type.cpp
+++ cfe/trunk/test/CodeGenCXX/finegrain-bitfield-type.cpp
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffine-grained-bitfield-accesses \
+// RUN:   -emit-llvm -o - %s | FileCheck %s
+struct S4 {
+  unsigned long f1:28;
+  unsigned long f2:4;
+  unsigned long f3:12;
+};
+struct S4 a4;
+
+struct S5 {
+  unsigned long f1:28;
+  unsigned long f2:4;
+  unsigned long f3:28;
+  unsigned long f4:4;
+  unsigned long f5:12;
+};
+struct S5 a5;
+
+// CHECK: %struct.S4 = type { i32, i16 }
+// CHECK-NOT: %struct.S4 = type { i48 }
+// CHECK: %struct.S5 = type { i32, i32, i16, [6 x i8] }
+// CHECK-NOT: %struct.S5 = type { i80 }
\ No newline at end of file
Index: cfe/trunk/lib/CodeGen/CGRecordLayoutBuilder.cpp
===
--- cfe/trunk/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ cfe/trunk/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -404,19 +404,20 @@
 return;
   }
 
-  // Check if current Field is better as a single field run. When current field
+  // Check if OffsetInRecord is better as a single field run. When OffsetInRecord
   // has legal integer width, and its bitfield offset is naturally aligned, it
   // is better to make the bitfield a separate storage component so as it can be
   // accessed directly with lower cost.
-  auto IsBetterAsSingleFieldRun = [&](RecordDecl::field_iterator Field) {
+  auto IsBetterAsSingleFieldRun = [&](uint64_t OffsetInRecord,
+  uint64_t StartBitOffset) {
 if (!Types.getCodeGenOpts().FineGrainedBitfieldAccesses)
   return false;
-unsigned Width = Field->getBitWidthValue(Context);
-if (!DataLayout.isLegalInteger(Width))
+if (!DataLayout.isLegalInteger(OffsetInRecord))
   return false;
-// Make sure Field is natually aligned if it is treated as an IType integer.
-if (getFieldBitOffset(*Field) %
-Context.toBits(getAlignment(getIntNType(Width))) !=
+// Make sure StartBitOffset is natually aligned if it is treated as an
+// IType integer.
+ if (StartBitOffset %
+Context.toBits(getAlignment(getIntNType(OffsetInRecord))) !=
 0)
   return false;
 return true;
@@ -435,23 +436,24 @@
 Run = Field;
 StartBitOffset = getFieldBitOffset(*Field);
 Tail = StartBitOffset + Field->getBitWidthValue(Context);
-StartFieldAsSingleRun = IsBetterAsSingleFieldRun(Run);
+StartFieldAsSingleRun = IsBetterAsSingleFieldRun(Tail - StartBitOffset,
+ StartBitOffset); 
   }
   ++Field;
   continue;
 }
 
 // If the start field of a new run is better as a single run, or
-// if current field is better as a single run, or
+// if current field (or consecutive fields) is better as a single run, or
 // if current field has zero width bitfield and either
 // UseZeroLengthBitfieldAlignment or UseBitFieldTypeAlignment is set to
 // true, or
 // if the offset of current field is inconsistent with the offset of
 // previous field plus its offset,
 // skip the block below and go ahead to emit the storage.
 // Otherwise, try to add bitfields to the run.
 if (!StartFieldAsSingleRun && Field != FieldEnd &&
-!IsBetterAsSingleFieldRun(Field) &&
+!IsBetterAsSingleFieldRun(Tail - StartBitOffset, StartBitOffset) &&
 (!Field->isZeroLengthBitField(Context) ||
  

[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2018-05-10 Thread Hal Finkel via Phabricator via cfe-commits
hfinkel added a comment.

In https://reviews.llvm.org/D39053#1093977, @asb wrote:

> Just wanted to explicitly say that I'm happy the updated patch reflects the 
> changes to docs and comments I requested. @hfinkel - are you happy for this 
> to land now?


Yes, go ahead.

Some of these benefits might still be capturable using the regular method and 
backend improvements, but with this under the option it should be easier to 
identify where those opportunities are.


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2018-05-09 Thread Alex Bradbury via Phabricator via cfe-commits
asb added a comment.

Just wanted to explicitly say that I'm happy the updated patch reflects the 
changes to docs and comments I requested. @hfinkel - are you happy for this to 
land now?


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2018-05-09 Thread Ana Pazos via Phabricator via cfe-commits
apazos added a comment.

Thanks for updating the patch, @spetrovic.
Can we have this committed?
This patch has shown to produce code size improvements for a number of targets 
(Mips, X86, ARM, RISC-V).


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2018-04-25 Thread Strahinja Petrovic via Phabricator via cfe-commits
spetrovic updated this revision to Diff 143918.
spetrovic added a comment.

Comments addressed


https://reviews.llvm.org/D39053

Files:
  include/clang/Driver/Options.td
  lib/CodeGen/CGRecordLayoutBuilder.cpp
  test/CodeGenCXX/finegrain-bitfield-type.cpp

Index: test/CodeGenCXX/finegrain-bitfield-type.cpp
===
--- test/CodeGenCXX/finegrain-bitfield-type.cpp
+++ test/CodeGenCXX/finegrain-bitfield-type.cpp
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffine-grained-bitfield-accesses \
+// RUN:   -emit-llvm -o - %s | FileCheck %s
+struct S4 {
+  unsigned long f1:28;
+  unsigned long f2:4;
+  unsigned long f3:12;
+};
+struct S4 a4;
+
+struct S5 {
+  unsigned long f1:28;
+  unsigned long f2:4;
+  unsigned long f3:28;
+  unsigned long f4:4;
+  unsigned long f5:12;
+};
+struct S5 a5;
+
+// CHECK: %struct.S4 = type { i32, i16 }
+// CHECK-NOT: %struct.S4 = type { i48 }
+// CHECK: %struct.S5 = type { i32, i32, i16, [6 x i8] }
+// CHECK-NOT: %struct.S5 = type { i80 }
\ No newline at end of file
Index: lib/CodeGen/CGRecordLayoutBuilder.cpp
===
--- lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -404,19 +404,20 @@
 return;
   }
 
-  // Check if current Field is better as a single field run. When current field
+  // Check if OffsetInRecord is better as a single field run. When OffsetInRecord
   // has legal integer width, and its bitfield offset is naturally aligned, it
   // is better to make the bitfield a separate storage component so as it can be
   // accessed directly with lower cost.
-  auto IsBetterAsSingleFieldRun = [&](RecordDecl::field_iterator Field) {
+  auto IsBetterAsSingleFieldRun = [&](uint64_t OffsetInRecord,
+  uint64_t StartBitOffset) {
 if (!Types.getCodeGenOpts().FineGrainedBitfieldAccesses)
   return false;
-unsigned Width = Field->getBitWidthValue(Context);
-if (!DataLayout.isLegalInteger(Width))
+if (!DataLayout.isLegalInteger(OffsetInRecord))
   return false;
-// Make sure Field is natually aligned if it is treated as an IType integer.
-if (getFieldBitOffset(*Field) %
-Context.toBits(getAlignment(getIntNType(Width))) !=
+// Make sure StartBitOffset is natually aligned if it is treated as an
+// IType integer.
+ if (StartBitOffset %
+Context.toBits(getAlignment(getIntNType(OffsetInRecord))) !=
 0)
   return false;
 return true;
@@ -435,23 +436,24 @@
 Run = Field;
 StartBitOffset = getFieldBitOffset(*Field);
 Tail = StartBitOffset + Field->getBitWidthValue(Context);
-StartFieldAsSingleRun = IsBetterAsSingleFieldRun(Run);
+StartFieldAsSingleRun = IsBetterAsSingleFieldRun(Tail - StartBitOffset,
+ StartBitOffset); 
   }
   ++Field;
   continue;
 }
 
 // If the start field of a new run is better as a single run, or
-// if current field is better as a single run, or
+// if current field (or consecutive fields) is better as a single run, or
 // if current field has zero width bitfield and either
 // UseZeroLengthBitfieldAlignment or UseBitFieldTypeAlignment is set to
 // true, or
 // if the offset of current field is inconsistent with the offset of
 // previous field plus its offset,
 // skip the block below and go ahead to emit the storage.
 // Otherwise, try to add bitfields to the run.
 if (!StartFieldAsSingleRun && Field != FieldEnd &&
-!IsBetterAsSingleFieldRun(Field) &&
+!IsBetterAsSingleFieldRun(Tail - StartBitOffset, StartBitOffset) &&
 (!Field->isZeroLengthBitField(Context) ||
  (!Context.getTargetInfo().useZeroLengthBitfieldAlignment() &&
   !Context.getTargetInfo().useBitFieldTypeAlignment())) &&
Index: include/clang/Driver/Options.td
===
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -1138,7 +1138,7 @@
 
 def ffine_grained_bitfield_accesses : Flag<["-"],
   "ffine-grained-bitfield-accesses">, Group, Flags<[CC1Option]>,
-  HelpText<"Use separate accesses for bitfields with legal widths and alignments.">;
+  HelpText<"Use separate accesses for consecutive bitfield runs with legal widths and alignments.">;
 def fno_fine_grained_bitfield_accesses : Flag<["-"],
   "fno-fine-grained-bitfield-accesses">, Group, Flags<[CC1Option]>,
   HelpText<"Use large-integer access for consecutive bitfield runs.">;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2018-04-20 Thread Mandeep Singh Grang via Phabricator via cfe-commits
mgrang added a comment.

Here is a test case which improves with this patch (for RISCV target). It is 
able to detect load/store halfword for size 16 bitfields.

  struct st {
int a:1;
int b:8;
int c:11;
int d:12;
int e:16;
int f:16;
int g:16;
  } S;
  
  void foo(int x) {
S.e = x;
  }

GCC:

   :
 0: 07b7lui x15,0x0
 4: 00a79223sh  x10,4(x15) # 4 
 8: 8067jalrx0,0(x1)

LLVM without this patch:

   :
 0: 000105b7lui x11,0x10
 4: fff58593addix11,x11,-1 #  
 8: 00b57533and x10,x10,x11
 c: 05b7lui x11,0x0
10: 00058593addix11,x11,0 # 0 
14: 0045a603lw  x12,4(x11)
18: 06b7lui x13,0x0
1c: 00d67633and x12,x12,x13
20: 00a66533or  x10,x12,x10
24: 00a5a223sw  x10,4(x11)
28: 8067jalrx0,0(x1)

LLVM with this patch:

   :
 0: 05b7lui x11,0x0
 4: 00058593addix11,x11,0 # 0 
 8: 00a59223sh  x10,4(x11)
 c: 8067jalrx0,0(x1)




https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2018-04-19 Thread Alex Bradbury via Phabricator via cfe-commits
asb added a comment.

Hi @spetrovic - I think Hal Finkel's earlier request to update the comments of 
IsBetterAsSingleFieldRun remains unaddressed. It looks like at least the 
comment string immediately before `auto IsBetterAsSingleFieldRun` needs to be 
updated to reflect the changed behaviour.

The documentation for the `-ffine-grained-bitfield-accesses` option may also need to be 
updated. It currently reads "Use separate accesses for bitfields with legal 
widths and alignments.". I think "Use separate accesses for consecutive 
bitfield runs with legal widths and alignments" might better reflect the 
behaviour after this patch. Do you agree?


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2018-04-18 Thread Mandeep Singh Grang via Phabricator via cfe-commits
mgrang added a comment.

With this patch we get ~1800 bytes improvement on one of our internal 
codebases. I also ran spec2000/spec2006 validations (for RISCV) and there were 
no regressions. There were also no code size improvements seen in spec.


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2018-04-17 Thread Mandeep Singh Grang via Phabricator via cfe-commits
mgrang added a comment.

Added @apazos and myself a reviewers since this is something we would be 
interested in getting to work.


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2018-04-16 Thread Strahinja Petrovic via Phabricator via cfe-commits
spetrovic added a comment.

ping


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2018-03-29 Thread Strahinja Petrovic via Phabricator via cfe-commits
spetrovic added a comment.

ping


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2018-03-08 Thread Petar Jovanovic via Phabricator via cfe-commits
petarj added a comment.

Is everyone OK with the patch now?


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2018-01-22 Thread Wei Mi via Phabricator via cfe-commits
wmi added a comment.

Thanks for the size evaluation. I regarded the change as a natural and limited 
extension to the current fine-grain bitfield access mode, so I feel ok with the 
change. Hal, what do you think?


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2018-01-19 Thread Petar Jovanovic via Phabricator via cfe-commits
petarj added a comment.

This sounds as a valid improvement. Can we have this code committed?


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2017-12-21 Thread Strahinja Petrovic via Phabricator via cfe-commits
spetrovic added a comment.

ping


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2017-12-11 Thread Strahinja Petrovic via Phabricator via cfe-commits
spetrovic added a comment.

ping


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2017-11-29 Thread Strahinja Petrovic via Phabricator via cfe-commits
spetrovic added a comment.

I tried to compile some important libraries for X86 and MIPS64 within Chromium 
with clang/llvm. I have compared results between LLVM trunk, and LLVM trunk 
with my patch. There is code size improvement on many libraries, here are some 
results:

- X86

libframe~1.5%
librendering~1.2%
liblayout_2 ~0.7%
libpaint_1  ~0.65%
libglslang  ~0.5%
libediting_3~0.5%
libcore_generated   ~0.5%
libanimation_1  ~0.5%

- Mips64

libglslang  ~3.2%
libframe~1.5%
liblayout_0 ~1.2%
libGLESv2   ~1%
librendering~0.7%
liblayout_1 ~0.5%
libcss_9~0.5%

Those are just some of libraries where we have code size improvement with this 
patch, I have also tried to compile LLVM with LLVM (with my patch) for x86 and 
MIPS64, and there is also code size improvement on both architectures. For 
example within clang executable for MIPS64 ~350 unaligned loads is removed. So, 
what do you think about it ?


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2017-11-13 Thread Wei Mi via Phabricator via cfe-commits
wmi added a comment.

I think it may be hard to fix the problem in backend. It will face the same 
issue of store-to-load forwarding if at some places the transformation happens 
but at some other places somehow it doesn't.

But I am not sure whether it is acceptable to add more finegrain bitfield 
access transformations in frontend. It is a tradeoff. From my experience trying 
your patch on x8664, I saw saving of some instructions because with the 
transformation we now use shorter immediate consts which can be folded into 
other instructions instead of loading them in separate moves. But the bit 
operations are not saved (I may be wrong), so I have no idea whether it is 
critical for performance (To provide some background, we introduced finegrain 
field access because the original problem introduced a serious performance 
issue in some critical libraries. But doing this kind of bitfield 
transformations in frontend can potentially harm some other applications. That 
is why it is off by default, and Hal had concern about adding more such 
transformations. ).  Do you have performance number on some benchmarks to 
justify its importance? That may help folks to make a better trade off here.


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2017-11-07 Thread Strahinja Petrovic via Phabricator via cfe-commits
spetrovic added a comment.

ping


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2017-11-01 Thread Strahinja Petrovic via Phabricator via cfe-commits
spetrovic added a comment.

I was looking into whether I can resolve this problem in the backend. Example:

C code:

typedef struct {
unsigned int f1 : 28;
unsigned int f2 : 4;
unsigned int f3 : 12;
} S5;

void foo(S5 *cmd) {
cmd->f3 = 5;
}

. ll file (without the patch):

%struct.S5 = type { i48 }

define void @foo(%struct.S5* nocapture %cmd) local_unnamed_addr #0 {
entry:

  %0 = bitcast %struct.S5* %cmd to i64*
  %bf.load = load i64, i64* %0, align 4
  %bf.clear = and i64 %bf.load, -4293918721
  %bf.set = or i64 %bf.clear, 5242880
  store i64 %bf.set, i64* %0, align 4
  ret void

}

The load (%bf.load = load i64, i64* %0, align 4) is NON_EXTLOAD, and backend 
handles it later as unaligned load. So, maybe one of possible solutions in 
backend is for each architecture to try to catch node pattern and replace 
unaligned loads with normal loads in specific cases, but I'm not sure how 
feasible that is. At the moment, the solution in frontend seems better to me. 
What do you think?


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2017-10-26 Thread Hal Finkel via Phabricator via cfe-commits
hfinkel added a comment.

In https://reviews.llvm.org/D39053#906513, @spetrovic wrote:

> Well, basically I'm just expanding the existing algorithm, why should we 
> split fields just in case when current field is integer,
>  I'm not resolving specific problem with unaligned loads/stores on MIPS.
>
> In this example:
>
> typedef struct {
>
>   unsigned int f1 : 28;
>   unsigned int f2 : 4;
>   unsigned int f3 : 12;
>
> } S5;
>
> S5 *cmd;
>
> void foo() {
>
>   cmd->f3 = 5;
>
> }
>
> With this patch there is improvement in code size not just on MIPS 
> architecture, on X86 and ARM is also improved code size. If structure S5 is 
> treated as i48 type there are extra instructions for reading it not just on 
> MIPS. We can take results for MIPS just for example:
>
> Output without the patch:
>
>  :
>
>0: 3c01lui at,0x0
>4: 0039082ddaddu   at,at,t9
>8: 6421daddiu  at,at,0
>c: dc21ld  at,0(at)
>   10: dc21ld  at,0(at)
>   14: 6822ldl v0,0(at)
>   18: 6c220007ldr v0,7(at)
>   1c: 64030005daddiu  v1,zero,5
>   20: 7c62fd07dinsv0,v1,0x14,0xc
>   24: b022sdl v0,0(at)
>   28: 03e8jr  ra
>   2c: b4220007sdr v0,7(at)
>   
>   
>
> Output with the patch:
>
>  :
>
>0: 3c01lui at,0x0
>4: 0039082ddaddu   at,at,t9
>8: 6421daddiu  at,at,0
>c: dc21ld  at,0(at)
>   10: dc21ld  at,0(at)
>   14: 94220004lhu v0,4(at)
>   18: 24030005li  v1,5
>   1c: 7c62f904ins v0,v1,0x4,0x1c
>   20: 03e8jr  ra
>   24: a4220004sh  v0,4(at)
>   
>
> This is simple example, in more complicated examples there is more 
> improvement.


I think this is part of the slippery slope we didn't want to go down. We 
introduced this mode in the first place only to resolve a store-to-load 
forwarding problem that is theoretically unsolvable by any local lowering 
decisions. This, on the other hand, looks like a local code-generation problem 
that we can/should fix in the backend. If I'm wrong, we should consider this as 
well.


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2017-10-25 Thread Strahinja Petrovic via Phabricator via cfe-commits
spetrovic added a comment.

Well, basically I'm just expanding the existing algorithm — why should we split 
fields only in the case when the current field is an integer?
I'm not resolving a specific problem with unaligned loads/stores on MIPS.

In this example:

typedef struct {

  unsigned int f1 : 28;
  unsigned int f2 : 4;
  unsigned int f3 : 12;

} S5;

S5 *cmd;

void foo() {
cmd->f3 = 5;
}

With this patch there is an improvement in code size not just on the MIPS 
architecture; code size is also improved on X86 and ARM. If structure S5 is 
treated as an i48 type, there are extra instructions for reading it, not just on 
MIPS. We can take the results for MIPS as an example:

Output without the patch:

 :

   0:   3c01lui at,0x0
   4:   0039082ddaddu   at,at,t9
   8:   6421daddiu  at,at,0
   c:   dc21ld  at,0(at)
  10:   dc21ld  at,0(at)
  14:   6822ldl v0,0(at)
  18:   6c220007ldr v0,7(at)
  1c:   64030005daddiu  v1,zero,5
  20:   7c62fd07dinsv0,v1,0x14,0xc
  24:   b022sdl v0,0(at)
  28:   03e8jr  ra
  2c:   b4220007sdr v0,7(at)

Output with the patch:

 :

   0:   3c01lui at,0x0
   4:   0039082ddaddu   at,at,t9
   8:   6421daddiu  at,at,0
   c:   dc21ld  at,0(at)
  10:   dc21ld  at,0(at)
  14:   94220004lhu v0,4(at)
  18:   24030005li  v1,5
  1c:   7c62f904ins v0,v1,0x4,0x1c
  20:   03e8jr  ra
  24:   a4220004sh  v0,4(at)

This is simple example, in more complicated examples there is more improvement.


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2017-10-23 Thread Hal Finkel via Phabricator via cfe-commits
hfinkel added a comment.

> With this patch we avoid unaligned loads and stores, at least on MIPS 
> architecture.

Can you please explain the nature of the problematic situations?

Also, this patch does not update the comments that describe the splitting 
algorithm. That should be improved.

If this is just addressing the classic problem that, once we narrow a load we 
can't re-widen it, it might be best to solve this another way (e.g., by adding 
metadata noting the dereferenceable range).


https://reviews.llvm.org/D39053



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39053: [Bitfield] Add more cases to making the bitfield a separate location

2017-10-18 Thread Strahinja Petrovic via Phabricator via cfe-commits
spetrovic created this revision.
Herald added subscribers: arichardson, sdardis.

This patch provides that bitfields are split even in the case when the current 
field is not a legal integer type. For example,

struct S3 {

  unsigned long a1:28;
  unsigned long a2:4;
  unsigned long a3:12;

};

struct S4 {

  unsigned long long b1:61;
  unsigned long b2:3;
  unsigned long b3:3;

};

At the moment, S3 is i48 type, and S4 is i72 type, with this change S3 is 
treated as i32 + 16, and S4 is treated as i32 + i32 + i8.
With this patch we avoid unaligned loads and stores, at least on MIPS 
architecture.


https://reviews.llvm.org/D39053

Files:
  lib/CodeGen/CGRecordLayoutBuilder.cpp
  test/CodeGenCXX/finegrain-bitfield-type.cpp


Index: test/CodeGenCXX/finegrain-bitfield-type.cpp
===
--- test/CodeGenCXX/finegrain-bitfield-type.cpp
+++ test/CodeGenCXX/finegrain-bitfield-type.cpp
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffine-grained-bitfield-accesses \
+// RUN:   -emit-llvm -o - %s | FileCheck %s
+struct S4 {
+  unsigned long f1:28;
+  unsigned long f2:4;
+  unsigned long f3:12;
+};
+struct S4 a4;
+
+struct S5 {
+  unsigned long f1:28;
+  unsigned long f2:4;
+  unsigned long f3:28;
+  unsigned long f4:4;
+  unsigned long f5:12;
+};
+struct S5 a5;
+
+
+// CHECK: %struct.S4 = type { i32, i16 }
+// CHECK-NOT: %struct.S4 = type { i48 }
+// CHECK: %struct.S5 = type { i32, i32, i16, [6 x i8] }
+// CHECK-NOT: %struct.S5 = type { i80 }
\ No newline at end of file
Index: lib/CodeGen/CGRecordLayoutBuilder.cpp
===
--- lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -408,15 +408,15 @@
   // has legal integer width, and its bitfield offset is naturally aligned, it
   // is better to make the bitfield a separate storage component so as it can 
be
   // accessed directly with lower cost.
-  auto IsBetterAsSingleFieldRun = [&](RecordDecl::field_iterator Field) {
+  auto IsBetterAsSingleFieldRun = [&](uint64_t OffsetInRecord,
+  uint64_t StartBitOffset) {
 if (!Types.getCodeGenOpts().FineGrainedBitfieldAccesses)
   return false;
-unsigned Width = Field->getBitWidthValue(Context);
-if (!DataLayout.isLegalInteger(Width))
+if (!DataLayout.isLegalInteger(OffsetInRecord))
   return false;
 // Make sure Field is natually aligned if it is treated as an IType 
integer.
-if (getFieldBitOffset(*Field) %
-Context.toBits(getAlignment(getIntNType(Width))) !=
+if (StartBitOffset %
+Context.toBits(getAlignment(getIntNType(OffsetInRecord))) !=
 0)
   return false;
 return true;
@@ -435,7 +435,8 @@
 Run = Field;
 StartBitOffset = getFieldBitOffset(*Field);
 Tail = StartBitOffset + Field->getBitWidthValue(Context);
-StartFieldAsSingleRun = IsBetterAsSingleFieldRun(Run);
+StartFieldAsSingleRun = IsBetterAsSingleFieldRun(Tail - StartBitOffset,
+ StartBitOffset);
   }
   ++Field;
   continue;
@@ -449,7 +450,7 @@
 // skip the block below and go ahead to emit the storage.
 // Otherwise, try to add bitfields to the run.
 if (!StartFieldAsSingleRun && Field != FieldEnd &&
-!IsBetterAsSingleFieldRun(Field) &&
+!IsBetterAsSingleFieldRun(Tail - StartBitOffset, StartBitOffset) &&
 Field->getBitWidthValue(Context) != 0 &&
 Tail == getFieldBitOffset(*Field)) {
   Tail += Field->getBitWidthValue(Context);


Index: test/CodeGenCXX/finegrain-bitfield-type.cpp
===
--- test/CodeGenCXX/finegrain-bitfield-type.cpp
+++ test/CodeGenCXX/finegrain-bitfield-type.cpp
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffine-grained-bitfield-accesses \
+// RUN:   -emit-llvm -o - %s | FileCheck %s
+struct S4 {
+  unsigned long f1:28;
+  unsigned long f2:4;
+  unsigned long f3:12;
+};
+struct S4 a4;
+
+struct S5 {
+  unsigned long f1:28;
+  unsigned long f2:4;
+  unsigned long f3:28;
+  unsigned long f4:4;
+  unsigned long f5:12;
+};
+struct S5 a5;
+
+
+// CHECK: %struct.S4 = type { i32, i16 }
+// CHECK-NOT: %struct.S4 = type { i48 }
+// CHECK: %struct.S5 = type { i32, i32, i16, [6 x i8] }
+// CHECK-NOT: %struct.S5 = type { i80 }
\ No newline at end of file
Index: lib/CodeGen/CGRecordLayoutBuilder.cpp
===
--- lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -408,15 +408,15 @@
   // has legal integer width, and its bitfield offset is naturally aligned, it
   // is better to make the bitfield a separate storage component so as it can be
   // accessed directly with lower cost.
-  auto IsBetterAsSingleFieldRun = [&](RecordDecl::field_iterator Field) {
+  auto IsBetterAsS