Hello Michael, I am very sorry for the late reply; we had exams and assignments this week, and I had to read up on __builtin_assume_aligned, as I had not come across it before.
#pragma clang loop vectorize_assume_alignment(32) > for(int i = 0;i < n; i++){ > a[i] = b[i] + i*i; > } > For this, every access inside the loop will be marked as aligned to 32 bytes, e.g. in the IR: > for.cond: ; preds = %for.inc, > %entry > %5 = load i32, i32* %i, align 32, !llvm.access.group !2 > %6 = load i32, i32* %n, align 32, !llvm.access.group !2 > %cmp = icmp slt i32 %5, %6 > br i1 %cmp, label %for.body, label %for.end > > for.body: ; preds = %for.cond > %7 = load i32, i32* %i, align 32, !llvm.access.group !2 > %8 = load i32, i32* %i, align 32, !llvm.access.group !2 > %idxprom = sext i32 %8 to i64 > %arrayidx = getelementptr inbounds i32, i32* %vla1, i64 %idxprom > store i32 %7, i32* %arrayidx, align 32, !llvm.access.group !2 > br label %for.inc > > for.inc: ; preds = %for.body > %9 = load i32, i32* %i, align 32, !llvm.access.group !2 > %inc = add nsw i32 %9, 1 > store i32 %inc, i32* %i, align 32, !llvm.access.group !2 > br label %for.cond, !llvm.loop !3 > With the pragma, you will not need to create a separate pointer for every array (or operand) you want to perform the operation on. 
> void mult(float* x, int size, float factor){ > float* ax = (float*)__builtin_assume_aligned(x, 64); > for (int i = 0; i < size; ++i) > ax[i] *= factor; > } > The IR generated for this: > define void @mult(i32*, i32, float) #0 { > %4 = alloca i32*, align 8 > %5 = alloca i32, align 4 > %6 = alloca float, align 4 > %7 = alloca i32*, align 8 > %8 = alloca i32, align 4 > store i32* %0, i32** %4, align 8 > store i32 %1, i32* %5, align 4 > store float %2, float* %6, align 4 > %9 = load i32*, i32** %4, align 8 > %10 = bitcast i32* %9 to i8* > %11 = ptrtoint i8* %10 to i64 > %12 = and i64 %11, 63 > %13 = icmp eq i64 %12, 0 > call void @llvm.assume(i1 %13) > %14 = bitcast i8* %10 to i32* > store i32* %14, i32** %7, align 8 > store i32 0, i32* %8, align 4 > br label %15 > > ; <label>:15: ; preds = %29, %3 > %16 = load i32, i32* %8, align 4 > %17 = load i32, i32* %5, align 4 > %18 = icmp slt i32 %16, %17 > br i1 %18, label %19, label %32 > > ; <label>:19: ; preds = %15 > %20 = load float, float* %6, align 4 > %21 = load i32*, i32** %7, align 8 > %22 = load i32, i32* %8, align 4 > %23 = sext i32 %22 to i64 > %24 = getelementptr inbounds i32, i32* %21, i64 %23 > %25 = load i32, i32* %24, align 4 > %26 = sitofp i32 %25 to float > %27 = fmul float %26, %20 > %28 = fptosi float %27 to i32 > store i32 %28, i32* %24, align 4 > br label %29 > > ; <label>:29: ; preds = %19 > %30 = load i32, i32* %8, align 4 > %31 = add nsw i32 %30, 1 > store i32 %31, i32* %8, align 4 > br label %15 > > ; <label>:32: ; preds = %15 > ret void > } > Here the alignment is only assumed (through the call to llvm.assume), whereas with the #pragma the alignment of each access is set to the number specified. The pragma is easier to use, and having a pragma for this will help, as similar functionality is provided in OpenMP and the Intel compilers. Thank you. If I have made any mistake, please tell me. Happy Mahto CSE Undergrad, IIT Hyderabad On Thu, Nov 14, 2019 at 10:32 PM Michael Kruse via Phabricator < revi...@reviews.llvm.org> wrote: > Meinersbur added a comment. 
> > Could you elaborate why this is better than `__builtin_assume_aligned`? > > > Repository: > rG LLVM Github Monorepo > > CHANGES SINCE LAST ACTION > https://reviews.llvm.org/D69897/new/ > > https://reviews.llvm.org/D69897 > > > >
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits