yaxunl updated this revision to Diff 101955.
yaxunl retitled this revision from "Support synchronisation scope in Clang atomic builtin functions" to "Add OpenCL 2.0 atomic builtin functions as Clang builtin".
yaxunl edited the summary of this revision.
yaxunl added a comment.

Add __opencl_atomic_* builtins.
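
For reference, a minimal usage sketch of the new builtins, based on the forms
exercised in the new Sema test (the function and variable names below are
illustrative only). The trailing memory_scope_* argument is the one signature
difference from the corresponding __c11_atomic_* builtins; __opencl_atomic_init
takes no order or scope:

  void example(atomic_int *p) {
    int expected = 0;
    __opencl_atomic_init(p, 0);
    __opencl_atomic_store(p, 1, memory_order_release, memory_scope_work_item);
    int v = __opencl_atomic_load(p, memory_order_acquire, memory_scope_work_item);
    (void)__opencl_atomic_compare_exchange_strong(
        p, &expected, 2, memory_order_seq_cst, memory_order_relaxed,
        memory_scope_work_item);
  }

When the target supports inline atomics, the scope is emitted as the LLVM
SynchronizationScope on the atomic instruction; otherwise it is passed as an
extra trailing i32 argument to the __opencl_atomic_* libcalls (see
test/CodeGenOpenCL/atomic-ops-libcall.cl).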


https://reviews.llvm.org/D28691

Files:
  docs/LanguageExtensions.rst
  include/clang/AST/Expr.h
  include/clang/Basic/Builtins.def
  lib/AST/ASTContext.cpp
  lib/AST/Expr.cpp
  lib/AST/StmtPrinter.cpp
  lib/Basic/Targets.cpp
  lib/CodeGen/CGAtomic.cpp
  lib/CodeGen/CGCall.cpp
  lib/CodeGen/CGExpr.cpp
  lib/CodeGen/CodeGenFunction.h
  lib/Headers/opencl-c.h
  lib/Sema/SemaChecking.cpp
  test/CodeGenOpenCL/atomic-ops-libcall.cl
  test/CodeGenOpenCL/atomic-ops.cl
  test/SemaOpenCL/atomic-ops.cl

Index: test/SemaOpenCL/atomic-ops.cl
===================================================================
--- /dev/null
+++ test/SemaOpenCL/atomic-ops.cl
@@ -0,0 +1,141 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -verify -fsyntax-only -triple=spir64
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -verify -fsyntax-only -triple=amdgcn-amdhsa-amd-opencl
+
+// Basic parsing/Sema tests for __opencl_atomic_*
+
+#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
+#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable
+
+struct S { char c[3]; };
+
+char i8;
+short i16;
+int i32;
+int8 i64;
+
+atomic_int gn;
+
+void f(atomic_int *i, const atomic_int *ci,
+       atomic_intptr_t *p, atomic_float *d,
+       int *I, const int *CI,
+       intptr_t *P, float *D, struct S *s1, struct S *s2) {
+  __opencl_atomic_init(I, 5); // expected-error {{pointer to _Atomic}}
+  __opencl_atomic_init(ci, 5); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+
+  __opencl_atomic_load(0); // expected-error {{too few arguments to function}}
+  __opencl_atomic_load(0,0,0,0); // expected-error {{too many arguments to function}}
+  __opencl_atomic_store(0,0,0,0); // expected-error {{address argument to atomic builtin must be a pointer}}
+  __opencl_atomic_store((int*)0,0,0,0); // expected-error {{address argument to atomic operation must be a pointer to _Atomic}}
+  __opencl_atomic_store(i, 0, memory_order_relaxed, memory_scope_work_item);
+  __opencl_atomic_store(ci, 0, memory_order_relaxed, memory_scope_work_item); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+
+  __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_item);
+  __opencl_atomic_load(p, memory_order_seq_cst, memory_scope_work_item);
+  __opencl_atomic_load(d, memory_order_seq_cst, memory_scope_work_item);
+  __opencl_atomic_load(ci, memory_order_seq_cst, memory_scope_work_item); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+
+  __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_item);
+  __opencl_atomic_store(p, 1, memory_order_seq_cst, memory_scope_work_item);
+  (int)__opencl_atomic_store(d, 1, memory_order_seq_cst, memory_scope_work_item); // expected-error {{operand of type 'void'}}
+
+  int exchange_1 = __opencl_atomic_exchange(i, 1, memory_order_seq_cst, memory_scope_work_item);
+  int exchange_2 = __opencl_atomic_exchange(I, 1, memory_order_seq_cst, memory_scope_work_item); // expected-error {{must be a pointer to _Atomic}}
+
+  __opencl_atomic_fetch_add(i, 1, memory_order_seq_cst, memory_scope_work_item);
+  __opencl_atomic_fetch_add(p, 1, memory_order_seq_cst, memory_scope_work_item);
+  __opencl_atomic_fetch_add(d, 1, memory_order_seq_cst, memory_scope_work_item); // expected-error {{must be a pointer to atomic integer or pointer}}
+
+  __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_item);
+  __opencl_atomic_fetch_and(p, 1, memory_order_seq_cst, memory_scope_work_item);
+  __opencl_atomic_fetch_and(d, 1, memory_order_seq_cst, memory_scope_work_item); // expected-error {{must be a pointer to atomic integer}}
+
+  bool cmpexch_1 = __opencl_atomic_compare_exchange_strong(i, I, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_item);
+  bool cmpexch_2 = __opencl_atomic_compare_exchange_strong(p, P, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_item);
+  bool cmpexch_3 = __opencl_atomic_compare_exchange_strong(d, I, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_item); // expected-warning {{incompatible pointer types}}
+  (void)__opencl_atomic_compare_exchange_strong(i, CI, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_item); // expected-warning {{passing 'const __generic int *' to parameter of type '__generic int *' discards qualifiers}}
+
+  bool cmpexchw_1 = __opencl_atomic_compare_exchange_weak(i, I, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_item);
+  bool cmpexchw_2 = __opencl_atomic_compare_exchange_weak(p, P, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_item);
+  bool cmpexchw_3 = __opencl_atomic_compare_exchange_weak(d, I, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_item); // expected-warning {{incompatible pointer types}}
+  (void)__opencl_atomic_compare_exchange_weak(i, CI, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_item); // expected-warning {{passing 'const __generic int *' to parameter of type '__generic int *' discards qualifiers}}
+
+  // Pointers to different address spaces are allowed.
+  bool cmpexch_10 = __opencl_atomic_compare_exchange_strong((global atomic_int*)0x308, (constant int*)0x309, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_item);
+
+  __opencl_atomic_init(ci, 0); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+  __opencl_atomic_store(ci, 0, memory_order_release, memory_scope_work_item); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+  __opencl_atomic_load(ci, memory_order_acquire, memory_scope_work_item); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+
+  __opencl_atomic_init(&gn, 456);
+  __opencl_atomic_init(&gn, (void*)0); // expected-warning{{incompatible pointer to integer conversion passing '__generic void *' to parameter of type 'int'}}
+}
+
+void memory_checks(atomic_int *Ap, int *p, int val) {
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_work_item);
+  (void)__opencl_atomic_load(Ap, memory_order_acquire, memory_scope_work_item);
+  (void)__opencl_atomic_load(Ap, memory_order_consume, memory_scope_work_item); // expected-error {{use of undeclared identifier 'memory_order_consume'}}
+  (void)__opencl_atomic_load(Ap, memory_order_release, memory_scope_work_item); // expected-warning {{memory order argument to atomic operation is invalid}}
+  (void)__opencl_atomic_load(Ap, memory_order_acq_rel, memory_scope_work_item); // expected-warning {{memory order argument to atomic operation is invalid}}
+  (void)__opencl_atomic_load(Ap, memory_order_seq_cst, memory_scope_work_item);
+
+  (void)__opencl_atomic_store(Ap, val, memory_order_relaxed, memory_scope_work_item);
+  (void)__opencl_atomic_store(Ap, val, memory_order_acquire, memory_scope_work_item); // expected-warning {{memory order argument to atomic operation is invalid}}
+  (void)__opencl_atomic_store(Ap, val, memory_order_release, memory_scope_work_item);
+  (void)__opencl_atomic_store(Ap, val, memory_order_acq_rel, memory_scope_work_item); // expected-warning {{memory order argument to atomic operation is invalid}}
+  (void)__opencl_atomic_store(Ap, val, memory_order_seq_cst, memory_scope_work_item);
+
+  (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_relaxed, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_acquire, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_release, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_acq_rel, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_seq_cst, memory_scope_work_item);
+
+  (void)__opencl_atomic_init(Ap, val);
+
+  (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_relaxed, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_acquire, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_release, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_acq_rel, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_seq_cst, memory_scope_work_item);
+
+  (void)__opencl_atomic_fetch_and(Ap, val, memory_order_relaxed, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_and(Ap, val, memory_order_acquire, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_and(Ap, val, memory_order_release, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_and(Ap, val, memory_order_acq_rel, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_and(Ap, val, memory_order_seq_cst, memory_scope_work_item);
+
+  (void)__opencl_atomic_fetch_or(Ap, val, memory_order_relaxed, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_or(Ap, val, memory_order_acquire, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_or(Ap, val, memory_order_release, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_or(Ap, val, memory_order_acq_rel, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_or(Ap, val, memory_order_seq_cst, memory_scope_work_item);
+
+  (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_relaxed, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_acquire, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_release, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_acq_rel, memory_scope_work_item);
+  (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_seq_cst, memory_scope_work_item);
+
+  (void)__opencl_atomic_exchange(Ap, val, memory_order_relaxed, memory_scope_work_item);
+  (void)__opencl_atomic_exchange(Ap, val, memory_order_acquire, memory_scope_work_item);
+  (void)__opencl_atomic_exchange(Ap, val, memory_order_release, memory_scope_work_item);
+  (void)__opencl_atomic_exchange(Ap, val, memory_order_acq_rel, memory_scope_work_item);
+  (void)__opencl_atomic_exchange(Ap, val, memory_order_seq_cst, memory_scope_work_item);
+
+  (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_relaxed, memory_order_relaxed, memory_scope_work_item);
+  (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_acquire, memory_order_relaxed, memory_scope_work_item);
+  (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_release, memory_order_relaxed, memory_scope_work_item);
+  (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_acq_rel, memory_order_relaxed, memory_scope_work_item);
+  (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_seq_cst, memory_order_relaxed, memory_scope_work_item);
+
+  (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_relaxed, memory_order_relaxed, memory_scope_work_item);
+  (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_acquire, memory_order_relaxed, memory_scope_work_item);
+  (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_release, memory_order_relaxed, memory_scope_work_item);
+  (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_acq_rel, memory_order_relaxed, memory_scope_work_item);
+  (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_seq_cst, memory_order_relaxed, memory_scope_work_item);
+}
+
+void nullPointerWarning(atomic_int *Ap, int *p, int val) {
+  // The 'expected' pointer shouldn't be NULL.
+  (void)__opencl_atomic_compare_exchange_strong(Ap, NULL, val, memory_order_relaxed, memory_order_relaxed, memory_scope_work_item); // expected-warning {{null passed to a callee that requires a non-null argument}}
+}
Index: test/CodeGenOpenCL/atomic-ops.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/atomic-ops.cl
@@ -0,0 +1,162 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -finclude-default-header -O0 -o - -triple=amdgcn-amd-amdhsa-opencl | FileCheck %s
+
+// Also test serialization of atomic operations here, to avoid duplicating the test.
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-opencl
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-opencl -emit-llvm -o - | FileCheck %s
+
+#ifndef ALREADY_INCLUDED
+#define ALREADY_INCLUDED
+
+atomic_int j;
+
+void fi1(atomic_int *i) {
+  // CHECK-LABEL: @fi1
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} singlethread seq_cst
+  int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_item);
+}
+
+void fi2(atomic_int *i) {
+  // CHECK-LABEL: @fi2
+  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} singlethread seq_cst
+  __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_item);
+}
+
+void fi3(atomic_int *i) {
+  // CHECK-LABEL: @fi3
+  // CHECK: atomicrmw and i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} singlethread seq_cst
+  int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_item);
+}
+
+bool fi4(atomic_int *i) {
+  // CHECK-LABEL: @fi4(
+  // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32 addrspace(4)* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] singlethread acquire acquire
+  // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
+  // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
+  // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
+  // CHECK: store i32 [[OLD]]
+  int cmp = 0;
+  return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_item);
+}
+
+float ff1(global atomic_float *d) {
+  // CHECK-LABEL: @ff1
+  // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} singlethread monotonic
+  return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_item);
+}
+
+void ff2(atomic_float *d) {
+  // CHECK-LABEL: @ff2
+  // CHECK: store atomic i32 {{.*}} singlethread release
+  __opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_item);
+}
+
+float ff3(atomic_float *d) {
+  // CHECK-LABEL: @ff3
+  // CHECK: atomicrmw xchg i32 addrspace(4)* {{.*}} singlethread seq_cst
+  return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_item);
+}
+
+// CHECK-LABEL: @atomic_init_foo
+void atomic_init_foo()
+{
+  // CHECK-NOT: atomic
+  // CHECK: store
+  __opencl_atomic_init(&j, 42);
+
+  // CHECK-NOT: atomic
+  // CHECK: }
+}
+
+// CHECK-LABEL: @failureOrder
+void failureOrder(atomic_int *ptr, int *ptr2) {
+  // CHECK: cmpxchg i32 addrspace(4)* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} singlethread acquire monotonic
+  __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_item);
+
+  // CHECK: cmpxchg weak i32 addrspace(4)* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} singlethread seq_cst acquire
+  __opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_item);
+}
+
+// CHECK-LABEL: @generalFailureOrder
+void generalFailureOrder(atomic_int *ptr, int *ptr2, int success, int fail) {
+  __opencl_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail, memory_scope_work_item);
+  // CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[ACQUIRE]]
+  // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]]
+
+  // CHECK: [[MONOTONIC]]
+  // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: ]
+
+  // CHECK: [[ACQUIRE]]
+  // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: ]
+
+  // CHECK: [[RELEASE]]
+  // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: ]
+
+  // CHECK: [[ACQREL]]
+  // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: ]
+
+  // CHECK: [[SEQCST]]
+  // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: ]
+
+  // CHECK: [[MONOTONIC_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} monotonic monotonic
+  // CHECK: br
+
+  // CHECK: [[ACQUIRE_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} acquire monotonic
+  // CHECK: br
+
+  // CHECK: [[ACQUIRE_ACQUIRE]]
+  // CHECK: cmpxchg {{.*}} acquire acquire
+  // CHECK: br
+
+  // CHECK: [[ACQREL_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} acq_rel monotonic
+  // CHECK: br
+
+  // CHECK: [[ACQREL_ACQUIRE]]
+  // CHECK: cmpxchg {{.*}} acq_rel acquire
+  // CHECK: br
+
+  // CHECK: [[SEQCST_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} seq_cst monotonic
+  // CHECK: br
+
+  // CHECK: [[SEQCST_ACQUIRE]]
+  // CHECK: cmpxchg {{.*}} seq_cst acquire
+  // CHECK: br
+
+  // CHECK: [[SEQCST_SEQCST]]
+  // CHECK: cmpxchg {{.*}} seq_cst seq_cst
+  // CHECK: br
+}
+
+int test_volatile(volatile atomic_int *i) {
+  // CHECK-LABEL: @test_volatile
+  // CHECK:      %[[i_addr:.*]] = alloca i32
+  // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
+  // CHECK-NEXT: store i32 addrspace(4)* %i, i32 addrspace(4)** %[[i_addr]]
+  // CHECK-NEXT: %[[addr:.*]] = load i32 addrspace(4)*, i32 addrspace(4)** %[[i_addr]]
+  // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32 addrspace(4)* %[[addr]] singlethread seq_cst
+  // CHECK-NEXT: store i32 %[[res]], i32* %[[atomicdst]]
+  // CHECK-NEXT: %[[retval:.*]] = load i32, i32* %[[atomicdst]]
+  // CHECK-NEXT: ret i32 %[[retval]]
+  return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_item);
+}
+
+#endif
Index: test/CodeGenOpenCL/atomic-ops-libcall.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/atomic-ops-libcall.cl
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 < %s -cl-std=CL2.0 -finclude-default-header -triple spir64 -emit-llvm | FileCheck -check-prefix=GEN4 %s
+// RUN: %clang_cc1 < %s -cl-std=CL2.0 -finclude-default-header -triple armv5e-none-linux-gnueabi -emit-llvm | FileCheck -check-prefix=GEN0 %s
+
+void f(atomic_int *i, int cmp) {
+  int x;
+  // GEN4: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8 addrspace(4)* {{%[0-9]+}}, i32 5, i32 0)
+  // GEN0: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8* {{%[0-9]+}}, i32 5, i32 0)
+  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_item);
+  // GEN4: call void @__opencl_atomic_store_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 0)
+  // GEN0: call void @__opencl_atomic_store_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 0)
+  __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_item);
+  // GEN4: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_add_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 0)
+  // GEN0: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 0)
+  x = __opencl_atomic_fetch_add(i, 3, memory_order_seq_cst, memory_scope_work_item);
+  // GEN4: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 0)
+  // GEN0: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 0)
+  x = __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_item);
+  // GEN4: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 0)
+  // GEN0: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 0)
+  x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_item);
+}
Index: lib/Sema/SemaChecking.cpp
===================================================================
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -2723,15 +2723,18 @@
   auto OrderingCABI = (llvm::AtomicOrderingCABI)Ordering;
   switch (Op) {
   case AtomicExpr::AO__c11_atomic_init:
+  case AtomicExpr::AO__opencl_atomic_init:
     llvm_unreachable("There is no ordering argument for an init");
 
   case AtomicExpr::AO__c11_atomic_load:
+  case AtomicExpr::AO__opencl_atomic_load:
   case AtomicExpr::AO__atomic_load_n:
   case AtomicExpr::AO__atomic_load:
     return OrderingCABI != llvm::AtomicOrderingCABI::release &&
            OrderingCABI != llvm::AtomicOrderingCABI::acq_rel;
 
   case AtomicExpr::AO__c11_atomic_store:
+  case AtomicExpr::AO__opencl_atomic_store:
   case AtomicExpr::AO__atomic_store:
   case AtomicExpr::AO__atomic_store_n:
     return OrderingCABI != llvm::AtomicOrderingCABI::consume &&
@@ -2748,7 +2751,9 @@
   CallExpr *TheCall = cast<CallExpr>(TheCallResult.get());
   DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
 
-  // All these operations take one of the following forms:
+  // All the non-OpenCL operations take one of the following forms.
+  // The OpenCL operations take the __c11 forms with one extra argument for
+  // synchronization scope.
   enum {
     // C    __c11_atomic_init(A *, C)
     Init,
@@ -2769,6 +2774,7 @@
     // bool __atomic_compare_exchange(A *, C *, CP, bool, int, int)
     GNUCmpXchg
   } Form = Init;
+  const unsigned NumForm = GNUCmpXchg + 1;
   const unsigned NumArgs[] = { 2, 2, 3, 3, 3, 3, 4, 5, 6 };
   const unsigned NumVals[] = { 1, 0, 1, 1, 1, 1, 2, 2, 3 };
   // where:
@@ -2778,24 +2784,32 @@
   //   M is C if C is an integer, and ptrdiff_t if C is a pointer, and
   //   the int parameters are for orderings.
 
+  static_assert(sizeof(NumArgs) / sizeof(NumArgs[0]) == NumForm &&
+                    sizeof(NumVals) / sizeof(NumVals[0]) == NumForm,
+                "need to update code for modified forms");
   static_assert(AtomicExpr::AO__c11_atomic_init == 0 &&
                     AtomicExpr::AO__c11_atomic_fetch_xor + 1 ==
                         AtomicExpr::AO__atomic_load,
                 "need to update code for modified C11 atomics");
-  bool IsC11 = Op >= AtomicExpr::AO__c11_atomic_init &&
-               Op <= AtomicExpr::AO__c11_atomic_fetch_xor;
+  bool IsOpenCL = Op >= AtomicExpr::AO__opencl_atomic_init &&
+                  Op <= AtomicExpr::AO__opencl_atomic_fetch_xor;
+  bool IsC11 = (Op >= AtomicExpr::AO__c11_atomic_init &&
+                Op <= AtomicExpr::AO__c11_atomic_fetch_xor) ||
+               IsOpenCL;
   bool IsN = Op == AtomicExpr::AO__atomic_load_n ||
              Op == AtomicExpr::AO__atomic_store_n ||
              Op == AtomicExpr::AO__atomic_exchange_n ||
              Op == AtomicExpr::AO__atomic_compare_exchange_n;
   bool IsAddSub = false;
 
   switch (Op) {
   case AtomicExpr::AO__c11_atomic_init:
+  case AtomicExpr::AO__opencl_atomic_init:
     Form = Init;
     break;
 
   case AtomicExpr::AO__c11_atomic_load:
+  case AtomicExpr::AO__opencl_atomic_load:
   case AtomicExpr::AO__atomic_load_n:
     Form = Load;
     break;
@@ -2805,13 +2819,16 @@
     break;
 
   case AtomicExpr::AO__c11_atomic_store:
+  case AtomicExpr::AO__opencl_atomic_store:
   case AtomicExpr::AO__atomic_store:
   case AtomicExpr::AO__atomic_store_n:
     Form = Copy;
     break;
 
   case AtomicExpr::AO__c11_atomic_fetch_add:
   case AtomicExpr::AO__c11_atomic_fetch_sub:
+  case AtomicExpr::AO__opencl_atomic_fetch_add:
+  case AtomicExpr::AO__opencl_atomic_fetch_sub:
   case AtomicExpr::AO__atomic_fetch_add:
   case AtomicExpr::AO__atomic_fetch_sub:
   case AtomicExpr::AO__atomic_add_fetch:
@@ -2821,6 +2838,9 @@
   case AtomicExpr::AO__c11_atomic_fetch_and:
   case AtomicExpr::AO__c11_atomic_fetch_or:
   case AtomicExpr::AO__c11_atomic_fetch_xor:
+  case AtomicExpr::AO__opencl_atomic_fetch_and:
+  case AtomicExpr::AO__opencl_atomic_fetch_or:
+  case AtomicExpr::AO__opencl_atomic_fetch_xor:
   case AtomicExpr::AO__atomic_fetch_and:
   case AtomicExpr::AO__atomic_fetch_or:
   case AtomicExpr::AO__atomic_fetch_xor:
@@ -2833,6 +2853,7 @@
     break;
 
   case AtomicExpr::AO__c11_atomic_exchange:
+  case AtomicExpr::AO__opencl_atomic_exchange:
   case AtomicExpr::AO__atomic_exchange_n:
     Form = Xchg;
     break;
@@ -2843,6 +2864,8 @@
 
   case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
   case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
     Form = C11CmpXchg;
     break;
 
@@ -2852,17 +2875,20 @@
     break;
   }
 
+  unsigned AdjustedNumArgs = NumArgs[Form];
+  if (IsOpenCL && Op != AtomicExpr::AO__opencl_atomic_init)
+    ++AdjustedNumArgs;
   // Check we have the right number of arguments.
-  if (TheCall->getNumArgs() < NumArgs[Form]) {
+  if (TheCall->getNumArgs() < AdjustedNumArgs) {
     Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
-      << 0 << NumArgs[Form] << TheCall->getNumArgs()
-      << TheCall->getCallee()->getSourceRange();
+        << 0 << AdjustedNumArgs << TheCall->getNumArgs()
+        << TheCall->getCallee()->getSourceRange();
     return ExprError();
-  } else if (TheCall->getNumArgs() > NumArgs[Form]) {
-    Diag(TheCall->getArg(NumArgs[Form])->getLocStart(),
+  } else if (TheCall->getNumArgs() > AdjustedNumArgs) {
+    Diag(TheCall->getArg(AdjustedNumArgs)->getLocStart(),
          diag::err_typecheck_call_too_many_args)
-      << 0 << NumArgs[Form] << TheCall->getNumArgs()
-      << TheCall->getCallee()->getSourceRange();
+        << 0 << AdjustedNumArgs << TheCall->getNumArgs()
+        << TheCall->getCallee()->getSourceRange();
     return ExprError();
   }
 
@@ -2974,7 +3000,7 @@
   // The first argument --- the pointer --- has a fixed type; we
   // deduce the types of the rest of the arguments accordingly.  Walk
   // the remaining arguments, converting them to the deduced value type.
-  for (unsigned i = 1; i != NumArgs[Form]; ++i) {
+  for (unsigned i = 1; i != TheCall->getNumArgs(); ++i) {
     QualType Ty;
     if (i < NumVals[Form] + 1) {
       switch (i) {
@@ -3029,6 +3055,15 @@
     TheCall->setArg(i, Arg.get());
   }
 
+  Expr *Scope;
+  if (IsOpenCL) {
+    Scope = TheCall->getArg(TheCall->getNumArgs() - 1);
+  } else {
+    Scope = IntegerLiteral::Create(
+        Context, llvm::APInt(Context.getTypeSize(Context.IntTy), (uint64_t)1),
+        Context.IntTy, SourceLocation());
+  }
+
   // Permute the arguments into a 'consistent' order.
   SmallVector<Expr*, 5> SubExprs;
   SubExprs.push_back(Ptr);
@@ -3039,28 +3074,33 @@
     break;
   case Load:
     SubExprs.push_back(TheCall->getArg(1)); // Order
+    SubExprs.push_back(Scope);              // Scope
     break;
   case LoadCopy:
   case Copy:
   case Arithmetic:
   case Xchg:
     SubExprs.push_back(TheCall->getArg(2)); // Order
+    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     break;
   case GNUXchg:
     // Note, AtomicExpr::getVal2() has a special case for this atomic.
     SubExprs.push_back(TheCall->getArg(3)); // Order
+    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     SubExprs.push_back(TheCall->getArg(2)); // Val2
     break;
   case C11CmpXchg:
     SubExprs.push_back(TheCall->getArg(3)); // Order
+    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     SubExprs.push_back(TheCall->getArg(4)); // OrderFail
     SubExprs.push_back(TheCall->getArg(2)); // Val2
     break;
   case GNUCmpXchg:
     SubExprs.push_back(TheCall->getArg(4)); // Order
+    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     SubExprs.push_back(TheCall->getArg(5)); // OrderFail
     SubExprs.push_back(TheCall->getArg(2)); // Val2
@@ -3080,12 +3120,17 @@
   AtomicExpr *AE = new (Context) AtomicExpr(TheCall->getCallee()->getLocStart(),
                                             SubExprs, ResultType, Op,
                                             TheCall->getRParenLoc());
-  
+
   if ((Op == AtomicExpr::AO__c11_atomic_load ||
-       (Op == AtomicExpr::AO__c11_atomic_store)) &&
+       Op == AtomicExpr::AO__c11_atomic_store ||
+       Op == AtomicExpr::AO__opencl_atomic_load ||
+       Op == AtomicExpr::AO__opencl_atomic_store) &&
       Context.AtomicUsesUnsupportedLibcall(AE))
-    Diag(AE->getLocStart(), diag::err_atomic_load_store_uses_lib) <<
-    ((Op == AtomicExpr::AO__c11_atomic_load) ? 0 : 1);
+    Diag(AE->getLocStart(), diag::err_atomic_load_store_uses_lib)
+        << ((Op == AtomicExpr::AO__c11_atomic_load ||
+             Op == AtomicExpr::AO__opencl_atomic_load)
+                ? 0
+                : 1);
 
   return AE;
 }
Index: lib/Headers/opencl-c.h
===================================================================
--- lib/Headers/opencl-c.h
+++ lib/Headers/opencl-c.h
@@ -13950,13 +13950,12 @@
 #define ATOMIC_FLAG_INIT 0
 
 // enum values aligned with what clang uses in EmitAtomicExpr()
-typedef enum memory_order
-{
-  memory_order_relaxed,
-  memory_order_acquire,
-  memory_order_release,
-  memory_order_acq_rel,
-  memory_order_seq_cst
+typedef enum memory_order {
+  memory_order_relaxed = __ATOMIC_RELAXED,
+  memory_order_acquire = __ATOMIC_ACQUIRE,
+  memory_order_release = __ATOMIC_RELEASE,
+  memory_order_acq_rel = __ATOMIC_ACQ_REL,
+  memory_order_seq_cst = __ATOMIC_SEQ_CST
 } memory_order;
 
 // double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -1971,7 +1971,12 @@
   }
 
   /// Emit a cast to void* in the appropriate address space.
-  llvm::Value *EmitCastToVoidPtr(llvm::Value *value);
+  /// If \p CastToGenericAddrSpace is true, cast to void* in generic address
+  /// space, which corresponds to LangAS::opencl_generic for OpenCL 2.0 and
+  /// LangAS::Default for other languages. Otherwise keep the pointer in its
+  /// own address space.
+  llvm::Value *EmitCastToVoidPtr(llvm::Value *value,
+                                 bool CastToGenericAddrSpace = false);
 
   /// EvaluateExprAsBool - Perform the usual unary conversions on the specified
   /// expression and compare the result against zero, returning an Int1Ty value.
Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -46,16 +46,23 @@
 //                        Miscellaneous Helper Methods
 //===--------------------------------------------------------------------===//
 
-llvm::Value *CodeGenFunction::EmitCastToVoidPtr(llvm::Value *value) {
-  unsigned addressSpace =
-    cast<llvm::PointerType>(value->getType())->getAddressSpace();
+llvm::Value *CodeGenFunction::EmitCastToVoidPtr(llvm::Value *value,
+                                                bool CastToGenericAddrSpace) {
+  unsigned addressSpace;
+  if (CastToGenericAddrSpace) {
+    unsigned AS = getLangOpts().OpenCLVersion >= 200 ? LangAS::opencl_generic
+                                                     : LangAS::Default;
+    addressSpace = getContext().getTargetAddressSpace(AS);
+  } else {
+    addressSpace = cast<llvm::PointerType>(value->getType())->getAddressSpace();
+  }
 
   llvm::PointerType *destType = Int8PtrTy;
   if (addressSpace)
     destType = llvm::Type::getInt8PtrTy(getLLVMContext(), addressSpace);
 
   if (value->getType() == destType) return value;
-  return Builder.CreateBitCast(value, destType);
+  return Builder.CreatePointerCast(value, destType);
 }
 
 /// CreateTempAlloca - This creates a alloca and inserts it into the entry
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -3885,7 +3885,8 @@
         // can happen due to trivial type mismatches.
         if (FirstIRArg < IRFuncTy->getNumParams() &&
             V->getType() != IRFuncTy->getParamType(FirstIRArg))
-          V = Builder.CreateBitCast(V, IRFuncTy->getParamType(FirstIRArg));
+          V = Builder.CreateBitOrPointerCast(
+              V, IRFuncTy->getParamType(FirstIRArg));
 
         IRCallArgs[FirstIRArg] = V;
         break;
Index: lib/CodeGen/CGAtomic.cpp
===================================================================
--- lib/CodeGen/CGAtomic.cpp
+++ lib/CodeGen/CGAtomic.cpp
@@ -355,17 +355,17 @@
 }
 
 static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
-                              Address Dest, Address Ptr,
-                              Address Val1, Address Val2,
-                              uint64_t Size,
+                              Address Dest, Address Ptr, Address Val1,
+                              Address Val2, uint64_t Size,
                               llvm::AtomicOrdering SuccessOrder,
-                              llvm::AtomicOrdering FailureOrder) {
+                              llvm::AtomicOrdering FailureOrder,
+                              llvm::SynchronizationScope Scope) {
   // Note that cmpxchg doesn't support weak cmpxchg, at least at the moment.
   llvm::Value *Expected = CGF.Builder.CreateLoad(Val1);
   llvm::Value *Desired = CGF.Builder.CreateLoad(Val2);
 
   llvm::AtomicCmpXchgInst *Pair = CGF.Builder.CreateAtomicCmpXchg(
-      Ptr.getPointer(), Expected, Desired, SuccessOrder, FailureOrder);
+      Ptr.getPointer(), Expected, Desired, SuccessOrder, FailureOrder, Scope);
   Pair->setVolatile(E->isVolatile());
   Pair->setWeak(IsWeak);
 
@@ -402,12 +402,10 @@
 /// Given an ordering required on success, emit all possible cmpxchg
 /// instructions to cope with the provided (but possibly only dynamically known)
 /// FailureOrder.
-static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
-                                        bool IsWeak, Address Dest, Address Ptr,
-                                        Address Val1, Address Val2,
-                                        llvm::Value *FailureOrderVal,
-                                        uint64_t Size,
-                                        llvm::AtomicOrdering SuccessOrder) {
+static void emitAtomicCmpXchgFailureSet(
+    CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak, Address Dest, Address Ptr,
+    Address Val1, Address Val2, llvm::Value *FailureOrderVal, uint64_t Size,
+    llvm::AtomicOrdering SuccessOrder, llvm::SynchronizationScope Scope) {
   llvm::AtomicOrdering FailureOrder;
   if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) {
     auto FOS = FO->getSExtValue();
@@ -435,7 +433,7 @@
           llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrder);
     }
     emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
-                      FailureOrder);
+                      FailureOrder, Scope);
     return;
   }
 
@@ -459,14 +457,14 @@
   // doesn't matter unless someone is crazy enough to use something that
   // doesn't fold to a constant for the ordering.
   CGF.Builder.SetInsertPoint(MonotonicBB);
-  emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
-                    Size, SuccessOrder, llvm::AtomicOrdering::Monotonic);
+  emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
+                    llvm::AtomicOrdering::Monotonic, Scope);
   CGF.Builder.CreateBr(ContBB);
 
   if (AcquireBB) {
     CGF.Builder.SetInsertPoint(AcquireBB);
-    emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
-                      Size, SuccessOrder, llvm::AtomicOrdering::Acquire);
+    emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
+                      llvm::AtomicOrdering::Acquire, Scope);
     CGF.Builder.CreateBr(ContBB);
     SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
                 AcquireBB);
@@ -476,7 +474,7 @@
   if (SeqCstBB) {
     CGF.Builder.SetInsertPoint(SeqCstBB);
     emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
-                      llvm::AtomicOrdering::SequentiallyConsistent);
+                      llvm::AtomicOrdering::SequentiallyConsistent, Scope);
     CGF.Builder.CreateBr(ContBB);
     SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
                 SeqCstBB);
@@ -488,27 +486,31 @@
 static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
                          Address Ptr, Address Val1, Address Val2,
                          llvm::Value *IsWeak, llvm::Value *FailureOrder,
-                         uint64_t Size, llvm::AtomicOrdering Order) {
+                         uint64_t Size, llvm::AtomicOrdering Order,
+                         llvm::SynchronizationScope Scope) {
   llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add;
   llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
 
   switch (E->getOp()) {
   case AtomicExpr::AO__c11_atomic_init:
+  case AtomicExpr::AO__opencl_atomic_init:
     llvm_unreachable("Already handled!");
 
   case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
     emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
-                                FailureOrder, Size, Order);
+                                FailureOrder, Size, Order, Scope);
     return;
   case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
     emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
-                                FailureOrder, Size, Order);
+                                FailureOrder, Size, Order, Scope);
     return;
   case AtomicExpr::AO__atomic_compare_exchange:
   case AtomicExpr::AO__atomic_compare_exchange_n: {
     if (llvm::ConstantInt *IsWeakC = dyn_cast<llvm::ConstantInt>(IsWeak)) {
       emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr,
-                                  Val1, Val2, FailureOrder, Size, Order);
+                                  Val1, Val2, FailureOrder, Size, Order, Scope);
     } else {
       // Create all the relevant BB's
       llvm::BasicBlock *StrongBB =
@@ -522,39 +524,42 @@
 
       CGF.Builder.SetInsertPoint(StrongBB);
       emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
-                                  FailureOrder, Size, Order);
+                                  FailureOrder, Size, Order, Scope);
       CGF.Builder.CreateBr(ContBB);
 
       CGF.Builder.SetInsertPoint(WeakBB);
       emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
-                                  FailureOrder, Size, Order);
+                                  FailureOrder, Size, Order, Scope);
       CGF.Builder.CreateBr(ContBB);
 
       CGF.Builder.SetInsertPoint(ContBB);
     }
     return;
   }
   case AtomicExpr::AO__c11_atomic_load:
+  case AtomicExpr::AO__opencl_atomic_load:
   case AtomicExpr::AO__atomic_load_n:
   case AtomicExpr::AO__atomic_load: {
     llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr);
-    Load->setAtomic(Order);
+    Load->setAtomic(Order, Scope);
     Load->setVolatile(E->isVolatile());
     CGF.Builder.CreateStore(Load, Dest);
     return;
   }
 
   case AtomicExpr::AO__c11_atomic_store:
+  case AtomicExpr::AO__opencl_atomic_store:
   case AtomicExpr::AO__atomic_store:
   case AtomicExpr::AO__atomic_store_n: {
     llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
     llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr);
-    Store->setAtomic(Order);
+    Store->setAtomic(Order, Scope);
     Store->setVolatile(E->isVolatile());
     return;
   }
 
   case AtomicExpr::AO__c11_atomic_exchange:
+  case AtomicExpr::AO__opencl_atomic_exchange:
   case AtomicExpr::AO__atomic_exchange_n:
   case AtomicExpr::AO__atomic_exchange:
     Op = llvm::AtomicRMWInst::Xchg;
@@ -564,38 +569,43 @@
     PostOp = llvm::Instruction::Add;
     // Fall through.
   case AtomicExpr::AO__c11_atomic_fetch_add:
+  case AtomicExpr::AO__opencl_atomic_fetch_add:
   case AtomicExpr::AO__atomic_fetch_add:
     Op = llvm::AtomicRMWInst::Add;
     break;
 
   case AtomicExpr::AO__atomic_sub_fetch:
     PostOp = llvm::Instruction::Sub;
     // Fall through.
   case AtomicExpr::AO__c11_atomic_fetch_sub:
+  case AtomicExpr::AO__opencl_atomic_fetch_sub:
   case AtomicExpr::AO__atomic_fetch_sub:
     Op = llvm::AtomicRMWInst::Sub;
     break;
 
   case AtomicExpr::AO__atomic_and_fetch:
     PostOp = llvm::Instruction::And;
     // Fall through.
   case AtomicExpr::AO__c11_atomic_fetch_and:
+  case AtomicExpr::AO__opencl_atomic_fetch_and:
   case AtomicExpr::AO__atomic_fetch_and:
     Op = llvm::AtomicRMWInst::And;
     break;
 
   case AtomicExpr::AO__atomic_or_fetch:
     PostOp = llvm::Instruction::Or;
     // Fall through.
   case AtomicExpr::AO__c11_atomic_fetch_or:
+  case AtomicExpr::AO__opencl_atomic_fetch_or:
   case AtomicExpr::AO__atomic_fetch_or:
     Op = llvm::AtomicRMWInst::Or;
     break;
 
   case AtomicExpr::AO__atomic_xor_fetch:
     PostOp = llvm::Instruction::Xor;
     // Fall through.
   case AtomicExpr::AO__c11_atomic_fetch_xor:
+  case AtomicExpr::AO__opencl_atomic_fetch_xor:
   case AtomicExpr::AO__atomic_fetch_xor:
     Op = llvm::AtomicRMWInst::Xor;
     break;
@@ -610,7 +620,7 @@
 
   llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
   llvm::AtomicRMWInst *RMWI =
-      CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order);
+      CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order, Scope);
   RMWI->setVolatile(E->isVolatile());
 
   // For __atomic_*_fetch operations, perform the operation again to
@@ -659,6 +669,7 @@
 }
 
 RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
+  bool IsOpenCL = E->isOpenCL();
   QualType AtomicTy = E->getPtr()->getType()->getPointeeType();
   QualType MemTy = AtomicTy;
   if (const AtomicType *AT = AtomicTy->getAs<AtomicType>())
@@ -677,19 +688,23 @@
   Address Dest = Address::invalid();
   Address Ptr(EmitScalarExpr(E->getPtr()), alignChars);
 
-  if (E->getOp() == AtomicExpr::AO__c11_atomic_init) {
+  if (E->getOp() == AtomicExpr::AO__c11_atomic_init ||
+      E->getOp() == AtomicExpr::AO__opencl_atomic_init) {
     LValue lvalue = MakeAddrLValue(Ptr, AtomicTy);
     EmitAtomicInit(E->getVal1(), lvalue);
     return RValue::get(nullptr);
   }
 
   llvm::Value *Order = EmitScalarExpr(E->getOrder());
+  llvm::Value *Scope = EmitScalarExpr(E->getScope());
 
   switch (E->getOp()) {
   case AtomicExpr::AO__c11_atomic_init:
+  case AtomicExpr::AO__opencl_atomic_init:
     llvm_unreachable("Already handled above with EmitAtomicInit!");
 
   case AtomicExpr::AO__c11_atomic_load:
+  case AtomicExpr::AO__opencl_atomic_load:
   case AtomicExpr::AO__atomic_load_n:
     break;
 
@@ -708,20 +723,24 @@
 
   case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
   case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
   case AtomicExpr::AO__atomic_compare_exchange_n:
   case AtomicExpr::AO__atomic_compare_exchange:
     Val1 = EmitPointerWithAlignment(E->getVal1());
     if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange)
       Val2 = EmitPointerWithAlignment(E->getVal2());
     else
       Val2 = EmitValToTemp(*this, E->getVal2());
     OrderFail = EmitScalarExpr(E->getOrderFail());
-    if (E->getNumSubExprs() == 6)
+    if (E->getNumSubExprs() == 7)
       IsWeak = EmitScalarExpr(E->getWeak());
     break;
 
   case AtomicExpr::AO__c11_atomic_fetch_add:
   case AtomicExpr::AO__c11_atomic_fetch_sub:
+  case AtomicExpr::AO__opencl_atomic_fetch_add:
+  case AtomicExpr::AO__opencl_atomic_fetch_sub:
     if (MemTy->isPointerType()) {
       // For pointer arithmetic, we're required to do a bit of math:
       // adding 1 to an int* is not the same as adding 1 to a uintptr_t.
@@ -744,11 +763,16 @@
   case AtomicExpr::AO__atomic_sub_fetch:
   case AtomicExpr::AO__c11_atomic_store:
   case AtomicExpr::AO__c11_atomic_exchange:
+  case AtomicExpr::AO__opencl_atomic_store:
+  case AtomicExpr::AO__opencl_atomic_exchange:
   case AtomicExpr::AO__atomic_store_n:
   case AtomicExpr::AO__atomic_exchange_n:
   case AtomicExpr::AO__c11_atomic_fetch_and:
   case AtomicExpr::AO__c11_atomic_fetch_or:
   case AtomicExpr::AO__c11_atomic_fetch_xor:
+  case AtomicExpr::AO__opencl_atomic_fetch_and:
+  case AtomicExpr::AO__opencl_atomic_fetch_or:
+  case AtomicExpr::AO__opencl_atomic_fetch_xor:
   case AtomicExpr::AO__atomic_fetch_and:
   case AtomicExpr::AO__atomic_fetch_or:
   case AtomicExpr::AO__atomic_fetch_xor:
@@ -784,18 +808,24 @@
     bool UseOptimizedLibcall = false;
     switch (E->getOp()) {
     case AtomicExpr::AO__c11_atomic_init:
+    case AtomicExpr::AO__opencl_atomic_init:
       llvm_unreachable("Already handled above with EmitAtomicInit!");
 
     case AtomicExpr::AO__c11_atomic_fetch_add:
+    case AtomicExpr::AO__opencl_atomic_fetch_add:
     case AtomicExpr::AO__atomic_fetch_add:
     case AtomicExpr::AO__c11_atomic_fetch_and:
+    case AtomicExpr::AO__opencl_atomic_fetch_and:
     case AtomicExpr::AO__atomic_fetch_and:
     case AtomicExpr::AO__c11_atomic_fetch_or:
+    case AtomicExpr::AO__opencl_atomic_fetch_or:
     case AtomicExpr::AO__atomic_fetch_or:
     case AtomicExpr::AO__atomic_fetch_nand:
     case AtomicExpr::AO__c11_atomic_fetch_sub:
+    case AtomicExpr::AO__opencl_atomic_fetch_sub:
     case AtomicExpr::AO__atomic_fetch_sub:
     case AtomicExpr::AO__c11_atomic_fetch_xor:
+    case AtomicExpr::AO__opencl_atomic_fetch_xor:
     case AtomicExpr::AO__atomic_fetch_xor:
     case AtomicExpr::AO__atomic_add_fetch:
     case AtomicExpr::AO__atomic_and_fetch:
@@ -812,6 +842,11 @@
     case AtomicExpr::AO__c11_atomic_exchange:
     case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
     case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+    case AtomicExpr::AO__opencl_atomic_load:
+    case AtomicExpr::AO__opencl_atomic_store:
+    case AtomicExpr::AO__opencl_atomic_exchange:
+    case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+    case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
     case AtomicExpr::AO__atomic_load_n:
     case AtomicExpr::AO__atomic_load:
     case AtomicExpr::AO__atomic_store_n:
@@ -833,8 +868,12 @@
                getContext().getSizeType());
     }
     // Atomic address is the first or second parameter
-    Args.add(RValue::get(EmitCastToVoidPtr(Ptr.getPointer())),
-             getContext().VoidPtrTy);
+    // The OpenCL atomic library functions only accept pointer arguments to
+    // generic address space.
+    Args.add(
+        RValue::get(EmitCastToVoidPtr(Ptr.getPointer(),
+                                      /*CastToGenericAddrSpace*/ IsOpenCL)),
+        getContext().VoidPtrTy);
 
     std::string LibCallName;
     QualType LoweredMemTy =
@@ -844,6 +883,7 @@
     llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
     switch (E->getOp()) {
     case AtomicExpr::AO__c11_atomic_init:
+    case AtomicExpr::AO__opencl_atomic_init:
       llvm_unreachable("Already handled!");
 
     // There is only one libcall for compare an exchange, because there is no
@@ -855,13 +895,17 @@
     //                                  int success, int failure)
     case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
     case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+    case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+    case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
     case AtomicExpr::AO__atomic_compare_exchange:
     case AtomicExpr::AO__atomic_compare_exchange_n:
       LibCallName = "__atomic_compare_exchange";
       RetTy = getContext().BoolTy;
       HaveRetTy = true;
-      Args.add(RValue::get(EmitCastToVoidPtr(Val1.getPointer())),
-               getContext().VoidPtrTy);
+      Args.add(
+          RValue::get(EmitCastToVoidPtr(Val1.getPointer(),
+                                        /*CastToGenericAddrSpace*/ IsOpenCL)),
+          getContext().VoidPtrTy);
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2.getPointer(),
                         MemTy, E->getExprLoc(), sizeChars);
       Args.add(RValue::get(Order), getContext().IntTy);
@@ -871,6 +915,7 @@
     //                        int order)
     // T __atomic_exchange_N(T *mem, T val, int order)
     case AtomicExpr::AO__c11_atomic_exchange:
+    case AtomicExpr::AO__opencl_atomic_exchange:
     case AtomicExpr::AO__atomic_exchange_n:
     case AtomicExpr::AO__atomic_exchange:
       LibCallName = "__atomic_exchange";
@@ -880,6 +925,7 @@
     // void __atomic_store(size_t size, void *mem, void *val, int order)
     // void __atomic_store_N(T *mem, T val, int order)
     case AtomicExpr::AO__c11_atomic_store:
+    case AtomicExpr::AO__opencl_atomic_store:
     case AtomicExpr::AO__atomic_store:
     case AtomicExpr::AO__atomic_store_n:
       LibCallName = "__atomic_store";
@@ -891,6 +937,7 @@
     // void __atomic_load(size_t size, void *mem, void *return, int order)
     // T __atomic_load_N(T *mem, int order)
     case AtomicExpr::AO__c11_atomic_load:
+    case AtomicExpr::AO__opencl_atomic_load:
     case AtomicExpr::AO__atomic_load:
     case AtomicExpr::AO__atomic_load_n:
       LibCallName = "__atomic_load";
@@ -901,6 +948,7 @@
       PostOp = llvm::Instruction::Add;
     // Fall through.
     case AtomicExpr::AO__c11_atomic_fetch_add:
+    case AtomicExpr::AO__opencl_atomic_fetch_add:
     case AtomicExpr::AO__atomic_fetch_add:
       LibCallName = "__atomic_fetch_add";
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
@@ -912,6 +960,7 @@
       PostOp = llvm::Instruction::And;
     // Fall through.
     case AtomicExpr::AO__c11_atomic_fetch_and:
+    case AtomicExpr::AO__opencl_atomic_fetch_and:
     case AtomicExpr::AO__atomic_fetch_and:
       LibCallName = "__atomic_fetch_and";
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
@@ -923,6 +972,7 @@
       PostOp = llvm::Instruction::Or;
     // Fall through.
     case AtomicExpr::AO__c11_atomic_fetch_or:
+    case AtomicExpr::AO__opencl_atomic_fetch_or:
     case AtomicExpr::AO__atomic_fetch_or:
       LibCallName = "__atomic_fetch_or";
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
@@ -934,6 +984,7 @@
       PostOp = llvm::Instruction::Sub;
     // Fall through.
     case AtomicExpr::AO__c11_atomic_fetch_sub:
+    case AtomicExpr::AO__opencl_atomic_fetch_sub:
     case AtomicExpr::AO__atomic_fetch_sub:
       LibCallName = "__atomic_fetch_sub";
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
@@ -945,6 +996,7 @@
       PostOp = llvm::Instruction::Xor;
     // Fall through.
     case AtomicExpr::AO__c11_atomic_fetch_xor:
+    case AtomicExpr::AO__opencl_atomic_fetch_xor:
     case AtomicExpr::AO__atomic_fetch_xor:
       LibCallName = "__atomic_fetch_xor";
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
@@ -962,6 +1014,10 @@
       break;
     }
 
+    if (IsOpenCL) {
+      LibCallName =
+          std::string("__opencl") + StringRef(LibCallName).drop_front(1).str();
+    }
     // Optimized functions have the size in their name.
     if (UseOptimizedLibcall)
       LibCallName += "_" + llvm::utostr(Size);
@@ -982,6 +1038,8 @@
     // order is always the last parameter
     Args.add(RValue::get(Order),
              getContext().IntTy);
+    if (IsOpenCL)
+      Args.add(RValue::get(Scope), getContext().IntTy);
 
     // PostOp is only needed for the atomic_*_fetch operations, and
     // thus is only needed for and implemented in the
@@ -1018,44 +1076,51 @@
   }
 
   bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store ||
+                 E->getOp() == AtomicExpr::AO__opencl_atomic_store ||
                  E->getOp() == AtomicExpr::AO__atomic_store ||
                  E->getOp() == AtomicExpr::AO__atomic_store_n;
   bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load ||
+                E->getOp() == AtomicExpr::AO__opencl_atomic_load ||
                 E->getOp() == AtomicExpr::AO__atomic_load ||
                 E->getOp() == AtomicExpr::AO__atomic_load_n;
 
+  assert(isa<llvm::ConstantInt>(Scope) &&
+         "Non-constant synchronization scope not supported");
+  auto sco = (llvm::SynchronizationScope)(
+      cast<llvm::ConstantInt>(Scope)->getZExtValue());
+
   if (isa<llvm::ConstantInt>(Order)) {
     auto ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
     // We should not ever get to a case where the ordering isn't a valid C ABI
     // value, but it's hard to enforce that in general.
     if (llvm::isValidAtomicOrderingCABI(ord))
       switch ((llvm::AtomicOrderingCABI)ord) {
       case llvm::AtomicOrderingCABI::relaxed:
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::Monotonic);
+                     llvm::AtomicOrdering::Monotonic, sco);
         break;
       case llvm::AtomicOrderingCABI::consume:
       case llvm::AtomicOrderingCABI::acquire:
         if (IsStore)
           break; // Avoid crashing on code with undefined behavior
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::Acquire);
+                     llvm::AtomicOrdering::Acquire, sco);
         break;
       case llvm::AtomicOrderingCABI::release:
         if (IsLoad)
           break; // Avoid crashing on code with undefined behavior
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::Release);
+                     llvm::AtomicOrdering::Release, sco);
         break;
       case llvm::AtomicOrderingCABI::acq_rel:
         if (IsLoad || IsStore)
           break; // Avoid crashing on code with undefined behavior
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::AcquireRelease);
+                     llvm::AtomicOrdering::AcquireRelease, sco);
         break;
       case llvm::AtomicOrderingCABI::seq_cst:
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::SequentiallyConsistent);
+                     llvm::AtomicOrdering::SequentiallyConsistent, sco);
         break;
       }
     if (RValTy->isVoidType())
@@ -1091,38 +1156,38 @@
 
   // Emit all the different atomics
   Builder.SetInsertPoint(MonotonicBB);
-  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-               Size, llvm::AtomicOrdering::Monotonic);
+  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+               llvm::AtomicOrdering::Monotonic, sco);
   Builder.CreateBr(ContBB);
   if (!IsStore) {
     Builder.SetInsertPoint(AcquireBB);
-    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-                 Size, llvm::AtomicOrdering::Acquire);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+                 llvm::AtomicOrdering::Acquire, sco);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
                 AcquireBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acquire),
                 AcquireBB);
   }
   if (!IsLoad) {
     Builder.SetInsertPoint(ReleaseBB);
-    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-                 Size, llvm::AtomicOrdering::Release);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+                 llvm::AtomicOrdering::Release, sco);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release),
                 ReleaseBB);
   }
   if (!IsLoad && !IsStore) {
     Builder.SetInsertPoint(AcqRelBB);
-    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-                 Size, llvm::AtomicOrdering::AcquireRelease);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+                 llvm::AtomicOrdering::AcquireRelease, sco);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel),
                 AcqRelBB);
   }
   Builder.SetInsertPoint(SeqCstBB);
-  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-               Size, llvm::AtomicOrdering::SequentiallyConsistent);
+  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+               llvm::AtomicOrdering::SequentiallyConsistent, sco);
   Builder.CreateBr(ContBB);
   SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
               SeqCstBB);
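
When the memory order is not a compile-time constant, the code above dispatches
through a switch over the valid orderings, while the scope must still fold to a
constant (see the assert on Scope). A minimal OpenCL C sketch of a call that
takes this path, assuming the opencl-c.h definitions from this patch:

    int load_with(atomic_int *p, memory_order order) {
      // "order" is a runtime value, so CodeGen emits the MonotonicBB /
      // AcquireBB / ... switch above; the scope argument is still constant.
      return __opencl_atomic_load(p, order, memory_scope_device);
    }
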
Index: lib/Basic/Targets.cpp
===================================================================
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -2170,6 +2170,7 @@
     assert(DataLayout->getAllocaAddrSpace() == AS.Private);
 
     UseAddrSpaceMapMangling = true;
+    MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
   }
 
   void adjust(LangOptions &Opts) override {
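
Setting MaxAtomicInlineWidth (and the promote width) to 64 tells CodeGen that
atomics up to 64 bits wide may be emitted inline rather than as __atomic_*
libcalls. A sketch, assuming the 64-bit atomic extensions are enabled:

    #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
    #pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable
    long add64(atomic_long *p, long v) {
      // With MaxAtomicInlineWidth = 64 this can lower to an inline
      // atomicrmw rather than a libcall.
      return __opencl_atomic_fetch_add(p, v, memory_order_relaxed,
                                       memory_scope_device);
    }
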
Index: lib/AST/StmtPrinter.cpp
===================================================================
--- lib/AST/StmtPrinter.cpp
+++ lib/AST/StmtPrinter.cpp
@@ -1846,7 +1846,8 @@
   // AtomicExpr stores its subexpressions in a permuted order.
   PrintExpr(Node->getPtr());
   if (Node->getOp() != AtomicExpr::AO__c11_atomic_load &&
-      Node->getOp() != AtomicExpr::AO__atomic_load_n) {
+      Node->getOp() != AtomicExpr::AO__atomic_load_n &&
+      Node->getOp() != AtomicExpr::AO__opencl_atomic_load) {
     OS << ", ";
     PrintExpr(Node->getVal1());
   }
@@ -1860,7 +1861,8 @@
     OS << ", ";
     PrintExpr(Node->getWeak());
   }
-  if (Node->getOp() != AtomicExpr::AO__c11_atomic_init) {
+  if (Node->getOp() != AtomicExpr::AO__c11_atomic_init &&
+      Node->getOp() != AtomicExpr::AO__opencl_atomic_init) {
     OS << ", ";
     PrintExpr(Node->getOrder());
   }
Index: lib/AST/Expr.cpp
===================================================================
--- lib/AST/Expr.cpp
+++ lib/AST/Expr.cpp
@@ -3929,12 +3929,17 @@
 unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
   switch (Op) {
   case AO__c11_atomic_init:
+  case AO__opencl_atomic_init:
+    return 2;
   case AO__c11_atomic_load:
+  case AO__opencl_atomic_load:
   case AO__atomic_load_n:
-    return 2;
+    return 3;
 
   case AO__c11_atomic_store:
   case AO__c11_atomic_exchange:
+  case AO__opencl_atomic_store:
+  case AO__opencl_atomic_exchange:
   case AO__atomic_load:
   case AO__atomic_store:
   case AO__atomic_store_n:
@@ -3944,6 +3949,11 @@
   case AO__c11_atomic_fetch_and:
   case AO__c11_atomic_fetch_or:
   case AO__c11_atomic_fetch_xor:
+  case AO__opencl_atomic_fetch_add:
+  case AO__opencl_atomic_fetch_sub:
+  case AO__opencl_atomic_fetch_and:
+  case AO__opencl_atomic_fetch_or:
+  case AO__opencl_atomic_fetch_xor:
   case AO__atomic_fetch_add:
   case AO__atomic_fetch_sub:
   case AO__atomic_fetch_and:
@@ -3956,18 +3966,20 @@
   case AO__atomic_or_fetch:
   case AO__atomic_xor_fetch:
   case AO__atomic_nand_fetch:
-    return 3;
+    return 4;
 
   case AO__atomic_exchange:
-    return 4;
+    return 5;
 
   case AO__c11_atomic_compare_exchange_strong:
   case AO__c11_atomic_compare_exchange_weak:
-    return 5;
+  case AO__opencl_atomic_compare_exchange_strong:
+  case AO__opencl_atomic_compare_exchange_weak:
+    return 6;
 
   case AO__atomic_compare_exchange:
   case AO__atomic_compare_exchange_n:
-    return 6;
+    return 7;
   }
   llvm_unreachable("unknown atomic op");
 }
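
Each __opencl_ count is one higher than its __c11_ counterpart because of the
extra scope sub-expression (init excepted, since it takes neither order nor
scope). For instance, the strong compare-exchange now carries six
sub-expressions; a sketch of the corresponding call, assuming the argument
order (ptr, expected, desired, success order, failure order, scope):

    bool cas(atomic_int *p, int *expected, int desired) {
      return __opencl_atomic_compare_exchange_strong(
          p, expected, desired,
          memory_order_seq_cst, memory_order_relaxed,
          memory_scope_device);
    }
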
Index: lib/AST/ASTContext.cpp
===================================================================
--- lib/AST/ASTContext.cpp
+++ lib/AST/ASTContext.cpp
@@ -1182,7 +1182,14 @@
   ObjCSuperType = QualType();
 
   // void * type
-  VoidPtrTy = getPointerType(VoidTy);
+  if (LangOpts.OpenCLVersion >= 200) {
+    auto Q = VoidTy.getQualifiers();
+    Q.setAddressSpace(LangAS::opencl_generic);
+    VoidPtrTy = getPointerType(
+        getCanonicalType(getQualifiedType(VoidTy.getUnqualifiedType(), Q)));
+  } else {
+    VoidPtrTy = getPointerType(VoidTy);
+  }
 
   // nullptr type (C++0x 2.14.7)
   InitBuiltinType(NullPtrTy,           BuiltinType::NullPtr);
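
With VoidPtrTy in the generic address space for OpenCL 2.0, addresses cast to
void* (for example, on the __atomic_* libcall path in CGAtomic) carry the
generic qualifier, and pointers to the named address spaces convert to it
implicitly. A minimal sketch:

    void init_both(global atomic_int *g, local atomic_int *l) {
      // global and local pointers both convert implicitly to the
      // generic address space that void* now uses.
      __opencl_atomic_init(g, 0);
      __opencl_atomic_init(l, 0);
    }
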
Index: include/clang/Basic/Builtins.def
===================================================================
--- include/clang/Basic/Builtins.def
+++ include/clang/Basic/Builtins.def
@@ -699,6 +699,19 @@
 BUILTIN(__atomic_always_lock_free, "izvCD*", "n")
 BUILTIN(__atomic_is_lock_free, "izvCD*", "n")
 
+// OpenCL 2.0 atomic builtins.
+ATOMIC_BUILTIN(__opencl_atomic_init, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_load, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_store, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_exchange, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_compare_exchange_strong, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_compare_exchange_weak, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_add, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_sub, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_and, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_or, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_xor, "v.", "t")
+
 #undef ATOMIC_BUILTIN
 
 // Non-overloaded atomic builtins.
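
The "v." signature here is a placeholder, and the "t" attribute marks these
builtins for custom type checking in Sema, which is what lets a single builtin
overload across the _Atomic types. A sketch of the resulting polymorphism,
assuming the typedefs from opencl-c.h:

    void loads(atomic_float *fp, atomic_intptr_t *pp) {
      float f = __opencl_atomic_load(fp, memory_order_seq_cst,
                                     memory_scope_device);
      intptr_t i = __opencl_atomic_load(pp, memory_order_seq_cst,
                                        memory_scope_device);
    }
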
Index: include/clang/AST/Expr.h
===================================================================
--- include/clang/AST/Expr.h
+++ include/clang/AST/Expr.h
@@ -5061,9 +5061,11 @@
 
 /// AtomicExpr - Variadic atomic builtins: __atomic_exchange, __atomic_fetch_*,
 /// __atomic_load, __atomic_store, and __atomic_compare_exchange_*, for the
-/// similarly-named C++11 instructions, and __c11 variants for <stdatomic.h>.
-/// All of these instructions take one primary pointer and at least one memory
-/// order.
+/// similarly-named C++11 instructions, the __c11 variants for <stdatomic.h>,
+/// and the corresponding __opencl_atomic_* builtins for OpenCL 2.0.
+/// All of these instructions take one primary pointer, at least one memory
+/// order, and one synchronization scope. The C++11 and __c11 forms always
+/// use the default synchronization scope.
 class AtomicExpr : public Expr {
 public:
   enum AtomicOp {
@@ -5075,7 +5077,7 @@
   };
 
 private:
-  enum { PTR, ORDER, VAL1, ORDER_FAIL, VAL2, WEAK, END_EXPR };
+  enum { PTR, ORDER, SCOPE, VAL1, ORDER_FAIL, VAL2, WEAK, END_EXPR };
   Stmt* SubExprs[END_EXPR];
   unsigned NumSubExprs;
   SourceLocation BuiltinLoc, RParenLoc;
@@ -5100,8 +5102,9 @@
   Expr *getOrder() const {
     return cast<Expr>(SubExprs[ORDER]);
   }
+  Expr *getScope() const { return cast<Expr>(SubExprs[SCOPE]); }
   Expr *getVal1() const {
-    if (Op == AO__c11_atomic_init)
+    if (Op == AO__c11_atomic_init || Op == AO__opencl_atomic_init)
       return cast<Expr>(SubExprs[ORDER]);
     assert(NumSubExprs > VAL1);
     return cast<Expr>(SubExprs[VAL1]);
@@ -5136,10 +5139,17 @@
   bool isCmpXChg() const {
     return getOp() == AO__c11_atomic_compare_exchange_strong ||
            getOp() == AO__c11_atomic_compare_exchange_weak ||
+           getOp() == AO__opencl_atomic_compare_exchange_strong ||
+           getOp() == AO__opencl_atomic_compare_exchange_weak ||
            getOp() == AO__atomic_compare_exchange ||
            getOp() == AO__atomic_compare_exchange_n;
   }
 
+  bool isOpenCL() const {
+    return getOp() >= AO__opencl_atomic_init &&
+           getOp() <= AO__opencl_atomic_fetch_xor;
+  }
+
   SourceLocation getBuiltinLoc() const { return BuiltinLoc; }
   SourceLocation getRParenLoc() const { return RParenLoc; }
 
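
Note that isOpenCL() relies on the AO__opencl_atomic_* enumerators being
contiguous, which follows from the Builtins.def ordering above. The init form
carries no memory order and no scope, which is why getVal1() reads
SubExprs[ORDER] for the init opcodes. A sketch:

    void start(atomic_int *p) {
      // Only (ptr, value): no memory order and no scope for init.
      __opencl_atomic_init(p, 5);
    }
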
Index: docs/LanguageExtensions.rst
===================================================================
--- docs/LanguageExtensions.rst
+++ docs/LanguageExtensions.rst
@@ -1850,7 +1850,9 @@
 ``memory_order`` enumeration.
 
 (Note that Clang additionally provides GCC-compatible ``__atomic_*``
-builtins)
+builtins and OpenCL 2.0 ``__opencl_atomic_*`` builtins. Each OpenCL 2.0
+atomic builtin corresponds to the ``_explicit`` form of the matching
+OpenCL 2.0 atomic function, and is named with an ``__opencl_`` prefix.)
 
 Low-level ARM exclusive memory builtins
 ---------------------------------------
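
A usage sketch of the documented builtins, combining an explicit memory order
with a synchronization scope (enumerators as defined in opencl-c.h):

    int bump(atomic_int *counter) {
      // atomic_fetch_add_explicit-style semantics plus a scope argument.
      return __opencl_atomic_fetch_add(counter, 1, memory_order_relaxed,
                                       memory_scope_work_group);
    }
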