[Beignet] anyone with a Comet Lake to test this beignet patch on?

2020-11-24 Thread Rebecca N. Palmer
Ridley Combs suggests that adding the PCI IDs would be enough to support 
Comet Lake:


https://github.com/intel/beignet/pull/20/files
https://bugs.launchpad.net/ubuntu/+source/beignet/+bug/1905340

but neither they nor I have the hardware to test this.  Does anyone here?
___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] Attempt at LLVM 10 support (currently broken)

2020-01-18 Thread Rebecca N. Palmer
This gets it to build, but *crashes when run* with
builtin_acos_float()clang (LLVM option parsing): for the --pgo-warn-misexpect 
option: may only occur zero or one times!

This vaguely reminds me of multiple-LLVMs bugs like
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=768185 ,
but Debian beignet already links LLVM/clang statically
which has previously avoided those.

-

Remove -std=c++0x, as LLVM 10 requires at least c++14 (the default)
Note that this triggers a gcc bug, so build with clang
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93299

BasicBlockPass no longer exists; as they suggest, replace it with
FunctionPass with a loop over BasicBlocks
https://github.com/llvm/llvm-project/commit/9f0ff0b2634bab6a5be8dace005c9eb24d386dd1#diff-bddbe5e4c647cb67298584000b67dea1
Return true from IntrinsicLoweringPass as it can modify its input
(possibly a bug before?)

setAlignment now takes a MaybeAlign not a uint

Don't call initializeDominatorTreeWrapperPassPass and
initializeLoopInfoWrapperPassPass, as they no longer exist

Add explicit template instantiation to avoid an undefined symbol

###does not help###
Pass clang libs as a single string to prevent them being converted
to -Wl,-Bstatic -lclang... -Wl,-Bdynamic, as that causes a
multiply defined options crash when run
(multiple dynamic LLVMs??)


--- a/CMake/FindLLVM.cmake
+++ b/CMake/FindLLVM.cmake
@@ -120,11 +120,14 @@ macro(add_one_lib name)
 endif (LLVM_SYSTEM_LIBS_ORIG)
 endif (LLVM_VERSION_NODOT VERSION_GREATER 34)
 
+#something harmless because whitespace at start is an error
+set(CLANG_LIBRARIES "-ldl")
 macro(add_one_lib name)
   FIND_LIBRARY(CLANG_LIB
 NAMES ${name}
 PATHS ${LLVM_LIBRARY_DIR} NO_DEFAULT_PATH)
-  set(CLANG_LIBRARIES ${CLANG_LIBRARIES} ${CLANG_LIB})
+  set(CLANG_LIBRARIES "${CLANG_LIBRARIES} ${CLANG_LIB}")
+  message(STATUS "clanglibs name ${name} this ${CLANG_LIB} all 
${CLANG_LIBRARIES} ")
unset(CLANG_LIB CACHE)
 endmacro()
 
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -79,7 +78,7 @@ elseif (COMPILER STREQUAL "CLANG")
 elseif (COMPILER STREQUAL "ICC")
   set (CMAKE_C_CXX_FLAGS "${CMAKE_C_CXX_FLAGS}  -wd2928 -Wall -fPIC 
-fstrict-aliasing -fp-model fast -msse4.1 -Wl,-E")
 endif ()
-set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_C_CXX_FLAGS} -std=c++0x 
-Wno-invalid-offsetof")
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_C_CXX_FLAGS} 
-Wno-invalid-offsetof")
 set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_CXX_FLAGS}")
 set (CMAKE_CXX_FLAGS_DEBUG  "-O0 -g -DGBE_DEBUG=1")
 set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -26,8 +26,8 @@ if (NOT NOT_BUILD_STAND_ALONE_UTEST)
   # Threads
   Find_Package(Threads)
 
-  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_C_CXX_FLAGS} -std=c++0x 
-Wno-invalid-offsetof")
-  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_CXX_FLAGS}")
+  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_C_CXX_FLAGS} 
-Wno-invalid-offsetof -ffloat-store -fno-strict-aliasing")
+  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_CXX_FLAGS} -ffloat-store 
-fno-strict-aliasing") #compiler_{degrees,radians,function_argument2} use 
equality comparison of floats, compiler_long_bitcast uses aliasing
   set (CMAKE_CXX_FLAGS_DEBUG  "-O0 -g -DGBE_DEBUG=1")
   set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
   set (CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG -DGBE_DEBUG=0")
--- beignet-1.3.2.orig/backend/src/backend/program.cpp
+++ beignet-1.3.2/backend/src/backend/program.cpp
@@ -695,8 +695,12 @@ namespace gbe {
 );
 
 clang::CompilerInvocation::CreateFromArgs(*CI,
+#if LLVM_VERSION_MAJOR < 10
  &args[0],
  &args[0] + args.size(),
+#else
+  clang::ArrayRef<const char*>(args),
+#endif
   Diags);
 // Create the compiler instance
 clang::CompilerInstance Clang;
@@ -1248,8 +1252,12 @@ EXTEND_QUOTE:
   // Create the compiler invocation
  std::unique_ptr<clang::CompilerInvocation> CI(new 
clang::CompilerInvocation);
   return clang::CompilerInvocation::CreateFromArgs(*CI,
+#if LLVM_VERSION_MAJOR < 10
 &args[0],
 &args[0] + args.size(),
+#else
+   clang::ArrayRef<const char*>(args),
+#endif
Diags);
 }
 #endif
--- beignet-1.3.2.orig/backend/src/llvm/llvm_gen_backend.hpp
+++ beignet-1.3.2/backend/src/llvm/llvm_gen_backend.hpp
@@ -130,10 +130,10 @@ namespace gbe
  llvm::FunctionPass *createGenPass(ir::Unit &unit);
 
   /*! Remove the GEP instructions */
-  llvm::BasicBlockPass *createRemoveGEPPass(const ir::Unit &unit);
+  llvm::FunctionPass *createRemoveGEPPass(const ir::Unit &unit);
 
   /*! Merge load/store if possible */
-  llvm::BasicBlockPass 

Re: [Beignet] beignet and LLVM 8+

2020-01-13 Thread Rebecca N. Palmer

Benson Muite wrote:
There are a number of people with older hardware. Is there enough of a 
community to sustain this?


The Debian package has patches from 3 contributors since upstream 
development stopped, but most of the commits are mine.  This has been 
enough to get us from LLVM 5 then to 9 now, but might not be if future 
LLVM changes are harder to adapt to, and I don't promise to keep trying.


Fedora's beignet package has been removed ( 
https://src.fedoraproject.org/rpms/beignet ), and FreeBSD's is marked as 
deprecated ( https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=233652 ).


It is likely that Neo will be fixed, it is 
already making its way into Fedora repositories.


The Neo bug I refer to is in the Debian packaging, so presumably limited 
to the Debian/Ubuntu family.


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 3/3] Fix rotate and subgroup crashes with LLVM 8+

2020-01-12 Thread Rebecca N. Palmer
Block rotate to fshl optimization, as we don't implement fshl.
Set reg for physical registers to avoid out-of-range index crash.

Signed-off-by: Rebecca N. Palmer 
---
(where patches 1 and 2 are the LLVM 8/9 ones from
https://svnweb.freebsd.org/ports/head/lang/beignet/files/ ;
see also https://salsa.debian.org/opencl-team/beignet )

The tests succeed with this - I'm still not sure if I like it
(further testing welcome), but given that upstream is abandoned,
it's probably all we're going to get.

For rotate, I tried to add fshl/fshr to llvm_gen_backend.cpp (mapped
to rsl/rsr, though I never got far enough to find out whether
that's actually correct), but there are enough other places that
don't handle rsl/rsr that the obvious ways didn't work, so I decided
to block the optimization instead.

For subgroup, testing with LLVM 7 found that the underlying
"physical register passed to a place it maybe shouldn't be" issue
is not a new problem, so I kept the previously posted fix for the
crash and didn't investigate further.

--- a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
@@ -216,13 +216,14 @@ OVERLOADABLE ulong mad_sat(ulong a, ulon
   return __gen_ocl_mad_sat(a, b, c);
 }
 
-OVERLOADABLE uchar __rotate_left(uchar x, uchar y) { return (x << y) | (x >> 
(8 - y)); }
+// the 'volatile' is to make the LLVM optimizer leave these alone, as it would 
convert them to intrinsics (fshl/fshr) that we don't implement
+OVERLOADABLE uchar __rotate_left(uchar x, uchar y) { volatile uchar z; z = (x 
<< y); return z | (x >> (8 - y)); }
 OVERLOADABLE char __rotate_left(char x, char y) { return 
__rotate_left((uchar)x, (uchar)y); }
-OVERLOADABLE ushort __rotate_left(ushort x, ushort y) { return (x << y) | (x 
>> (16 - y)); }
+OVERLOADABLE ushort __rotate_left(ushort x, ushort y) { volatile ushort z; z = 
(x << y); return z | (x >> (16 - y)); }
 OVERLOADABLE short __rotate_left(short x, short y) { return 
__rotate_left((ushort)x, (ushort)y); }
-OVERLOADABLE uint __rotate_left(uint x, uint y) { return (x << y) | (x >> (32 
- y)); }
+OVERLOADABLE uint __rotate_left(uint x, uint y) { volatile uint z; z = (x << 
y); return z | (x >> (32 - y)); }
 OVERLOADABLE int __rotate_left(int x, int y) { return __rotate_left((uint)x, 
(uint)y); }
-OVERLOADABLE ulong __rotate_left(ulong x, ulong y) { return (x << y) | (x >> 
(64 - y)); }
+OVERLOADABLE ulong __rotate_left(ulong x, ulong y) { volatile ulong z; z = (x 
<< y); return z | (x >> (64 - y)); }
 OVERLOADABLE long __rotate_left(long x, long y) { return 
__rotate_left((ulong)x, (ulong)y); }
 #define DEF(type, m) OVERLOADABLE type rotate(type x, type y) { return 
__rotate_left(x, (type)(y & m)); }
 DEF(char, 7)
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -225,6 +225,7 @@ namespace gbe
uint32_t width,
uint32_t hstride)
 {
+  this->value.reg = 0;//avoid subgroup crash
   this->type = type;
   this->file = file;
   this->nr = nr;

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] beignet and LLVM 8+

2020-01-11 Thread Rebecca N. Palmer

Progress so far:


compiler_rotate()ASSERTION FAILED: Unsupported intrinsics


The intrinsic in question is an fshl (funnel shift left).  I suspect 
this issue appeared because LLVM started optimizing rotates to this 
intrinsic:


https://github.com/llvm/llvm-project/commit/654e6aabb9f25d0d0fbad194ae6e26dd96c9e9db
https://github.com/llvm/llvm-project/commit/d023dd60e944886a9d5a0b1dbf46f67d43293af8

They say this shouldn't break targets that don't have a rotate 
instruction, but we may be doing weird enough things that this doesn't 
apply to us.



compiler_subgroup_buffer_block_write_ui1()ASSERTION FAILED: index <
this->size()


GenRegAllocator::Opaque::allocate 
(backend/src/backend/gen_reg_allocation.cpp:1268) uses reg = 
insn.src(srcID).reg() as an index, and the assertion fails because that 
index is out of range.


As insn.src(srcID).physical=1 on the failing GenRegister object, it was 
probably created with the physical registers form of the constructor 
(backend/src/backend/gen_register.hpp:219), which leaves reg unset.


These tests start passing if I set it to 0, but I suspect this may not 
be a proper solution:


--- beignet-1.3.2.orig/backend/src/backend/gen_register.hpp
+++ beignet-1.3.2/backend/src/backend/gen_register.hpp
@@ -225,6 +225,7 @@ namespace gbe
uint32_t width,
uint32_t hstride)
 {
+this->value.reg = 0;//fix subgroup crash??
   this->type = type;
   this->file = file;
   this->nr = nr;

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] beignet and LLVM 8+

2020-01-11 Thread Rebecca N. Palmer
Debian's beignet-opencl-icd package currently uses LLVM 7, and fails to 
build with anything newer.  This will no longer be an option when LLVM 7 
is removed from Debian ( https://bugs.debian.org/947438 ).


Patches exist that allow beignet to build in LLVM 8 and 9, originally 
from FreeBSD ( https://svnweb.freebsd.org/ports/head/lang/beignet/files/ 
) and also used by Arch.  However, when I attempted to use these patches 
in Debian, some of the tests crashed ( https://bugs.debian.org/948563 ):


--error 1--

compiler_rotate()ASSERTION FAILED: Unsupported intrinsics
  at file /build/beignet-1.3.2/backend/src/llvm/llvm_gen_backend.cpp,
function void gbe::GenWriter::regAllocateCallInst(llvm::CallInst&), line
3865
Trace/breakpoint trap

(Debian sets -DGBE_DEBUG=1 making this one a crash - without that, it's 
a silently wrong answer.)


--error 2--

compiler_subgroup_buffer_block_write_ui1()ASSERTION FAILED: index <
this->size()
  at file /build/beignet-1.3.2/backend/src/./sys/vector.hpp, function
T& gbe::vector::operator[](size_t) [with T = gbe::GenRegInterval;
size_t = long unsigned int], line 66
Trace/breakpoint trap

(This one is a crash with or without GBE_DEBUG, and affects (probably 
all of) compiler_subgroup_(image|buffer)_block_*.)




It is not known whether these errors also happen in FreeBSD, as their 
testing process didn't use beignet's own tests ( 
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=239175#c8 ).


Has anyone else seen this?  Any ideas for how to fix it?

Any thoughts on whether it's better to have beignet with these bugs, or 
no beignet at all?  (Debian does also have intel-opencl-icd (Neo), but 
it's currently broken ( https://bugs.debian.org/946752 ), and doesn't 
support older hardware.)


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Accept and ignore -g (rather than failing)

2019-02-07 Thread Rebecca N. Palmer
Passing -g through to LLVM fails the build, which violates the OpenCL 
2.0 standard, and breaks at least clblas stable: 
https://bugs.debian.org/881054


Signed-off-by: Rebecca N. Palmer 

--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -985,6 +985,12 @@ EXTEND_QUOTE:
   continue; // Don't push this str back; ignore it.
 }

+if(str == "-g") {
+// The OpenCL 2.0 standard requires accepting -g,
+// but does not require that it actually does anything
+continue;
+}
+
 clOpt.push_back(str);
   }
   free(c_str);

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] doZeroAddedOptimization bug

2019-01-06 Thread Rebecca N. Palmer

https://gitlab.freedesktop.org/beignet/beignet/issues/7

In Debian we're fully reverting 81755054 
(https://sources.debian.org/src/beignet/1.3.2-4/debian/patches/885423.patch/), 
but other comments suggest a smaller fix also works.


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Statically link to libllvm

2019-01-06 Thread Rebecca N. Palmer
Dynamically linked LLVM crashes when two libraries using the same
LLVM version (e.g. Beignet and another OpenCL ICD) are dlopen()ed
in the same application, due to shared global state:

https://bugs.llvm.org/show_bug.cgi?id=30587
https://bugs.debian.org/852746

Signed-off-by: Rebecca N. Palmer 

--- a/CMake/FindLLVM.cmake
+++ b/CMake/FindLLVM.cmake
@@ -87,18 +87,34 @@ execute_process(
   OUTPUT_STRIP_TRAILING_WHITESPACE
 )
 
+if (LLVM_VERSION_NODOT VERSION_GREATER 38)
+execute_process(
+  COMMAND ${LLVM_CONFIG_EXECUTABLE} --libs --link-static
+  OUTPUT_VARIABLE LLVM_MODULE_LIBS
+  OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+else (LLVM_VERSION_NODOT VERSION_GREATER 38)
 execute_process(
   COMMAND ${LLVM_CONFIG_EXECUTABLE} --libs
   OUTPUT_VARIABLE LLVM_MODULE_LIBS
   OUTPUT_STRIP_TRAILING_WHITESPACE
 )
+endif (LLVM_VERSION_NODOT VERSION_GREATER 38)
 
 if (LLVM_VERSION_NODOT VERSION_GREATER 34)
+if (LLVM_VERSION_NODOT VERSION_GREATER 38)
+execute_process(
+  COMMAND ${LLVM_CONFIG_EXECUTABLE} --system-libs --link-static
+  OUTPUT_VARIABLE LLVM_SYSTEM_LIBS_ORIG
+  OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+else (LLVM_VERSION_NODOT VERSION_GREATER 38)
 execute_process(
   COMMAND ${LLVM_CONFIG_EXECUTABLE} --system-libs
   OUTPUT_VARIABLE LLVM_SYSTEM_LIBS_ORIG
   OUTPUT_STRIP_TRAILING_WHITESPACE
 )
+endif (LLVM_VERSION_NODOT VERSION_GREATER 38)
 if (LLVM_SYSTEM_LIBS_ORIG)
 string(REGEX REPLACE " *\n" "" LLVM_SYSTEM_LIBS ${LLVM_SYSTEM_LIBS_ORIG})
 endif (LLVM_SYSTEM_LIBS_ORIG)


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Docs: clarify hardware support, add Neo reference

2019-01-06 Thread Rebecca N. Palmer
Signed-off-by: Rebecca N. Palmer 

---
Is it a typo that Apollolake is described as 5th (rather than 6th)
generation?  Should the reference to Broxten be removed, as the
hardware was reportedly cancelled?

The Neo supported list is taken from
https://github.com/intel/compute-runtime#supported-platforms

Beignet has supported Coffeelake since 7e181af2 (i.e. never in a stable
release - should that be specified if this is going on the website?).

--- a/docs/Beignet.mdwn
+++ b/docs/Beignet.mdwn
@@ -151,14 +151,23 @@ beignet provides two alternative to run:
 Supported Targets
 -
 
+Beignet is the recommended open source OpenCL driver for these integrated GPUs:
+
  * 3rd Generation Intel Core Processors "Ivybridge".
  * 3rd Generation Intel Atom Processors "BayTrail".
  * 4th Generation Intel Core Processors "Haswell", need kernel patch if your 
linux kernel older than 4.2, see the "Known Issues" section.
- * 5th Generation Intel Core Processors "Broadwell".
  * 5th Generation Intel Atom Processors "Braswell".
- * 6th Generation Intel Core Processors "Skylake" and "Kabylake".
+
+Beignet also supports these integrated GPUs, but [Intel Compute Runtime 
(Neo)](https://01.org/compute-runtime) also supports them and offers additional 
features:
+
+ * 5th Generation Intel Core Processors "Broadwell".
+ * 6th Generation Intel Core Processors "Skylake", "Kabylake" and "Coffeelake".
  * 5th Generation Intel Atom Processors "Broxten" or "Apollolake".
 
+Future GPUs will probably be supported by Intel Compute Runtime only.
+
+Beignet only supports running OpenCL on the integrated GPU, _not_ on the CPU 
itself (see [pocl](http://portablecl.org) for that).  Systems that also have a 
discrete GPU may disable the integrated GPU: check _xrandr --listproviders_ or 
see [here](https://nouveau.freedesktop.org/wiki/Optimus/).
+
 OpenCL 2.0
 --
 From release v1.3.0, beignet supports OpenCL 2.0 on Skylake and later hardware.

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Docs: freedesktop.org have switched to gitlab

2019-01-06 Thread Rebecca N. Palmer
Signed-off-by: Rebecca N. Palmer 

diff --git a/docs/Beignet.mdwn b/docs/Beignet.mdwn
index 75039d41..57308a7a 100644
--- a/docs/Beignet.mdwn
+++ b/docs/Beignet.mdwn
@@ -227,7 +227,7 @@ Known Issues
 Project repository
 --
 Right now, we host our project on fdo at:
-[http://cgit.freedesktop.org/beignet/](http://cgit.freedesktop.org/beignet/).  
+[https://gitlab.freedesktop.org/beignet/beignet](https://gitlab.freedesktop.org/beignet/beignet).
  
 And the Intel 01.org:
 [https://01.org/beignet](https://01.org/beignet)
 
@@ -267,8 +267,8 @@ You are always welcome to contribute to this project, just 
need to subscribe
 to the beignet mail list and send patches to it for review.
 The official mail list is as below:
 
[http://lists.freedesktop.org/mailman/listinfo/beignet](http://lists.freedesktop.org/mailman/listinfo/beignet)
  
-The official bugzilla is at:
-[https://bugs.freedesktop.org/enter_bug.cgi?product=Beignet](https://bugs.freedesktop.org/enter_bug.cgi?product=Beignet)
  
+The official bug tracker is at:
+[https://gitlab.freedesktop.org/beignet/beignet/issues](https://gitlab.freedesktop.org/beignet/beignet/issues)
  
 You are welcome to submit beignet bug. Please be noted, please specify the 
exact platform
 information, such as BYT/IVB/HSW/BDW, and GT1/GT2/GT3. You can easily get this 
information
 by running the beignet's unit test.

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Support reproducible building

2019-01-06 Thread Rebecca N. Palmer
See https://reproducible-builds.org for what this means.

Making beignet reproducible requires additional setup including Debian's 
patched Clang (https://bugs.debian.org/877359), but building it with 
this patch does not.

Signed-off-by: Rebecca N. Palmer 
---
The first link is currently down; I don't know why or for how long.

--- a/backend/src/libocl/CMakeLists.txt
+++ b/backend/src/libocl/CMakeLists.txt
@@ -229,6 +229,8 @@ ADD_CUSTOM_COMMAND(OUTPUT ${OCL_OBJECT_D
 
 ADD_CUSTOM_COMMAND(OUTPUT ${OCL_OBJECT_DIR}/beignet.local.pch
 COMMAND mkdir -p ${OCL_OBJECT_DIR}
+# the tests are this way round to return success whether or not 
SOURCE_DATE_EPOCH is set
+COMMAND test -z $ENV{SOURCE_DATE_EPOCH} || touch -d 
'@$ENV{SOURCE_DATE_EPOCH}' ${OCL_OBJECT_DIR}/include/*.h
 COMMAND ${CLANG_EXECUTABLE} -cc1 ${CLANG_OCL_FLAGS} -I 
${OCL_OBJECT_DIR}/include/ -emit-pch -x cl ${OCL_OBJECT_DIR}/include/ocl.h -o 
${OCL_OBJECT_DIR}/beignet.local.pch
 DEPENDS ${OCL_HEADER_FILES}
 COMMENT "Generate the pch file: ${OCL_OBJECT_DIR}/beignet.local.pch"
@@ -236,6 +238,7 @@ ADD_CUSTOM_COMMAND(OUTPUT ${OCL_OBJECT_D
 
 ADD_CUSTOM_COMMAND(OUTPUT ${OCL_OBJECT_DIR}/beignet.pch
 COMMAND mkdir -p ${OCL_OBJECT_DIR}
+COMMAND test -z $ENV{SOURCE_DATE_EPOCH} || touch -d 
'@$ENV{SOURCE_DATE_EPOCH}' ${OCL_OBJECT_DIR}/include/*.h
 COMMAND ${CLANG_EXECUTABLE} -cc1 ${CLANG_OCL_FLAGS} -I 
${OCL_OBJECT_DIR}/include/ --relocatable-pch -emit-pch -isysroot 
${LIBOCL_BINARY_DIR} -x cl ${OCL_OBJECT_DIR}/include/ocl.h -o 
${OCL_OBJECT_DIR}/beignet.pch
 DEPENDS ${OCL_HEADER_FILES}
 COMMENT "Generate the pch file: ${OCL_OBJECT_DIR}/beignet.pch"
@@ -273,6 +276,7 @@ if (ENABLE_OPENCL_20)
 
   ADD_CUSTOM_COMMAND(OUTPUT ${OCL_OBJECT_DIR}/beignet_20.local.pch
 COMMAND mkdir -p ${OCL_OBJECT_DIR}
+COMMAND test -z $ENV{SOURCE_DATE_EPOCH} || touch -d 
'@$ENV{SOURCE_DATE_EPOCH}' ${OCL_OBJECT_DIR}/include/*.h
 COMMAND ${CLANG_EXECUTABLE} -cc1 ${CLANG_OCL_FLAGS_20} -I 
${OCL_OBJECT_DIR}/include/ -emit-pch -x cl ${OCL_OBJECT_DIR}/include/ocl.h -o 
${OCL_OBJECT_DIR}/beignet_20.local.pch
 DEPENDS ${OCL_HEADER_FILES}
 COMMENT "Generate the pch file: ${OCL_OBJECT_DIR}/beignet_20.local.pch"
@@ -280,6 +284,7 @@ if (ENABLE_OPENCL_20)
 
   ADD_CUSTOM_COMMAND(OUTPUT ${OCL_OBJECT_DIR}/beignet_20.pch
 COMMAND mkdir -p ${OCL_OBJECT_DIR}
+COMMAND test -z $ENV{SOURCE_DATE_EPOCH} || touch -d 
'@$ENV{SOURCE_DATE_EPOCH}' ${OCL_OBJECT_DIR}/include/*.h
 COMMAND ${CLANG_EXECUTABLE} -cc1 ${CLANG_OCL_FLAGS_20} -I 
${OCL_OBJECT_DIR}/include/ --relocatable-pch -emit-pch -isysroot 
${LIBOCL_BINARY_DIR} -x cl ${OCL_OBJECT_DIR}/include/ocl.h -o 
${OCL_OBJECT_DIR}/beignet_20.pch
 DEPENDS ${OCL_HEADER_FILES}
 COMMENT "Generate the pch file: ${OCL_OBJECT_DIR}/beignet_20.pch"

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Fix crash in loop unrolling

2019-01-06 Thread Rebecca N. Palmer
LLVM 6+'s llvm::LoopInfo::erase(), unlike its predecessor 
llvm::LoopInfo::markAsRemoved(), doesn't necessarily remove the loop it 
erases from the to-be-processed queue.  Do this explicitly, as trying to 
process an erased (0-block) loop is likely to crash, e.g. in getHeader().

Signed-off-by: Rebecca N. Palmer 
---
Looks like I introduced this (in 6e60548a) - sorry.

Examples at https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=913141

diff --git a/backend/src/llvm/llvm_unroll.cpp b/backend/src/llvm/llvm_unroll.cpp
index 813e116d..f378b63d 100644
--- a/backend/src/llvm/llvm_unroll.cpp
+++ b/backend/src/llvm/llvm_unroll.cpp
@@ -206,6 +206,7 @@ namespace gbe {
 //Don't change the unrollID if doesn't force unroll.
 //setUnrollID(parentL, false);
 #if LLVM_VERSION_MAJOR >= 6
+LPM.markLoopAsDeleted(*parentL);
 loopInfo.erase(parentL);
 #elif LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 38
 loopInfo.markAsRemoved(parentL);

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH] intel: Fall back to DRM render nodes under Wayland

2018-07-31 Thread Rebecca N. Palmer
Does your system need this patch to make Beignet work at all, or only to 
get rid of the message?  What hardware (lspci -nn | grep 0300) and what 
version of Beignet?


That message is known to trigger on Wayland, but when I tested it, 
Beignet still worked after it was shown.  It has already been removed 
(but without your other changes) in git master.


https://lists.freedesktop.org/archives/beignet/2018-January/009183.html
https://cgit.freedesktop.org/beignet/commit/?id=d1b99a1da56757971753288986419f1b8b9d55f4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] cl_ext.h vs cl_ext_intel.h

2018-07-22 Thread Rebecca N. Palmer
Some Intel-specific extensions (e.g. cl_intel_accelerator) are in 
cl_ext.h in Beignet's included OpenCL headers, but in cl_ext_intel.h in 
the upstream (Khronos) OpenCL headers:

https://github.com/KhronosGroup/OpenCL-Headers/tree/master/CL

What are your plans here?  (This is an API change for users of these 
extensions, but there does not appear to be any such code in Debian 
outside beignet itself.)


Debian is currently using this workaround to build beignet with Khronos 
headers:

https://salsa.debian.org/opencl-team/beignet/blob/master/debian/patches/cl_accelerator_intel.patch

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] Anything from master that should be in (Debian) 1.3?

2018-07-22 Thread Rebecca N. Palmer
I plan to update Debian's beignet package soon, with some of the patches 
I recently sent.


Is there anything else, e.g. from the master branch, that I should include?

In particular, these two look reasonable and build in 1.3, but I don't 
have the hardware to check whether they are useful:

7e181af2 Enable Coffee Lake support
b70d65ba Ensure that DRM device uses the i915 driver

(Debian doesn't need "Fix enabling of fp64 extension" because it doesn't 
enable fp64, and already has the preceding three commits.


And yes, we probably should package compute-runtime...)

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Allow creating out-of-order queues with clCreateCommandQueue

2018-07-21 Thread Rebecca N. Palmer
clCreateCommandQueueWithProperties can already create them, but
that's a 2.0 function.

Signed-off-by: Rebecca N. Palmer 
---
yes, this currently gives you out-of-order if you ask for in-order,
but says "can't do that" if you ask for out-of-order...

--- a/src/cl_api_command_queue.c
+++ b/src/cl_api_command_queue.c
@@ -27,35 +27,11 @@ clCreateCommandQueue(cl_context context,
  cl_command_queue_properties properties,
  cl_int *errcode_ret)
 {
-  cl_command_queue queue = NULL;
-  cl_int err = CL_SUCCESS;
-
-  do {
-if (!CL_OBJECT_IS_CONTEXT(context)) {
-  err = CL_INVALID_CONTEXT;
-  break;
-}
-
-err = cl_devices_list_include_check(context->device_num, context->devices, 
1, &device);
-if (err)
-  break;
-
-if (properties & ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | 
CL_QUEUE_PROFILING_ENABLE)) {
-  err = CL_INVALID_VALUE;
-  break;
-}
-
-if (properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { /*not supported 
now.*/
-  err = CL_INVALID_QUEUE_PROPERTIES;
-  break;
-}
-
-queue = cl_create_command_queue(context, device, properties, 0, &err);
-  } while (0);
-
-  if (errcode_ret)
-*errcode_ret = err;
-  return queue;
+  cl_queue_properties props[3];
+  props[0] = CL_QUEUE_PROPERTIES;
+  props[1] = properties;
+  props[2] = 0;
+  return clCreateCommandQueueWithProperties(context, device, props, 
errcode_ret);
 }
 
 /* 2.0 new API for create command queue. */

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH] Make in-order command queues actually be in-order

2018-07-21 Thread Rebecca N. Palmer
A demonstration that "in-order" queues currently aren't:

//g++ -o queue_order_test queue_order_test.c -lOpenCL
//Depends: beignet-opencl-icd ocl-icd-opencl-dev
#include <stdio.h>
#include <CL/cl.h>
int main()
{
  cl_int status;
  cl_device_id device;
clGetDeviceIDs(NULL,CL_DEVICE_TYPE_ALL,1,,NULL);
char device_name[101];
device_name[100]=0;
clGetDeviceInfo(device,CL_DEVICE_NAME,100,device_name,NULL);
printf("Using device %s",device_name);
cl_context ctx;
  cl_command_queue queue;
  cl_program program1,program2;
  cl_kernel kernel1,kernel2;
  cl_mem buffer;
  cl_event uevent1,uevent2,kernels_finished[2];
  size_t n = 3;
  cl_int test_data[3] = {3, 7, 5};
  const char* kernel1_source = "__kernel void test1(__global int *buf) {"
  "printf(\"kern1 \");"
  "  buf[get_global_id(0)] = 2* buf[get_global_id(0)];"
  "}";
  const char* kernel2_source = "__kernel void test2(__global int *buf) {"
  "printf(\"kern2 \");"
  "  buf[get_global_id(0)] = 9+ buf[get_global_id(0)];"
  "}";
  //Expected result: 15 23 19 if 1 runs first (in-order queue), 24 32 28 if 2 
runs first (out-of-order queue)
  ctx = clCreateContext(NULL, 1, , NULL, NULL, );
  if(!ctx)
return 1;

//cl_queue_properties 
qsettings[3]={CL_QUEUE_PROPERTIES,CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,0};
cl_queue_properties qsettings[3]={CL_QUEUE_PROPERTIES,0,0};
queue = clCreateCommandQueueWithProperties(ctx, device, qsettings, );
//queue = clCreateCommandQueueWithProperties(ctx, device, 0, );
cl_command_queue_properties qp;
clGetCommandQueueInfo(queue,CL_QUEUE_PROPERTIES,sizeof(qp),,NULL);
printf(" queue properties %i\n",qp);
program1 = clCreateProgramWithSource(ctx, 1, _source, NULL, );
clBuildProgram(program1, 1, , "", NULL, NULL);
kernel1 = clCreateKernel(program1, "test1", );
program2 = clCreateProgramWithSource(ctx, 1, _source, NULL, );
clBuildProgram(program2, 1, , "", NULL, NULL);
kernel2 = clCreateKernel(program2, "test2", );
buffer = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR, n*4, test_data, );
uevent1=clCreateUserEvent(ctx,);
uevent2=clCreateUserEvent(ctx,);
clSetKernelArg(kernel1, 0, sizeof(cl_mem), );
clSetKernelArg(kernel2, 0, sizeof(cl_mem), );
clEnqueueNDRangeKernel(queue, kernel1, 1, NULL, , , 1,, 
_finished[0]);
clEnqueueNDRangeKernel(queue, kernel2, 1, NULL, , , 0,NULL, 
_finished[1]);//without uevent2, bypasses queue
//clEnqueueNDRangeKernel(queue, kernel2, 1, NULL, , , 1,, 
_finished[1]);
clSetUserEventStatus(uevent2,CL_COMPLETE);
printf("\nsetting event %p (others %p %p) - enter a 
number\n",uevent1,kernels_finished[0],kernels_finished[1]);
int j;scanf("%i",);
clSetUserEventStatus(uevent1,CL_COMPLETE);
clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, n*4, test_data, 2, 
kernels_finished, NULL);
printf("\nresult: %i %i %i\n",test_data[0],test_data[1],test_data[2]);
}

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Make in-order command queues actually be in-order

2018-07-21 Thread Rebecca N. Palmer
When beignet added out-of-order execution support (7fd45f15),
it made *all* command queues out-of-order, even if they were
created as (and are reported by clGetCommandQueueInfo as) in-order.

Signed-off-by: Rebecca N. Palmer 
---
Not sure whether this one is actually worth it: it's clearly
against the spec, but I'm not aware of it causing any
real-world bugs.  (I noticed it while investigating
an issue that turned out to be unrelated.)  Users who expect a
queue to be in-order are probably not using events, and that
makes a beignet queue effectively in-order.

(This is *not* true of out-of-order queues in some other ICDs,
e.g. pocl: it is true in Beignet because our flush (in particular
the implicit one before a blocking copy) is also an ordering
barrier, but the spec doesn't require that.  If you choose not to
take this, it might be a good idea to add a comment to
cl_command_queue_wait_flush documenting that.)

--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -283,7 +283,7 @@ clEnqueueSVMFree (cl_command_queue comma
 data->size  = num_svm_pointers;
 data->ptr   = user_data;
 
-if (e_status == CL_COMPLETE) {
+if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == 
CL_COMPLETE)) {
   // Sync mode, no need to queue event.
   err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
   if (err != CL_SUCCESS) {
@@ -429,7 +429,7 @@ cl_int clEnqueueSVMMemcpy (cl_command_qu
 data->const_ptr= src_ptr;
 data->size = size;
 
-if (e_status == CL_COMPLETE) {
+if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == 
CL_COMPLETE)) {
   // Sync mode, no need to queue event.
   err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
   if (err != CL_SUCCESS) {
@@ -441,6 +441,9 @@ cl_int clEnqueueSVMMemcpy (cl_command_qu
 break;
   }
   cl_command_queue_enqueue_event(command_queue, e);
+  if (blocking_copy) {
+cl_event_wait_for_events_list(1, &e);
+  }
 }
   } while(0);
 
@@ -518,7 +521,7 @@ cl_int clEnqueueSVMMemFill (cl_command_q
 data->pattern_size = pattern_size;
 data->size = size;
 
-if (e_status == CL_COMPLETE) {
+if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == 
CL_COMPLETE)) {
   // Sync mode, no need to queue event.
   err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
   if (err != CL_SUCCESS) {
--- a/src/cl_api_kernel.c
+++ b/src/cl_api_kernel.c
@@ -223,6 +223,7 @@ clEnqueueNDRangeKernel(cl_command_queue
 count *= global_wk_sz_rem[2] ? 2 : 1;
 
 const size_t *global_wk_all[2] = {global_wk_sz_div, global_wk_sz_rem};
+cl_bool allow_immediate_submit = 
cl_command_queue_allow_bypass_submit(command_queue);
 /* Go through the at most 8 cases and euque if there is work items left */
 for (i = 0; i < 2; i++) {
   for (j = 0; j < 2; j++) {
@@ -263,7 +264,7 @@ clEnqueueNDRangeKernel(cl_command_queue
 break;
   }
 
-  err = cl_event_exec(e, (event_status == CL_COMPLETE ? CL_SUBMITTED : 
CL_QUEUED), CL_FALSE);
+  err = cl_event_exec(e, ((allow_immediate_submit && event_status == 
CL_COMPLETE) ? CL_SUBMITTED : CL_QUEUED), CL_FALSE);
   if (err != CL_SUCCESS) {
 break;
   }
--- a/src/cl_api_mem.c
+++ b/src/cl_api_mem.c
@@ -309,7 +309,7 @@ clEnqueueMapBuffer(cl_command_queue comm
 if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
   data->write_map = 1;
 
-if (e_status == CL_COMPLETE) {
+if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == 
CL_COMPLETE)) {
   // Sync mode, no need to queue event.
   err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
   if (err != CL_SUCCESS) {
@@ -322,6 +322,9 @@ clEnqueueMapBuffer(cl_command_queue comm
   }
 
   cl_command_queue_enqueue_event(command_queue, e);
+  if (blocking_map) {
+cl_event_wait_for_events_list(1, &e);
+  }
 }
 
 ptr = data->ptr;
@@ -469,7 +472,7 @@ clEnqueueUnmapMemObject(cl_command_queue
 data->mem_obj = memobj;
 data->ptr = mapped_ptr;
 
-if (e_status == CL_COMPLETE) { // No need to wait
+if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == 
CL_COMPLETE)) { // No need to wait
   err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
   if (err != CL_SUCCESS) {
 break;
@@ -571,7 +574,7 @@ clEnqueueReadBuffer(cl_command_queue com
 data->offset = offset;
 data->size = size;
 
-if (e_status == CL_COMPLETE) {
+if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == 
CL_COMPLETE)) {
   // Sync mode, no need to queue event.
   err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
   if (err != CL_SUCCESS) {
@@ -583,6 +586,9 @@ clEnqueueReadBuffer(cl_command_queue com
 break;
   }
   cl_command_queue_enqueue_event(command_queue, e);

[Beignet] [PATCH] Add preliminary LLVM 7 support

2018-07-21 Thread Rebecca N. Palmer
This is preliminary because LLVM 7 has not been released yet:
it was tested with the snapshot from Debian experimental (svn336894).

1.Change linking order, as clangCodeGen now links to clangFrontend
2.Pass references not pointers to WriteBitcodeToFile and CloneModule
3.Add the headers that LoopSimplifyID, LCSSAID and
some create*Pass have moved to
4.Define our DEBUG whether or not we just undefined LLVM's
(theirs is now LLVM_DEBUG, but we never actually use it)

Signed-off-by: Rebecca N. Palmer 

--- a/CMake/FindLLVM.cmake
+++ b/CMake/FindLLVM.cmake
@@ -113,10 +113,10 @@ macro(add_one_lib name)
 endmacro()
 
 #Assume clang lib path same as llvm lib path
+add_one_lib("clangCodeGen")
 add_one_lib("clangFrontend")
 add_one_lib("clangSerialization")
 add_one_lib("clangDriver")
-add_one_lib("clangCodeGen")
 add_one_lib("clangSema")
 add_one_lib("clangStaticAnalyzerFrontend")
 add_one_lib("clangStaticAnalyzerCheckers")
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -454,7 +454,11 @@ namespace gbe {
 #ifdef GBE_COMPILER_AVAILABLE
   std::string str;
   llvm::raw_string_ostream OS(str);
+#if LLVM_VERSION_MAJOR >= 7
+  llvm::WriteBitcodeToFile(*((llvm::Module*)prog->module), OS);
+#else
   llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS);
+#endif
   std::string& bin_str = OS.str();
   int llsz = bin_str.size();
   *binary = (char *)malloc(sizeof(char) * (llsz+1) );
@@ -545,7 +549,11 @@ namespace gbe {
 );
 src = llvm::unwrap(modRef);
   }
+#if LLVM_VERSION_MAJOR >= 7
+  llvm::Module* clone = llvm::CloneModule(*src).release();
+#else
   llvm::Module* clone = llvm::CloneModule(src).release();
+#endif
   if (LLVMLinkModules2(wrap(dst), wrap(clone))) {
 #elif LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 37
   if (LLVMLinkModules(wrap(dst), wrap(src), 
LLVMLinkerPreserveSource_Removed, )) {
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -794,7 +794,11 @@ namespace gbe {
   llvm::raw_fd_ostream ostream (dumpSPIRBinaryName.c_str(),
 err, llvm::sys::fs::F_None);
   if (!err)
+#if LLVM_VERSION_MAJOR<7
 llvm::WriteBitcodeToFile(*out_module, ostream);
+#else
+llvm::WriteBitcodeToFile(**out_module, ostream);
+#endif
 }
 #endif
 return true;
--- a/backend/src/llvm/llvm_bitcode_link.cpp
+++ b/backend/src/llvm/llvm_bitcode_link.cpp
@@ -340,7 +340,11 @@ namespace gbe
 /* We use beignet's bitcode as dst because it will have a lot of
lazy functions which will not be loaded. */
 #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 39
+#if LLVM_VERSION_MAJOR >= 7
+llvm::Module * linked_module = 
llvm::CloneModule(*(llvm::Module*)mod).release();
+#else
 llvm::Module * linked_module = 
llvm::CloneModule((llvm::Module*)mod).release();
+#endif
 if(LLVMLinkModules2(wrap(clonedLib), wrap(linked_module))) {
 #else
 char* errorMsg;
--- a/backend/src/llvm/llvm_includes.hpp
+++ b/backend/src/llvm/llvm_includes.hpp
@@ -89,6 +89,10 @@
 #include "llvm/CodeGen/IntrinsicLowering.h"
 
 #include "llvm/Transforms/Scalar.h"
+#if LLVM_VERSION_MAJOR >= 7
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#endif
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstrInfo.h"
--- a/backend/src/llvm/ExpandLargeIntegers.cpp
+++ b/backend/src/llvm/ExpandLargeIntegers.cpp
@@ -99,8 +99,8 @@ using namespace llvm;
 
 #ifdef DEBUG
   #undef DEBUG
-  #define DEBUG(...)
 #endif
+#define DEBUG(...)
 // Break instructions up into no larger than 64-bit chunks.
 static const unsigned kChunkBits = 64;
 static const unsigned kChunkBytes = kChunkBits / CHAR_BIT;

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] Add LLVM 6.0 support

2018-07-21 Thread Rebecca N. Palmer
LLVMContext::setDiagnosticHandler and LoopInfo::markAsRemoved
have been renamed.

Signed-off-by: Rebecca N. Palmer 

--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -322,7 +322,11 @@ namespace gbe
 DataLayout DL();
 
 gbeDiagnosticContext dc;
+#if LLVM_VERSION_MAJOR >= 6
+mod.getContext().setDiagnosticHandlerCallBack(,);
+#else
 mod.getContext().setDiagnosticHandler(,);
+#endif
 
 #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 37
 mod.setDataLayout(DL);
--- a/backend/src/llvm/llvm_unroll.cpp
+++ b/backend/src/llvm/llvm_unroll.cpp
@@ -205,7 +205,9 @@ namespace gbe {
   if (parentTripCount != 0 && currTripCount * parentTripCount > 32) {
 //Don't change the unrollID if doesn't force unroll.
 //setUnrollID(parentL, false);
-#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 38
+#if LLVM_VERSION_MAJOR >= 6
+loopInfo.erase(parentL);
+#elif LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 38
 loopInfo.markAsRemoved(parentL);
 #else
 LPM.deleteLoopFromQueue(parentL);

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] More user-friendly "type not supported" errors

2018-07-21 Thread Rebecca N. Palmer
Output a meaningful error message instead of just sel.has*Type.
In the case of double inputs (i.e. possibly literals), specify
how to make a literal single precision.
 
Signed-off-by: Rebecca N. Palmer 
---
Previously submitted as
https://lists.freedesktop.org/archives/beignet/2017-September/009169.html
without response; carried in Debian without known issues since
Nov 2017.

It would be even better, but more difficult, to also make these
return build failure instead of asserting.

--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -5248,7 +5248,7 @@ extern bool OCL_DEBUGINFO; // first defi
   write64Stateless(sel, address, src);
 sel.pop();
   } else {
-GBE_ASSERT(sel.hasLongType());
+GBE_ASSERTM(sel.hasLongType(), "Long (int64) not supported on this 
device");
 write64Stateless(sel, address, src);
   }
 }
@@ -5833,7 +5833,7 @@ extern bool OCL_DEBUGINFO; // first defi
 
 /* The special case, when dst is half, float->word->half will lose 
accuracy. */
 if (dstType == TYPE_HALF) {
-  GBE_ASSERT(sel.hasHalfType());
+  GBE_ASSERTM(sel.hasHalfType(), "Half precision not supported on this 
device");
   type = GEN_TYPE_HF;
 }
 
@@ -5874,7 +5874,7 @@ extern bool OCL_DEBUGINFO; // first defi
 
   if (dstType == TYPE_HALF) {
 /* There is no MOV for Long <---> Half. So Long-->Float-->half. */
-GBE_ASSERT(sel.hasLongType());
+GBE_ASSERTM(sel.hasLongType(), "Long (int64) not supported on this 
device");
 GBE_ASSERT(sel.hasHalfType());
 sel.push();
 if (sel.isScalarReg(insn.getSrc(0))) {
@@ -6176,7 +6176,7 @@ extern bool OCL_DEBUGINFO; // first defi
 }
   } else if (srcType == ir::TYPE_HALF) {
 /* No need to consider old platform. if we support half, we must have 
native long. */
-GBE_ASSERT(sel.hasLongType());
+GBE_ASSERTM(sel.hasLongType(), "Long (int64) not supported on this 
device");
 GBE_ASSERT(sel.hasHalfType());
 uint32_t type = dstType == TYPE_U64 ? GEN_TYPE_UD : GEN_TYPE_D;
 GenRegister tmp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD, 
sel.isScalarReg(insn.getSrc(0))), TYPE_U32), type);
@@ -6200,7 +6200,7 @@ extern bool OCL_DEBUGINFO; // first defi
   sel.MOV(dst, tmp);
 }
   } else if (src.type == GEN_TYPE_DF) {
-GBE_ASSERT(sel.hasDoubleType());
+GBE_ASSERTM(sel.hasDoubleType(), "Double precision not supported on 
this device");
 GBE_ASSERT(sel.hasLongType()); //So far, if we support double, we 
support native long.
 
 // Just Mov
@@ -6219,7 +6219,7 @@ extern bool OCL_DEBUGINFO; // first defi
   const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
   const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
 
-  GBE_ASSERT(sel.hasDoubleType());
+  GBE_ASSERTM(sel.hasDoubleType(), "Double precision not supported on this 
device (if this is a literal, use '1.0f' not '1.0')");
 
   if (sel.isScalarReg(insn.getDst(0))) {
 // dst is scalar, just MOV and nothing more.
@@ -6258,7 +6258,7 @@ extern bool OCL_DEBUGINFO; // first defi
   const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
   const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
 
-  GBE_ASSERT(sel.hasDoubleType());
+  GBE_ASSERTM(sel.hasDoubleType(), "Double precision not supported on this 
device (if this is a literal, use '1.0f' not '1.0')");
   GBE_ASSERT(sel.hasHalfType()); //So far, if we support double, we 
support half.
 
   if (sel.isScalarReg(insn.getDst(0))) { // uniform case.
@@ -6324,7 +6324,7 @@ extern bool OCL_DEBUGINFO; // first defi
   // Special case, half -> char/short.
   /* [DevBDW+]:Format conversion to or from HF (Half Float) must be 
DWord-aligned and
  strided by a DWord on the destination. */
-  GBE_ASSERT(sel.hasHalfType());
+  GBE_ASSERTM(sel.hasHalfType(), "Half precision not supported on this 
device");
   GenRegister tmp;
   sel.push();
   if (sel.isScalarReg(insn.getSrc(0))) {
@@ -6356,7 +6356,7 @@ extern bool OCL_DEBUGINFO; // first defi
   // Special case, char/uchar -> half
   /* [DevBDW+]:  Format conversion to or from HF (Half Float) must be 
DWord-aligned and
  strided by a DWord on the destination. */
-  GBE_ASSERT(sel.hasHalfType());
+  GBE_ASSERTM(sel.hasHalfType(), "Half precision not supported on this 
device");
   GenRegister tmp = 
GenRegister::retype(sel.unpacked_uw(sel.reg(FAMILY_DWORD, 
sel.isScalarReg(insn.getSrc(0)))), GEN_TYPE_HF);
   sel.push();
   if (sel.isScalarReg(insn.getSrc(0))) {
@@ -6378,7 +6378,7 @@ extern bool OCL_DEBUGINFO; // first defi
   const GenRegister src = sel.selReg(insn.getS

[Beignet] [PATCH] Don't leak memory on long chains of events

2018-07-21 Thread Rebecca N. Palmer
Delete event->depend_events when it is no longer needed, to allow
the event objects it refers to to be freed.

This avoids out-of-memory hangs in large dependency trees
(e.g. long iterative calculations):
https://launchpad.net/bugs/1354086

Signed-off-by: Rebecca N. Palmer 
---
*Possibly* also https://bugs.freedesktop.org/show_bug.cgi?id=102509

pocl does something similar:
https://sources.debian.org/src/pocl/1.1-5/lib/CL/devices/common.c/?hl=722#L714
Neo instead has the pointers go the other way (from an event to
the events waiting for it), but that would be harder and riskier
to convert existing code to.

--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -183,6 +183,25 @@ cl_event_new(cl_context ctx, cl_command_
   return e;
 }
 
+/* This exists to prevent long chains of events from filling up memory 
(https://bugs.launchpad.net/ubuntu/+source/beignet/+bug/1354086).  Call only 
after the dependencies are complete, or failed and marked as such in this 
event's status, or when this event is being destroyed */
+LOCAL void
+cl_event_delete_depslist(cl_event event)
+{
+  CL_OBJECT_LOCK(event);
+  cl_event *old_depend_events = event->depend_events;
+  int depend_count = event->depend_event_num;
+  event->depend_event_num = 0;
+  event->depend_events = NULL;
+  CL_OBJECT_UNLOCK(event);
+  if (old_depend_events) {
+assert(depend_count);
+for (int i = 0; i < depend_count; i++) {
+  cl_event_delete(old_depend_events[i]);
+}
+cl_free(old_depend_events);
+  }
+}
+
 LOCAL void
 cl_event_delete(cl_event event)
 {
@@ -199,13 +218,7 @@ cl_event_delete(cl_event event)
 
   assert(list_node_out_of_list(&event->enqueue_node));
 
-  if (event->depend_events) {
-assert(event->depend_event_num);
-for (i = 0; i < event->depend_event_num; i++) {
-  cl_event_delete(event->depend_events[i]);
-}
-cl_free(event->depend_events);
-  }
+  cl_event_delete_depslist(event);
 
   /* Free all the callbacks. Last ref, no need to lock. */
   while (!list_empty(&event->callbacks)) {
@@ -565,8 +578,12 @@ cl_event_exec(cl_event event, cl_int exe
   assert(depend_status <= CL_COMPLETE || ignore_depends || exec_to_status == 
CL_QUEUED);
   if (depend_status < CL_COMPLETE) { // Error happend, cancel exec.
 ret = cl_event_set_status(event, depend_status);
+cl_event_delete_depslist(event);
 return depend_status;
   }
+  if (depend_status == CL_COMPLETE) { // Avoid memory leak
+cl_event_delete_depslist(event);
+  }
 
   if (cur_status <= exec_to_status) {
 return ret;
--- a/src/cl_event.h
+++ b/src/cl_event.h
@@ -44,7 +44,7 @@ typedef struct _cl_event {
   cl_command_type event_type; /* Event type. */
   cl_bool is_barrier; /* Is this event a barrier */
   cl_int status;  /* The execution status */
-  cl_event *depend_events;/* The events must complete before this. */
+  cl_event *depend_events;/* The events must complete before this. May 
disappear after they have completed - see cl_event_delete_depslist*/
   cl_uint depend_event_num;   /* The depend events number. */
   list_head callbacks;/* The events The event callback functions */
   list_node enqueue_node; /* The node in the enqueue list. */

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] Atom 3845

2018-01-27 Thread Rebecca N. Palmer
That wording implies this is Debian beignet (upstream beignet uses the 
wording "cl_get_gt_device(): error, unknown device: %x"), which is 1.3.0 
in stretch.  This should support Bay Trail, but I don't have the 
hardware to actually try this.


Please post the output of

xrandr --listproviders
glxinfo | grep " vendor\| renderer"
lspci -nn | grep -e "\[03..\]:"

Supported Bay Trail devices have ID [8086:0f31] in the last of these.

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] Is "X server found. dri2 connection failed!" normal on Wayland?

2018-01-08 Thread Rebecca N. Palmer
...and if it is, should we stop printing a warning for it to avoid 
pointlessly scaring users?


https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=882486

When I get this warning, everything still seems to work (e.g. the test 
suite passes).  Some other reports have both it and another error (e.g. 
https://bugzilla.redhat.com/show_bug.cgi?id=1478536 ,
https://bugzilla.redhat.com/show_bug.cgi?id=1460400 ), but these may be 
unrelated bugs (e.g. the second is 
https://bugs.freedesktop.org/show_bug.cgi?id=101485 ).


Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>

--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -235,8 +235,6 @@ if(intel->x11_display) {
 intel_driver_init_shared(intel, intel->dri_ctx);
 Xfree(driver_name);
   }
-  else
-fprintf(stderr, "X server found. dri2 connection failed! \n");
 }
 #endif


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Docs: Fix grammar

2017-10-27 Thread Rebecca N. Palmer
Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>

--- a/docs/Beignet/Backend.mdwn
+++ b/docs/Beignet/Backend.mdwn
@@ -9,10 +9,10 @@ Status
 --
 
 After two years development, beignet is mature now. It now supports all the
-OpenCL 1.2 mandatory features. Beignet get almost 100% pass rate with both
-OpenCV 3.0 test suite and the piglit opencl test suite. There are some 
-performance tuning related items remained, see [[here|Backend/TODO]] for a
-(incomplete) lists of things to do.
+OpenCL 1.2 mandatory features. Beignet gets almost 100% pass rate with both
+the OpenCV 3.0 test suite and the piglit opencl test suite. There are some 
+performance tuning related items remained, see [[here|Backend/TODO]] for an
+(incomplete) list of things to do.
 
 Interface with the run-time
 ---
@@ -61,7 +61,7 @@ Environment variables are used all over
 - `OCL_OUTPUT_REG_ALLOC` `(0 or 1)`. Output Gen register allocations, including
   virtual register to physical register mapping, live ranges.
 
-- `OCL_OUTPUT_BUILD_LOG` `(0 or 1)`. Output error messages if there is any
+- `OCL_OUTPUT_BUILD_LOG` `(0 or 1)`. Output error messages if there are any
   during CL kernel compiling and linking.
 
 - `OCL_OUTPUT_CFG` `(0 or 1)`. Output control flow graph in .dot file.
@@ -70,22 +70,22 @@ Environment variables are used all over
   but without instructions in each BasicBlock.
 
 - `OCL_PRE_ALLOC_INSN_SCHEDULE` `(0 or 1)`. The instruction scheduler in
-  beignet are currently splitted into two passes: before and after register
-  allocation. The pre-alloc scheduler tend to decrease register pressure.
+  beignet is currently split into two passes: before and after register
+  allocation. The pre-alloc scheduler tends to decrease register pressure.
   This variable is used to disable/enable pre-alloc scheduler. This pass is
   disabled now for some bugs.
 
 - `OCL_POST_ALLOC_INSN_SCHEDULE` `(0 or 1)`. Disable/enable post-alloc
-  instruction scheduler. The post-alloc scheduler tend to reduce instruction
+  instruction scheduler. The post-alloc scheduler tends to reduce instruction
   latency. By default, this is enabled now.
 
-- `OCL_SIMD16_SPILL_THRESHOLD` `(0 to 256)`. Tune how much registers can be
-  spilled under SIMD16. Default value is 16. We find spill too much register
-  under SIMD16 is not as good as fall back to SIMD8 mode. So we set the
+- `OCL_SIMD16_SPILL_THRESHOLD` `(0 to 256)`. Tune how many registers can be
+  spilled under SIMD16. Default value is 16. We find spilling too many 
registers
+  under SIMD16 is not as good as falling back to SIMD8 mode. So we set the
   variable to control spilled register number under SIMD16.
 
 - `OCL_USE_PCH` `(0 or 1)`. The default value is 1. If it is enabled, we use
-  a pre compiled header file which include all basic ocl headers. This would
+  a pre compiled header file which includes all basic ocl headers. This would
   reduce the compile time.
 
 Implementation details

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Docs: OCL_STRICT_CONFORMANCE is default-on since 1.1

2017-10-27 Thread Rebecca N. Palmer
Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>

--- a/docs/Beignet/Backend.mdwn
+++ b/docs/Beignet/Backend.mdwn
@@ -37,9 +37,7 @@ Environment variables are used all over
   precision math instructions compliant with OpenCL Spec. So we provide a
   software version to meet the high precision requirement. Obviously the
   software version's performance is not as good as native version supported by
-  GEN hardware. What's more, most graphics application don't need this high
-  precision, so we choose 0 as the default value. So OpenCL apps do not suffer
-  the performance penalty for using high precision math functions.
+  GEN hardware.
 
 - `OCL_SIMD_WIDTH` `(8 or 16)`. Select the number of lanes per hardware thread,
   Normally, you don't need to set it, we will select suitable simd width for

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] beignet and LLVM 4/5

2017-10-04 Thread Rebecca N. Palmer
Such patches were pushed to the 1.3 branch 12 days ago, and appear to 
work, but I haven't yet checked for the bug Fedora had.


If (as is likely) Debian beignet gets asked to switch soon, should I use 
current 1.3 git, and should that be with LLVM 4 or 5?


On 10/09/17 22:26, Rebecca N. Palmer wrote:

(I'm asking for Debian - Fedora is already using LLVM 4 + git beignet)

On 08/09/17 07:57, Yang, Rong R wrote:
 LLVM 5. 0 has been released, we are planning to release a minor 
release 1.3.2 to support LLVM 4.0 and

LLVM 5.0 after beignet's LLVM5.0 patches are ready.


Roughly how long do you expect this to take?


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] More user-friendly 'type not supported' errors

2017-09-24 Thread Rebecca N. Palmer
Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>
---
I'd actually like to make these (and all "invalid input" errors) return 
CL_BUILD_PROGRAM_FAILURE instead of asserting, but that's a little 
harder.  (We have at least two mechanisms for that - GenContext.errCode 
and gbeDiagnosticContext - but it can be non-obvious how to reach them 
from deep inside the backend.)

diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index ea1cd5c..95d10ab 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -5248,7 +5248,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling 
BVAR in program.cpp
   write64Stateless(sel, address, src);
 sel.pop();
   } else {
-GBE_ASSERT(sel.hasLongType());
+GBE_ASSERTM(sel.hasLongType(), "Long (int64) not supported on this 
device");
 write64Stateless(sel, address, src);
   }
 }
@@ -5833,7 +5833,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling 
BVAR in program.cpp
 
 /* The special case, when dst is half, float->word->half will lose 
accuracy. */
 if (dstType == TYPE_HALF) {
-  GBE_ASSERT(sel.hasHalfType());
+  GBE_ASSERTM(sel.hasHalfType(), "Half precision not supported on this 
device");
   type = GEN_TYPE_HF;
 }
 
@@ -5874,7 +5874,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling 
BVAR in program.cpp
 
   if (dstType == TYPE_HALF) {
 /* There is no MOV for Long <---> Half. So Long-->Float-->half. */
-GBE_ASSERT(sel.hasLongType());
+GBE_ASSERTM(sel.hasLongType(), "Long (int64) not supported on this 
device");
 GBE_ASSERT(sel.hasHalfType());
 sel.push();
 if (sel.isScalarReg(insn.getSrc(0))) {
@@ -6176,7 +6176,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling 
BVAR in program.cpp
 }
   } else if (srcType == ir::TYPE_HALF) {
 /* No need to consider old platform. if we support half, we must have 
native long. */
-GBE_ASSERT(sel.hasLongType());
+GBE_ASSERTM(sel.hasLongType(), "Long (int64) not supported on this 
device");
 GBE_ASSERT(sel.hasHalfType());
 uint32_t type = dstType == TYPE_U64 ? GEN_TYPE_UD : GEN_TYPE_D;
 GenRegister tmp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD, 
sel.isScalarReg(insn.getSrc(0))), TYPE_U32), type);
@@ -6200,7 +6200,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling 
BVAR in program.cpp
   sel.MOV(dst, tmp);
 }
   } else if (src.type == GEN_TYPE_DF) {
-GBE_ASSERT(sel.hasDoubleType());
+GBE_ASSERTM(sel.hasDoubleType(), "Double precision not supported on 
this device");
 GBE_ASSERT(sel.hasLongType()); //So far, if we support double, we 
support native long.
 
 // Just Mov
@@ -6219,7 +6219,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling 
BVAR in program.cpp
   const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
   const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
 
-  GBE_ASSERT(sel.hasDoubleType());
+  GBE_ASSERTM(sel.hasDoubleType(), "Double precision not supported on this 
device (if this is a literal, use '1.0f' not '1.0')");
 
   if (sel.isScalarReg(insn.getDst(0))) {
 // dst is scalar, just MOV and nothing more.
@@ -6258,7 +6258,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling 
BVAR in program.cpp
   const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
   const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
 
-  GBE_ASSERT(sel.hasDoubleType());
+  GBE_ASSERTM(sel.hasDoubleType(), "Double precision not supported on this 
device (if this is a literal, use '1.0f' not '1.0')");
   GBE_ASSERT(sel.hasHalfType()); //So far, if we support double, we 
support half.
 
   if (sel.isScalarReg(insn.getDst(0))) { // uniform case.
@@ -6324,7 +6324,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling 
BVAR in program.cpp
   // Special case, half -> char/short.
   /* [DevBDW+]:Format conversion to or from HF (Half Float) must be 
DWord-aligned and
  strided by a DWord on the destination. */
-  GBE_ASSERT(sel.hasHalfType());
+  GBE_ASSERTM(sel.hasHalfType(), "Half precision not supported on this 
device");
   GenRegister tmp;
   sel.push();
   if (sel.isScalarReg(insn.getSrc(0))) {
@@ -6356,7 +6356,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling 
BVAR in program.cpp
   // Special case, char/uchar -> half
   /* [DevBDW+]:  Format conversion to or from HF (Half Float) must be 
DWord-aligned and
  strided by a DWord on the destination. */
-  GBE_ASSERT(sel.hasHalfType());
+  GBE_ASSERTM(sel.hasHalfType(), "Half precision not supp

Re: [Beignet] beignet and LLVM 4

2017-09-10 Thread Rebecca N. Palmer

(I'm asking for Debian - Fedora is already using LLVM 4 + git beignet)

On 08/09/17 07:57, Yang, Rong R wrote:

 LLVM 5. 0 has been released, we are planning to release a minor release 
1.3.2 to support LLVM 4.0 and
LLVM 5.0 after beignet's LLVM5.0 patches are ready.


Roughly how long do you expect this to take?

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] Status of cl_khr_gl_sharing

2017-08-04 Thread Rebecca N. Palmer
The *missing* functions aren't likely to be a problem: as far as I can 
tell, 
http://sources.debian.net/src/forge/0.9.2-2/examples/opencl/cl_helpers.h/?hl=72#L72 
is the only place in Debian that tries to use one of them, and it 
correctly handles its absence.


I'm more worried about the potential consequences (if any) of 
clEnqueueAcquire/ReleaseGLObjects not actually doing anything.


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] Status of cl_khr_gl_sharing

2017-08-01 Thread Rebecca N. Palmer
As beignet's Debian maintainer, I am considering whether to enable CL-GL 
sharing in our package, given its incomplete state.


Fedora do enable it, and have a Blender crash bug 
https://bugs.freedesktop.org/show_bug.cgi?id=101779 , though as I can't 
reproduce this I can't tell whether disabling it would help.


src/cl_gl_api.c contains clEnqueueAcquire/ReleaseGLObjects which do 
nothing and return success, and are marked "XXX NULL function 
currently."  Is 'nothing' the correct thing for these to do on Intel 
hardware, or is this a potential problem?


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] How widespread is "Exec event...error...-5" (#98647 / #100639)?

2017-04-27 Thread Rebecca N. Palmer

On 27/04/17 07:49, Yang, Rong R wrote:

As we know, this issue is introduce by commit 
https://cgit.freedesktop.org/beignet/commit/?id=ff57cee0519db1287053c7c05a2cb4e9700d3334.

To clarify, this commit is not only for ocl 2.0, ocl 1.2 also need it for null 
point check in the opencl kernel.


Does this imply that disabling softpin may break things even on non-2.0 
hardware (i.e. isn't a good idea)?



 Have you found the cases which Disable HAS_BO_SET_SOFTPIN could fix but 
commit
https://cgit.freedesktop.org/beignet/commit/?id=8b04f0be372da8eabdc93d6ae1b81a3c83cba284
 still exist?


Not on my own device (i5-3230M), but
https://bugs.freedesktop.org/show_bug.cgi?id=100639 reports that for 
them (i5-4250U):

-stock 1.3 = nothing works
-1.3 + 8b04f0b = nothing works
-1.3 + disable softpin = some things work, but clFFT still doesn't

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] How widespread is "Exec event...error...-5" (#98647 / #100639)?

2017-04-25 Thread Rebecca N. Palmer
Debian 9 (stretch)/Ubuntu 17.04 (zesty) have beignet 1.3.0, libdrm 
2.4.74/2.4.76 and Linux 4.9/4.10.


On some hardware (possibly all of Ivy Bridge and Haswell??), this does 
not work at all: attempting to run anything fails with


drm_intel_gem_bo_context_exec() failed: Device or resource busy
Beignet: "Exec event 0x error, type is 4592, error staus is -5"

https://bugs.freedesktop.org/show_bug.cgi?id=98647
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=860805

As the package maintainer, I'd like to fix this.  I am aware of two 
fixes, either of which works for me, but 
https://bugs.freedesktop.org/show_bug.cgi?id=100639 reports that neither 
of them is perfect:


 - The fix used in 1.4: 
https://cgit.freedesktop.org/beignet/commit/?id=8b04f0be372da8eabdc93d6ae1b81a3c83cba284


- Disable HAS_BO_SET_SOFTPIN: fixes more (but still not everything), but 
also disables some functionality (OpenCL 2.0).  This is probably why the 
bug only appears in recent Linux versions, and hence was missed when I 
tested the packages in a chroot on Linux 3.16: softpin was only 
introduced in Linux 4.5.


Has anyone other than its reporter seen #100639 (i.e. this error 
persisting after applying the 1.4 fix, particularly when using multiple 
OpenCL kernels such as in clFFT)?


Any other suggestions?

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] issue with cl

2017-04-10 Thread Rebecca N. Palmer
Sorry - I posted that suggestion in a hurry, and forgot that Haswell was 
broken in Linux <4.2.


I'll investigate, as this probably means Debian 9/Ubuntu 17.04's beignet 
is totally broken...


On 10/04/17 08:40, Michal B wrote:

Hi Rebecca,

i submitted my bug to:
https://bugs.freedesktop.org/show_bug.cgi?id=100639

i compiled myself kernel because i needed specific hardware support -
kernel 3.16 is too old for my hardware,
something is completely wrong, clinfo is not working ...

results:

root@debianNUC:~/# clinfo
Beignet: self-test failed: (3, 7, 5) + (5, 7, 3) returned (3, 7, 5)
This can usually be fixed by upgrading Linux to >= 4.2,
see /usr/share/doc/beignet-dev/Beignet.html or
https://www.freedesktop.org/wiki/Software/Beignet/
Beignet: disabling non-working device
Beignet: self-test failed: (3, 7, 5) + (5, 7, 3) returned (3, 7, 5)
See README.md or http://www.freedesktop.org/wiki/Software/Beignet/
Beignet: disabling non-working device
Number of platforms: 2
  Platform Profile: FULL_PROFILE
  Platform Version: OpenCL 1.2 beignet 1.3
  Platform Name: Intel Gen OCL Driver
  Platform Vendor: Intel
  Platform Extensions:
cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics
cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics
cl_khr_byte_addressable_store cl_khr_3d_image_writes
cl_khr_image2d_from_buffer cl_khr_depth_images cl_khr_spir cl_khr_icd
cl_intel_accelerator cl_intel_subgroups cl_intel_subgroups_short
  Platform Profile: FULL_PROFILE
  Platform Version: OpenCL 1.2 beignet 1.3
  Platform Name: Intel Gen OCL Driver
  Platform Vendor: Intel
  Platform Extensions:
cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics
cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics
cl_khr_byte_addressable_store cl_khr_3d_image_writes
cl_khr_image2d_from_buffer cl_khr_depth_images cl_khr_spir cl_khr_icd
cl_intel_accelerator cl_intel_subgroups cl_intel_subgroups_short
cl_khr_gl_sharing


  Platform Name: Intel Gen OCL Driver
Number of devices: 0
  Platform Name: Intel Gen OCL Driver
Number of devices: 0

root@debianNUC:~/# uname -na
Linux debianNUC 3.18.48 #1 SMP Sat Feb 18 12:56:22 CET 2017 x86_64 GNU/Linux




2017-04-10 9:21 GMT+02:00 Rebecca N. Palmer <rebecca_pal...@zoho.com>:

If you can, revert to the jessie (not -backports) kernel: that fixes this
issue for me (on Ivybridge).



___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] issue with cl

2017-04-10 Thread Rebecca N. Palmer
If you can, revert to the jessie (not -backports) kernel: that fixes 
this issue for me (on Ivybridge).


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH newRT] Add cl_gen_device_common.h file.

2017-04-07 Thread Rebecca N. Palmer

I think a better choice is to make OpenCL 1.2 also use 64bit address. So that 
we have a unified address_bits across 1.2 and 2.0.
But using 64bit address would hurt some performance. I think we need more work on 64bit 
optimization to make "unified address_bits across different ocl version" affect 
as less as we can.


Will that break older hardware (no emitUntypedReadA64Instruction) like 
unconditionally enabling OpenCL 2.0 does, or is this only intended to 
apply to 2.0-capable hardware?


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH newRT] Add cl_gen_device_common.h file.

2017-03-31 Thread Rebecca N. Palmer

+#ifdef ENABLE_OPENCL_20
+.address_bits = 64,
+#else
+.address_bits = 32,
+#endif

Should that be 32 unconditionally, like it now is in master?
https://lists.freedesktop.org/archives/beignet/2017-February/008565.html

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Docs: Fix broken link

2017-03-15 Thread Rebecca N. Palmer
Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>

--- a/docs/Beignet.mdwn
+++ b/docs/Beignet.mdwn
@@ -283,7 +283,7 @@ Documents for OpenCL application developers
 - [[OpenGL Buffer Sharing|Beignet/howto/gl-buffer-sharing-howto]]
 - [[Video Motion Estimation|Beignet/howto/video-motion-estimation-howto]]
 - [[Stand Alone Unit Test|Beignet/howto/stand-alone-utest-howto]]
-- [[Android build|Beignet/android-build-howto]]
+- [[Android build|Beignet/howto/android-build-howto]]
 
 The wiki URL is as below:
 
[http://www.freedesktop.org/wiki/Software/Beignet/](http://www.freedesktop.org/wiki/Software/Beignet/)

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Docs: Fix spelling and grammar

2017-03-15 Thread Rebecca N. Palmer
Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>

--- a/docs/howto/gl-buffer-sharing-howto.mdwn
+++ b/docs/howto/gl-buffer-sharing-howto.mdwn
@@ -1,16 +1,16 @@
 GL Buffer Sharing HowTo
 =
 
-Beignet now support cl_khr_gl_sharing partially(the most commonly used part), 
which is an offcial
+Beignet now supports cl_khr_gl_sharing partially (the most commonly used 
part), which is an official
 extension of Khronos OpenCL. With this extension, Beignet can create memory 
object from OpenGL/OpenGL
-ES buffer, texture or renderbuffer object with zero-copy. Currently, we just 
support create memory
-object from GL buffer object or 2d texture(the most common target type). We 
will support creating
+ES buffer, texture or renderbuffer object with zero-copy. Currently, we just 
support creating memory
+object from GL buffer object or 2d texture (the most common target type). We 
will support creating
 from other GL target type if necessary.
 
 Prerequisite
 
 
-Mesa GL library and Mesa EGL libray are required. Both version should be 
greater or equal than
+Mesa GL library and Mesa EGL library are required. Both version should be 
greater or equal than
 13.0.0.
 
 Steps
@@ -18,7 +18,7 @@ Steps
 
 A typical procedure of using cl_khr_gl_sharing is as below:
 
-- Basic egl routine(eglGetDisplay, eglInitialize, eglCreateContext...).
+- Basic egl routine (eglGetDisplay, eglInitialize, eglCreateContext...).
 
 - Create GL 2d texture in normal OpenGL way.
 
@@ -44,7 +44,7 @@ A typical procedure of using cl_khr_gl_sharing is as below:
 
 - Access this cl image object as an usual cl image object.
 
-- Relase cl image object by calling clEnqueueReleaseGLObjects.
+- Release cl image object by calling clEnqueueReleaseGLObjects.
 
 - Ensure any pending OpenCL operations which access this cl image object have 
completed by clFinish.
 
@@ -54,7 +54,7 @@ Sample code
 ---
 
 We have developed an example showing how to utilize cl_khr_gl_sharing in 
examples/gl_buffer_sharing
-directory. A cl image object is created from a gl 2d texutre and processed by 
OpenCL kernel, then
+directory. A cl image object is created from a gl 2d texture and processed by 
OpenCL kernel, then
 is shown on screen.
 
 Steps to build and run this example:

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH RFC] Add AppStream metadata

2017-02-28 Thread Rebecca N. Palmer

On 28/02/17 08:40, Yang, Rong R wrote:

metadata_license is just the license for this XML, right?

Yes.

If so, MIT is ok for me, what your opinion?
I have no problem with id section.

Fine by me.


open(os.path.join(source_directory,"src/cl_device_data.h"),"r",encoding= 
'utf-8')
has an error in python2.x:

Sorry, and that fix looks OK (though I haven't tried it).

Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH V4] Enable OpenCL 2.0 only where supported

2017-02-10 Thread Rebecca N. Palmer

Yang, Rong R wrote:

Because use -cl-std=CL1.2 by default when OpenCL 2.0 enabled, I prefer to 
always report address_bits = 32 now.
OpenCL spec consider only one address bits in one device, but when GEN9 now 
support both 32 bits and 64 bits address, so there is no way to comply with 
spec.


As previously noted 
(https://lists.freedesktop.org/archives/beignet/2017-January/008517.html), 
the spec actually says *default* address space size 
(https://www.khronos.org/registry/OpenCL/specs/opencl-2.0.pdf page 64), 
so I agree it should be 32, and device->address_bits 
(src/cl_get_gt_device.h:45) is where this is set.



An other issue is that beignet OpenCL 2.0 don't support i386 system now, maybe 
we also need set CAN_OPENCL_20 to off in i386 system.


What happens if you try - explicit error or mystery crash?  Does a 
2.0-capable beignet+hardware work if you only actually use 1.2? 
(Debian's 2.0-enabled beignet does work in an i386 chroot, but that's on 
my non-2.0-capable hardware, and only major bugs are allowed to be fixed 
during freeze.)



And also need to update readme after this patch merged.


https://sources.debian.net/src/beignet/1.3.0-1/debian/patches/opencl2-runtime-detection.patch/#L351 
is what I used, though you'll want to remove the (Debian-specific) 
jessie-backports reference.


Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH v2] Enable OpenCL 2.0 only where supported

2017-01-24 Thread Rebecca N. Palmer
[New recipients: this is a reply to
https://lists.freedesktop.org/archives/beignet/2017-January/008516.html ]

On 24/01/17 08:58, Pan, Xiuli wrote:
> And we have a conformance test with these patches, and the only problem is 
> that the sizeof(void *):
> On GEN9 machine, the device will return 32 due the CL1.2 backend is using but 
> the host API query one returns 64, 
> and if -cl-std=2.0 is using the sizeof will be 64. The API could only return 
> one value but our backend now have two kinds.

The spec for clGetDeviceInfo
(https://www.khronos.org/registry/OpenCL/specs/opencl-2.0.pdf page 64)
says CL_DEVICE_ADDRESS_BITS is the *default* address space size,
i.e. 32 here.

As the only place device->address_bits (from
src/cl_get_gt_device.h:45) is used is as the return value of that
clGetDeviceInfo call, making it unconditionally 32 should be enough
to implement this.

I'm guessing this isn't a big problem in practice (and hence, currently
do *not* intend to change the Debian package for this release),
but would welcome other opinions.

A quick check of
codesearch.debian.net suggests only 2 of their ~70 OpenCL-using
packages use it for something other than just "print device info",
and neither of them looks like this would add a new problem:
- clblas, probably only in the AMD-hardware-specific parts
- gromacs, in a way that looks like a bug anyway, since they appear
to mean CL_DEVICE_MAX_WORK_ITEM_SIZES but never check that -
http://sources.debian.net/src/gromacs/2016.1-2/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp/?hl=524#L524

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH v2] Enable OpenCL 2.0 only where supported

2017-01-23 Thread Rebecca N. Palmer
> regarding whether OpenCL 2.0
> is compiled or not I would rather think of something like that:
> 
> – if llvm39 and libdrm2466 are found, compile with OpenCL 2.0 by default.
> – if they’re not, compile without it. Issue an error message if
> -DENABLE_OPENCL_20=1 was passed.

Agreed - we actually do that in Debian, but our current method
(https://anonscm.debian.org/cgit/pkg-opencl/beignet.git/tree/debian/rules#n16)
involves Debian-specific tools.

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Make CL-GL sharing available via ICD

2017-01-22 Thread Rebecca N. Palmer
Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>
---
(Warning: has not been tested)

diff --git a/src/cl_khr_icd.c b/src/cl_khr_icd.c
index 7b3600c..e4daf79 100644
--- a/src/cl_khr_icd.c
+++ b/src/cl_khr_icd.c
@@ -18,10 +18,14 @@
 
 #include "cl_platform_id.h"
 #include "CL/cl_intel.h" // for clGetKernelSubGroupInfoKHR
-/* The interop functions are not implemented in Beignet */
-#define CL_GL_INTEROP(x) NULL
-/* OpenCL 1.2 is not implemented in Beignet */
-#define CL_1_2_NOTYET(x) NULL
+/* The interop functions are only available if sharing is enabled */
+#ifdef HAS_GL_EGL
+#define CL_GL_INTEROP(x) x
+#else
+#define CL_GL_INTEROP(x) (void *) NULL
+#endif
+/* These are not yet implemented in Beignet */
+#define CL_NOTYET(x) (void *) NULL
 
 /** Return platform list through ICD interface
  * This code is used only if a client is linked directly against the library
@@ -114,13 +118,13 @@ struct _cl_icd_dispatch const cl_khr_icd_dispatch = {
   clGetExtensionFunctionAddress,
   CL_GL_INTEROP(clCreateFromGLBuffer),
   CL_GL_INTEROP(clCreateFromGLTexture2D),
-  CL_GL_INTEROP(clCreateFromGLTexture3D),
-  CL_GL_INTEROP(clCreateFromGLRenderbuffer),
-  CL_GL_INTEROP(clGetGLObjectInfo),
-  CL_GL_INTEROP(clGetGLTextureInfo),
+  CL_NOTYET(clCreateFromGLTexture3D),
+  CL_NOTYET(clCreateFromGLRenderbuffer),
+  CL_NOTYET(clGetGLObjectInfo),
+  CL_NOTYET(clGetGLTextureInfo),
   CL_GL_INTEROP(clEnqueueAcquireGLObjects),
   CL_GL_INTEROP(clEnqueueReleaseGLObjects),
-  CL_GL_INTEROP(clGetGLContextInfoKHR),
+  CL_NOTYET(clGetGLContextInfoKHR),
   (void *) NULL,
   (void *) NULL,
   (void *) NULL,
@@ -135,9 +139,9 @@ struct _cl_icd_dispatch const cl_khr_icd_dispatch = {
   clEnqueueReadBufferRect,
   clEnqueueWriteBufferRect,
   clEnqueueCopyBufferRect,
-  CL_1_2_NOTYET(clCreateSubDevicesEXT),
-  CL_1_2_NOTYET(clRetainDeviceEXT),
-  CL_1_2_NOTYET(clReleaseDeviceEXT),
+  CL_NOTYET(clCreateSubDevicesEXT),
+  CL_NOTYET(clRetainDeviceEXT),
+  CL_NOTYET(clReleaseDeviceEXT),
 #ifdef CL_VERSION_1_2
   (void *) NULL,
   clCreateSubDevices,

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Clarify status of cl_khr_gl_sharing

2017-01-22 Thread Rebecca N. Palmer
Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>
---
> -The release notes say cl_khr_gl_sharing is now supported, but 
> docs/Beignet.mdwn still says it isn't.
It exists but is off by default, probably because it isn't finished:
clEnqueueAcquireGLObjects / clEnqueueReleaseGLObjects are no-ops,
and clGetGLObjectInfo / clGetGLTextureInfo are missing entirely.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 59abc45..7c01488 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -195,7 +195,7 @@ ELSE(XFIXES_FOUND)
 ENDIF(XFIXES_FOUND)
 ENDIF(X11_FOUND)
 
-OPTION(ENABLE_GL_SHARING "cl_khr_gl_sharing" OFF)
+OPTION(ENABLE_GL_SHARING "Enable (incomplete) cl_khr_gl_sharing" OFF)
 
 IF(ENABLE_GL_SHARING)
   pkg_check_modules(OPENGL REQUIRED gl)
diff --git a/docs/Beignet.mdwn b/docs/Beignet.mdwn
index 5c62b4c..3577d57 100644
--- a/docs/Beignet.mdwn
+++ b/docs/Beignet.mdwn
@@ -222,10 +222,8 @@ Known Issues
   This loses some precision but gains performance.
 
 * cl\_khr\_gl\_sharing.
-  This extension highly depends on mesa support. It seems that mesa would not 
provide
-  such type of extensions, we may have to hack with mesa source code to 
support this
-  extension. This feature used to work with a previous mesa git version. But 
now, it's
-  simply broken.
+  This extension is disabled by default, as it is only partially implemented.
+  It can be enabled by passing -DENABLE_GL_SHARING=ON to cmake.
 
 Project repository
 --

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Fix typo

2017-01-22 Thread Rebecca N. Palmer
Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>

--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -308,7 +308,7 @@ namespace gbe
 if(StrTy)
   return getTypeByteSize(unit,StrTy);
   }
-  GBE_ASSERTM(false, "Unspported type name");
+  GBE_ASSERTM(false, "Unsupported type name");
   return 0;
   }
 #undef TYPESIZEVEC

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] FindLLVM: Accept LLVM 3.9

2017-01-22 Thread Rebecca N. Palmer
Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>
--
(Possibly should be higher in the priority order, since it's required for 
OpenCL 2.0?)

--- a/CMake/FindLLVM.cmake
+++ b/CMake/FindLLVM.cmake
@@ -8,12 +8,12 @@
 # LLVM_FOUND   - True if llvm found.
 if (LLVM_INSTALL_DIR)
   find_program(LLVM_CONFIG_EXECUTABLE
-   NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 
llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config llvm-config-35 
llvm-config-3.5 llvm-config-34 llvm-config-3.4
+   NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 
llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config-39 llvm-config-3.9 
llvm-config llvm-config-35 llvm-config-3.5 llvm-config-34 llvm-config-3.4
DOC "llvm-config executable"
PATHS ${LLVM_INSTALL_DIR} NO_DEFAULT_PATH)
 else (LLVM_INSTALL_DIR)
   find_program(LLVM_CONFIG_EXECUTABLE
-   NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 
llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config llvm-config-35 
llvm-config-3.5 llvm-config-34 llvm-config-3.4
+   NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 
llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config-39 llvm-config-3.9 
llvm-config llvm-config-35 llvm-config-3.5 llvm-config-34 llvm-config-3.4
DOC "llvm-config executable")
 endif (LLVM_INSTALL_DIR)
 

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH v2] Enable OpenCL 2.0 only where supported

2017-01-22 Thread Rebecca N. Palmer
This allows a single beignet binary to both offer 2.0 where
available, and still work on older hardware.
(2.0 uses 64 bit pointers, which assert-fail on Ivybridge:
backend/src/backend/gen_context.cpp:2259).

V2: 
Default to 1.2 when -cl-std is not set (required by the OpenCL
spec, and also likely to be faster).  Update documentation.
Unconditionally build 2.0-only tests, but skip them on non-2.0
hardware.  Add -cl-std=2.0 to tests that need it.

Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -231,19 +231,8 @@ IF (EXPERIMENTAL_DOUBLE)
   ADD_DEFINITIONS(-DENABLE_FP64)
 ENDIF(EXPERIMENTAL_DOUBLE)
 
-OPTION(ENABLE_OPENCL_20 "Enable opencl 2.0 support" OFF)
+OPTION(ENABLE_OPENCL_20 "Enable opencl 2.0 support" ON)
 IF (ENABLE_OPENCL_20)
-  Find_Program(LSPCI lspci)
-  IF (NOT LSPCI)
-MESSAGE(FATAL_ERROR "Looking for lspci - not found")
-  ENDIF (NOT LSPCI)
-  EXECUTE_PROCESS(COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/GetGenID.sh"
-  RESULT_VARIABLE SUPPORT_OCL20_DEVICE
-  OUTPUT_VARIABLE PCI_ID_NOT_USED)
-
-  IF (NOT SUPPORT_OCL20_DEVICE EQUAL 1)
-MESSAGE(FATAL_ERROR "Only SKL and newer devices support OpenCL 2.0 now, 
your device don't support.")
-  ENDIF (NOT SUPPORT_OCL20_DEVICE EQUAL 1)
 
   IF (NOT HAVE_DRM_INTEL_BO_SET_SOFTPIN)
 MESSAGE(FATAL_ERROR "Please update libdrm to version 2.4.66 or later to 
enable OpenCL 2.0.")
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -31,6 +31,7 @@
 #include "ir/value.hpp"
 #include "ir/unit.hpp"
 #include "ir/printf.hpp"
+#include "../src/cl_device_data.h"
 
 #ifdef GBE_COMPILER_AVAILABLE
 #include "llvm/llvm_to_gen.hpp"
@@ -855,6 +856,7 @@ namespace gbe {
  size_t *errSize,
  uint32_t &oclVersion)
   {
+uint32_t maxoclVersion = oclVersion;
 std::string pchFileName;
 bool findPCH = false;
 #if defined(__ANDROID__)
@@ -1022,15 +1024,9 @@ EXTEND_QUOTE:
 }
 
 if (useDefaultCLCVersion) {
-#ifdef ENABLE_OPENCL_20
-  clOpt.push_back("-D__OPENCL_C_VERSION__=200");
-  clOpt.push_back("-cl-std=CL2.0");
-  oclVersion = 200;
-#else
   clOpt.push_back("-D__OPENCL_C_VERSION__=120");
   clOpt.push_back("-cl-std=CL1.2");
   oclVersion = 120;
-#endif
 }
 //for clCompilerProgram usage.
 if(temp_header_path){
@@ -1061,7 +1057,12 @@ EXTEND_QUOTE:
   clOpt.push_back("-include-pch");
   clOpt.push_back(pchFileName);
 }
-
+if (oclVersion > maxoclVersion){
+  if (err && stringSize > 0 && errSize) {
+ *errSize = snprintf(err, stringSize, "Requested OpenCL version %lf is 
higher than maximum supported version %lf\n", 
(float)oclVersion/100.0,(float)maxoclVersion/100.0);
+  }
+  return false;
+}
 return true;
   }
 
@@ -1076,7 +1077,7 @@ EXTEND_QUOTE:
 std::vector<std::string> clOpt;
 std::string dumpLLVMFileName, dumpASMFileName;
 std::string dumpSPIRBinaryName;
-uint32_t oclVersion = 0;
+uint32_t oclVersion = MAX_OCLVERSION(deviceID);
 if (!processSourceAndOption(source, options, NULL, clOpt,
 dumpLLVMFileName, dumpASMFileName, 
dumpSPIRBinaryName,
 optLevel,
@@ -1139,7 +1140,7 @@ EXTEND_QUOTE:
 std::vector<std::string> clOpt;
 std::string dumpLLVMFileName, dumpASMFileName;
 std::string dumpSPIRBinaryName;
-uint32_t oclVersion = 0;
+uint32_t oclVersion = MAX_OCLVERSION(deviceID);
 if (!processSourceAndOption(source, options, temp_header_path, clOpt,
 dumpLLVMFileName, dumpASMFileName, 
dumpSPIRBinaryName,
 optLevel, stringSize, err, errSize, 
oclVersion))
--- a/src/cl_device_data.h
+++ b/src/cl_device_data.h
@@ -363,5 +363,7 @@
 
 #define IS_GEN9(devid) (IS_SKYLAKE(devid) || IS_BROXTON(devid) || 
IS_KABYLAKE(devid))
 
+#define MAX_OCLVERSION(devid) (IS_GEN9(devid) ? 200 : 120)
+
 #endif /* __CL_DEVICE_DATA_H__ */
 
--- a/src/cl_gen9_device.h
+++ b/src/cl_gen9_device.h
@@ -27,5 +27,7 @@
 .max_mem_alloc_size = 4 * 1024 * 1024 * 1024ul,
 .global_mem_size = 4 * 1024 * 1024 * 1024ul,
 
+#define GEN9_DEVICE 1
 #include "cl_gt_device.h"
+#undef GEN9_DEVICE
 
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -16,7 +16,13 @@
  *
  * Author: Benjamin Segovia <benjamin.sego...@intel.com>
  */
-
+#ifdef GEN9_DEVICE
+#define LIBCL_VERSION_STRING GEN9_LIBCL_VERSION_STRING
+#define LIBCL_C_VERSION_STRING GEN9_LIBCL_C_VERSION_STRING
+#else
+#define LIBCL_VERSION_STRING NONGEN9_LIBCL_VERSION_STRING
+#define LIBCL_C_VERSION_STRING NONGEN9_LIBCL_C_VERSION_STRING
+#endif
 /* Common fields for both all GT devices (IVB / SNB) */
 .device_type = C

Re: [Beignet] Building beignet with OpenCL 2.0 support in distros

2017-01-21 Thread Rebecca N. Palmer
As for the "OpenCL 2.0 = worse performance" problem, the OpenCL spec
actually _requires_ 2.0 compilers to default to 1.2, and only enable
2.0 if explicitly asked to:
https://www.khronos.org/registry/OpenCL/specs/opencl-2.0.pdf page 203

beignet currently defaults to 2.0 if it is enabled, but this is easy
to change: the default is set at backend/src/backend/program.cpp:1026

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] Building beignet with OpenCL 2.0 support in distros

2017-01-21 Thread Rebecca N. Palmer
> I have a first attempt at "enable/disable 2.0 at run time" written, but 
> haven't yet tested it. 

It worked for me, and here it is; testing on other hardware (in
particular, hardware that supports 2.0: the last 15 tests of the test
suite appear to be the ones that test that) would be appreciated.

(Upstream may not want to take this as-is, as it doesn't scale very
well to more than one 2.0-supporting hardware generation: it's
intended as a quick relatively-low-risk fix for packagers.)

Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>

--- beignet-1.3.0.orig/CMakeLists.txt
+++ beignet-1.3.0/CMakeLists.txt
@@ -231,19 +231,8 @@ IF (EXPERIMENTAL_DOUBLE)
   ADD_DEFINITIONS(-DENABLE_FP64)
 ENDIF(EXPERIMENTAL_DOUBLE)
 
-OPTION(ENABLE_OPENCL_20 "Enable opencl 2.0 support" OFF)
+OPTION(ENABLE_OPENCL_20 "Enable opencl 2.0 support" ON)
 IF (ENABLE_OPENCL_20)
-  Find_Program(LSPCI lspci)
-  IF (NOT LSPCI)
-MESSAGE(FATAL_ERROR "Looking for lspci - not found")
-  ENDIF (NOT LSPCI)
-  EXECUTE_PROCESS(COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/GetGenID.sh"
-  RESULT_VARIABLE SUPPORT_OCL20_DEVICE
-  OUTPUT_VARIABLE PCI_ID_NOT_USED)
-
-  IF (NOT SUPPORT_OCL20_DEVICE EQUAL 1)
-MESSAGE(FATAL_ERROR "Only SKL and newer devices support OpenCL 2.0 now, 
your device don't support.")
-  ENDIF (NOT SUPPORT_OCL20_DEVICE EQUAL 1)
 
   IF (NOT HAVE_DRM_INTEL_BO_SET_SOFTPIN)
 MESSAGE(FATAL_ERROR "Please update libdrm to version 2.4.66 or later to 
enable OpenCL 2.0.")
--- beignet-1.3.0.orig/backend/src/backend/program.cpp
+++ beignet-1.3.0/backend/src/backend/program.cpp
@@ -31,6 +31,7 @@
 #include "ir/value.hpp"
 #include "ir/unit.hpp"
 #include "ir/printf.hpp"
+#include "../src/cl_device_data.h"
 
 #ifdef GBE_COMPILER_AVAILABLE
 #include "llvm/llvm_to_gen.hpp"
@@ -855,6 +856,7 @@ namespace gbe {
  size_t *errSize,
  uint32_t &oclVersion)
   {
+uint32_t maxoclVersion = oclVersion;
 std::string pchFileName;
 bool findPCH = false;
 #if defined(__ANDROID__)
@@ -1023,14 +1025,18 @@ EXTEND_QUOTE:
 
 if (useDefaultCLCVersion) {
 #ifdef ENABLE_OPENCL_20
+  if(maxoclVersion >= 200){
+#else
+  if(0){
+#endif
   clOpt.push_back("-D__OPENCL_C_VERSION__=200");
   clOpt.push_back("-cl-std=CL2.0");
   oclVersion = 200;
-#else
+  }else{
   clOpt.push_back("-D__OPENCL_C_VERSION__=120");
   clOpt.push_back("-cl-std=CL1.2");
   oclVersion = 120;
-#endif
+  }
 }
 //for clCompilerProgram usage.
 if(temp_header_path){
@@ -1061,7 +1067,12 @@ EXTEND_QUOTE:
   clOpt.push_back("-include-pch");
   clOpt.push_back(pchFileName);
 }
-
+if (oclVersion > maxoclVersion){
+  if (err && stringSize > 0 && errSize) {
+ *errSize = snprintf(err, stringSize, "Requested OpenCL version %lf is 
higher than maximum supported version %lf\n", 
(float)oclVersion/100.0,(float)maxoclVersion/100.0);
+  }
+  return false;
+}
 return true;
   }
 
@@ -1076,7 +1087,7 @@ EXTEND_QUOTE:
 std::vector<std::string> clOpt;
 std::string dumpLLVMFileName, dumpASMFileName;
 std::string dumpSPIRBinaryName;
-uint32_t oclVersion = 0;
+uint32_t oclVersion = MAX_OCLVERSION(deviceID);
 if (!processSourceAndOption(source, options, NULL, clOpt,
 dumpLLVMFileName, dumpASMFileName, 
dumpSPIRBinaryName,
 optLevel,
@@ -1139,7 +1150,7 @@ EXTEND_QUOTE:
 std::vector<std::string> clOpt;
 std::string dumpLLVMFileName, dumpASMFileName;
 std::string dumpSPIRBinaryName;
-uint32_t oclVersion = 0;
+uint32_t oclVersion = MAX_OCLVERSION(deviceID);
 if (!processSourceAndOption(source, options, temp_header_path, clOpt,
 dumpLLVMFileName, dumpASMFileName, 
dumpSPIRBinaryName,
 optLevel, stringSize, err, errSize, 
oclVersion))
--- beignet-1.3.0.orig/src/cl_device_data.h
+++ beignet-1.3.0/src/cl_device_data.h
@@ -363,5 +363,7 @@
 
 #define IS_GEN9(devid) (IS_SKYLAKE(devid) || IS_BROXTON(devid) || 
IS_KABYLAKE(devid))
 
+#define MAX_OCLVERSION(devid) (IS_GEN9(devid) ? 200 : 120)
+
 #endif /* __CL_DEVICE_DATA_H__ */
 
--- beignet-1.3.0.orig/src/cl_gen9_device.h
+++ beignet-1.3.0/src/cl_gen9_device.h
@@ -27,5 +27,7 @@
 .max_mem_alloc_size = 4 * 1024 * 1024 * 1024ul,
 .global_mem_size = 4 * 1024 * 1024 * 1024ul,
 
+#define GEN9_DEVICE 1
 #include "cl_gt_device.h"
+#undef GEN9_DEVICE
 
--- beignet-1.3.0.orig/src/cl_gt_device.h
+++ beignet-1.3.0/src/cl_gt_device.h
@@ -16,7 +16,13 @@
  *
  * Author: Benjamin Segovia <benjamin.sego...@intel.com>
  */
-
+#ifdef GEN9_DEVICE
+#define LIBCL_VERSION_STRING GEN9_LI

Re: [Beignet] Building beignet with OpenCL 2.0 support in distros

2017-01-21 Thread Rebecca N. Palmer
I have a first attempt at "enable/disable 2.0 at run time" written, but 
haven't yet tested it.


On 21/01/17 16:08, Bruno Pagani wrote:

Le 21/01/2017 à 16:55, Rebecca N. Palmer a écrit :


On 21/01/17 15:40, Bruno Pagani wrote:

Le 21/01/2017 à 16:20, Rebecca N. Palmer a écrit :


is there any downside in compiling with
OpenCL 2.0 support,

Yes - on older (Ivybridge/Haswell - no emitUntypedReadA64Instruction)
hardware, a 2.0-enabled beignet won't work, at all.


That’s what I’ve feared but after trying it here I haven’t encountered
more issues than with a non-2.0-enabled beignet.


Trying it on what hardware?  There might be hardware (all of gen8 if
this assert(0) is the only failure point) where a 2.0 build doesn't
crash outright but also doesn't have working 2.0.


Haswell (HD4600).


Weird - are you sure darktable loads OpenCL by default?  For me 
(Ivybridge M GT2), everything that does immediately crashes with


clinfo: 
/home/rnpalmer/Debian/builds/stackbuild/beignet/backend/src/backend/gen_context.cpp:2259: 
virtual void gbe::GenContext::emitUntypedReadA64Instruction(const 
gbe::SelectionInstruction&): Assertion `0' failed.

Aborted

and that assert(0) only gets replaced by a real implementation in gen8 
and above, not in Haswell (gen75).


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] Building beignet with OpenCL 2.0 support in distros

2017-01-21 Thread Rebecca N. Palmer

On 21/01/17 15:40, Bruno Pagani wrote:

Le 21/01/2017 à 16:20, Rebecca N. Palmer a écrit :


is there any downside in compiling with
OpenCL 2.0 support,

Yes - on older (Ivybridge/Haswell - no emitUntypedReadA64Instruction)
hardware, a 2.0-enabled beignet won't work, at all.


That’s what I’ve feared but after trying it here I haven’t encountered
more issues than with a non-2.0-enabled beignet.


Trying it on what hardware?  There might be hardware (all of gen8 if 
this assert(0) is the only failure point) where a 2.0 build doesn't 
crash outright but also doesn't have working 2.0.



What would be the point of installing both [2.0 and non-2.0]? Just so that 
softwares not
needing 2.0 don’t get hurt on performances?


That, and users (of software with non-broken empty platform handling) 
not having to think about which one they actually need.  (I discovered 
the existence of broken empty platform handling while considering 
whether to create an opencl-icd-all package, which would depend on 
beignet, mesa-opencl-icd and pocl.)



I at least need to decide quickly, as Debian freezes this week.

But does that really applies to Debian?
Because AFAIU, there is no llvm 3.9 in Debian Stretch:


There is LLVM 3.9 in Debian Stretch (I already build beignet with it), 
it just isn't the default (the one that gets the plain 'llvm' name): 
https://packages.debian.org/stretch/llvm-3.9


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] Building beignet with OpenCL 2.0 support in distros

2017-01-21 Thread Rebecca N. Palmer

is there any downside in compiling with
OpenCL 2.0 support,
Yes - on older (Ivybridge/Haswell - no emitUntypedReadA64Instruction) 
hardware, a 2.0-enabled beignet won't work, at all.


I see two possible approaches:

-Build two beignet packages, with and without 2.0 enabled, and make the 
with-2.0 one only accept 2.0-capable hardware (cl_get_gt_device() in 
src/cl_device_id.c).
They can be coinstallable if they use different BEIGNET_INSTALL_DIR and 
ICD_FILE_NAME (I suggest choosing names that make the non-2.0 one sort 
first, so software that doesn't need 2.0 doesn't get the performance 
penalty of supporting it), but should be separate packages because some 
OpenCL-using software errors out if any installed platform has 0 devices 
(CL_DEVICE_NOT_FOUND - e.g. 
http://sources.debian.net/src/asl/0.1.7-2/src/acl/aclHardware.cxx/?hl=69#L69 
, 
http://sources.debian.net/src/woo/1.0%2Bdfsg1-1/core/Scene.cpp/?hl=219#L207 
)


-As suggested above, move the "does this hardware support 2.0?" check to 
run time.

I'm currently working on this: it looks doable, but is the riskier option.

I at least need to decide quickly, as Debian freezes this week.

Two other issues I've found:
-CMake/FindLLVM.cmake doesn't find LLVM 3.9, even though this version is 
_required_ for OpenCL 2.0.
-The release notes say cl_khr_gl_sharing is now supported, but 
docs/Beignet.mdwn still says it isn't.


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH RFC] Add AppStream metadata

2017-01-15 Thread Rebecca N. Palmer
AppStream is a standard for software metadata,
including what hardware a driver supports:
https://www.freedesktop.org/software/appstream/docs/chap-Metadata.html

Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>
---
Before this is pushed, <metadata_license> needs to be filled in:
AppStream prefers a permissive license (such as CC0-1.0 or MIT)
for metadata to allow it to easily be combined into a
distribution-wide file, and I hereby agree to this for the
contents of this patch, but as the supported hardware list is
extracted from the source (to make sure it is kept up to date),
this might also need your agreement.

<id> recommends the reverseddomainname.softwarename format simply as
a convenient way to ensure it is unique: it need not be related to
<url>.  If you prefer to use one of your other domains (e.g.
org.01.beignet) you need to change all the places it appears in
this patch, including the filename.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a24ccb9..aa9a32d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -37,6 +37,7 @@ INCLUDE (GNUInstallDirs OPTIONAL)
 # support old CMake without GNUInstallDirs
 if (NOT CMAKE_INSTALL_FULL_LIBDIR)
   set (CMAKE_INSTALL_FULL_LIBDIR "${CMAKE_INSTALL_PREFIX}/lib")
+  set (CMAKE_INSTALL_FULL_DATADIR "${CMAKE_INSTALL_PREFIX}/share")
   set (BEIGNET_LIBRARY_ARCHITECTURE "")
 else (NOT CMAKE_INSTALL_FULL_LIBDIR)
   set (BEIGNET_LIBRARY_ARCHITECTURE "${CMAKE_LIBRARY_ARCHITECTURE}")
@@ -317,6 +318,10 @@ IF(BUILD_EXAMPLES)
 ADD_SUBDIRECTORY(examples)
 ENDIF(BUILD_EXAMPLES)
 
+add_custom_target(metainfo ALL
+  COMMAND ${PYTHON_EXECUTABLE} 
${CMAKE_CURRENT_SOURCE_DIR}/update_metainfo_xml.py 
"${LIBCL_DRIVER_VERSION_MAJOR}.${LIBCL_DRIVER_VERSION_MINOR}.${LIBCL_DRIVER_VERSION_PATCH}"
 ${CMAKE_CURRENT_BINARY_DIR})
+install (FILES ${CMAKE_CURRENT_BINARY_DIR}/com.intel.beignet.metainfo.xml 
DESTINATION ${CMAKE_INSTALL_FULL_DATADIR}/metainfo)
+
 SET(CPACK_SET_DESTDIR ON)
 SET(CPACK_PACKAGE_VERSION_MAJOR "${LIBCL_DRIVER_VERSION_MAJOR}")
 SET(CPACK_PACKAGE_VERSION_MINOR "${LIBCL_DRIVER_VERSION_MINOR}")
diff --git a/com.intel.beignet.metainfo.xml.in 
b/com.intel.beignet.metainfo.xml.in
new file mode 100644
index 000..66b74d0
--- /dev/null
+++ b/com.intel.beignet.metainfo.xml.in
@@ -0,0 +1,18 @@
+
+
+com.intel.beignet
+Beignet
+OpenCL (GPU compute) driver for Intel GPUs
+This allows using Intel integrated GPUs for general computation, 
speeding up some applications.
+
+@modalias_list@
+
+
+LGPL-2.1+
+https://www.freedesktop.org/wiki/Software/Beignet/
+https://bugs.freedesktop.org/buglist.cgi?product=Beignet=Beignet=---
+Intel
+
+
+
+
diff --git a/update_metainfo_xml.py b/update_metainfo_xml.py
new file mode 100755
index 000..7d5278c
--- /dev/null
+++ b/update_metainfo_xml.py
@@ -0,0 +1,31 @@
+#!/usr/bin/python
+
+import re
+import sys
+import os.path
+
+if len(sys.argv) != 3:
+raise TypeError("requires version_string and output_directory")
+version_string = sys.argv[1]
+output_directory = sys.argv[2]
+source_directory = os.path.dirname(sys.argv[0])
+source_file = 
open(os.path.join(source_directory,"src/cl_device_data.h"),"r",encoding = 
'utf-8')
+device_ids = []
+supported = False # first few devices in the file aren't supported
+for line in source_file:
+device_id = 
re.match(r"#define\s+PCI_CHIP_([A-Za-z0-9_]+)\s+0x([0-9A-Fa-f]+)",line)
+if device_id is None:
+continue
+if "IVYBRIDGE" in device_id.group(1):
+supported = True # start of supported devices
+if supported:
+device_ids.append(device_id.group(2).upper())
+source_file.close()
+modalias_list_string = 
"\n".join("pci:v8086d{}*".format(device_id) for 
device_id in sorted(device_ids))
+metadata_file_in = 
open(os.path.join(source_directory,"com.intel.beignet.metainfo.xml.in"),"r",encoding
 = 'utf-8')
+metadata_string = metadata_file_in.read()
+metadata_file_in.close()
+metadata_string = 
metadata_string.replace("@modalias_list@",modalias_list_string).replace("@version@",version_string)
+metadata_file_out = 
open(os.path.join(output_directory,"com.intel.beignet.metainfo.xml"),"w",encoding
 = 'utf-8')
+metadata_file_out.write(metadata_string)
+metadata_file_out.close()

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] apply 75cb7ca to 1.2 branch?

2017-01-15 Thread Rebecca N. Palmer
When beignet 1.2.1 is built with LLVM 3.9,
OCL_STRICT_CONFORMANCE=0 utest_run -c vload_test_uchar
(and other vload_test_*) hang during kernel compile, deep inside LLVM.

This does not happen in master; a bisect finds the fix to be 75cb7ca,
and applying this to 1.2.1 (at least in the Debian package) does fix
the problem, but I don't know why.

Hardware:
00:02.0 VGA compatible controller [0300]: Intel Corporation 3rd Gen Core 
processor Graphics Controller [8086:0166] (rev 09)

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Fail, don't assert, if unable to create context

2017-01-08 Thread Rebecca N. Palmer
As the "do we have any usable devices?" check uses this,
it needs to not crash even when we don't.

Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>
---
The user who reported a crash here ( https://bugs.debian.org/848792 )
was using unsupported hardware, but I don't know whether this is
the reason they can't create a context.

diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index a8d554c..b8a1b52 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -134,11 +134,12 @@ intel_driver_aub_dump(driver);
 return 1;
 }
 
-static void
+static int
 intel_driver_context_init(intel_driver_t *driver)
 {
 driver->ctx = drm_intel_gem_context_create(driver->bufmgr);
-assert(driver->ctx);
+if (!driver->ctx)
+  return 0;
 driver->null_bo = NULL;
 #ifdef HAS_BO_SET_SOFTPIN
 drm_intel_bo *bo = dri_bo_alloc(driver->bufmgr, "null_bo", 64*1024, 4096);
@@ -148,6 +149,7 @@ drm_intel_bo_set_softpin_offset(bo, 0);
 drm_intel_bo_disable_reuse(bo);
 driver->null_bo = bo;
 #endif
+return 1;
 }
 
 static void
@@ -168,7 +170,7 @@ driver->locked = 0;
 pthread_mutex_init(>ctxmutex, NULL);
 
 if (!intel_driver_memman_init(driver)) return 0;
-intel_driver_context_init(driver);
+if (!intel_driver_context_init(driver)) return 0;
 
 #if EMULATE_GEN
 driver->gen_ver = EMULATE_GEN;

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] Weird clinfo ouput @ FreeBSD-11.0/Intel GPU

2016-10-18 Thread Rebecca N. Palmer
That looks like abi::__cxa_demangle is sometimes failing completely
(and possibly inconsistently, given that it's failing on a different
symbol this time).  Given that it only happens on FreeBSD, possibly
a libc++ (LLVM) vs libstdc++ (GCC) issue??

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] Weird clinfo ouput @ FreeBSD-11.0/Intel GPU

2016-10-18 Thread Rebecca N. Palmer
> builtin_kernel_block_motion_estimate_intel()[SUCCESS]
> runtime_climage_from_boname()Unresolved symbol:
> _Z22__gen_ocl_write_imagef11ocl_image2dDv2_iDv4_f
> Aborting...

That message is from backend/src/llvm/llvm_gen_backend.hpp:97, and means 
it can't find that function (in its internal list, not the actual 
library files): possibly a name demangling problem?

To find out, try this:

--- a/backend/src/llvm/llvm_gen_backend.hpp
+++ b/backend/src/llvm/llvm_gen_backend.hpp
@@ -80,12 +80,13 @@ namespace gbe
 gbe::map map;
 OCLInstrinsic find(const std::string symbol) const {
   auto it = map.find(symbol);
+  std::string realFnName, stripName;
+  int status;

   if (it == map.end()) {
-int status;
 char *realName = abi::__cxa_demangle(symbol.c_str(), NULL, NULL, 
);
 if (status == 0) {
-  std::string realFnName(realName), stripName;
+  realFnName=std::string(realName);
   stripName = realFnName.substr(0, realFnName.find("("));
   it = map.find(stripName);
 }
@@ -94,7 +95,7 @@ namespace gbe
   // FIXME, should create a complete error reporting mechanism
   // when found error in beignet managed passes including Gen pass.
   if (it == map.end()) {
-std::cerr << "Unresolved symbol: " << symbol << std::endl;
+std::cerr << "Unresolved symbol: " << symbol << " " << realFnName << " 
" << stripName << " " << status << std::endl;
 std::cerr << "Aborting..." << std::endl;
 return GEN_OCL_NOT_FOUND;
   }

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] Weird clinfo ouput @ FreeBSD-11.0/Intel GPU

2016-10-17 Thread Rebecca N. Palmer
Weird - setenv.sh has export OCL_IGNORE_SELF_TEST=1, which is supposed 
to turn that off (does setting environment variables that way not work 
in FreeBSD? if so, you'll also need to set the rest of setenv.sh in 
whatever way does work).  Does passing that explicitly (i.e. 
OCL_IGNORE_SELF_TEST=1 ./utest_run or OCL_IGNORE_SELF_TEST=1 clinfo), 
or disabling it altogether (remove the device = 0; at src/cl_device_id.c 
line 893), give you any more output?


(*Warning*: these do *not* fix the underlying "using __local returns 
nonsense" problem, only turn off the check so we can see whether the 
problem actually is that.)


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] Weird clinfo ouput @ FreeBSD-11.0/Intel GPU

2016-10-16 Thread Rebecca N. Palmer
> Beignet: self-test failed: (3, 7, 5) + (5, 7, 3) returned (3, 7, 5)
> See README.md or http://www.freedesktop.org/wiki/Software/Beignet/
> Beignet: disabling non-working device

That looks like the "no __local on Haswell" issue (Linux fixed this in 4.2,
but FreeBSD might not have): try

OCL_IGNORE_SELF_TEST=1 clinfo

If you compiled Beignet yourself, also try running the test suite:
make utest
cd utests
. setenv.sh
./utest_run

If it is that, I'd expect most but not all of them to pass.

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Fix build failure with CMRT enabled

2016-10-12 Thread Rebecca N. Palmer
2baff9c moved mem->magic to cl_base_object.
---
(Or should this be CL_OBJECT_IS_MEM(mem), i.e. also checking the reference 
count?)

--- a/src/cl_cmrt.cpp
+++ b/src/cl_cmrt.cpp
@@ -256,7 +256,7 @@ cl_int cmrt_set_kernel_arg(cl_kernel k,
 result = cmrt_kernel->SetKernelArg(index, sz, value);
   else {
 cl_mem mem = *(cl_mem*)value;
-if (mem->magic == CL_MAGIC_MEM_HEADER) {
+if (((cl_base_object)mem)->magic == CL_MAGIC_MEM_HEADER) {
   if (!CreateCmrtMemory(mem))
 return CL_INVALID_ARG_VALUE;
 

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH v3] Utests: Allow testing cl_intel_accelerator via ICD

2016-10-11 Thread Rebecca N. Palmer
v3: Use extension check, not beignet check.  Treat claiming
to have the extension but not having the kernel as a failure.

---
(v2 was the un-numbered 10/10/16 08:07 version...which I subsequently
noticed was broken in that it assumed a non-NULL
clGetExtensionFunctionAddressForPlatform result meant the extension
was supported, which it doesn't in general,
https://www.khronos.org/registry/cl/sdk/2.1/docs/man/xhtml/clGetExtensionFunctionAddressForPlatform.html
 )

--- a/utests/builtin_kernel_block_motion_estimate_intel.cpp
+++ b/utests/builtin_kernel_block_motion_estimate_intel.cpp
@@ -8,6 +8,19 @@ OCLRELEASEACCELERATORINTEL * oclReleaseA
 
 void builtin_kernel_block_motion_estimate_intel(void)
 {
+  std::string extStr;
+  size_t param_value_size;
+  OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, 0, 0, 
_value_size);
+  std::vector param_value(param_value_size);
+  OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, param_value_size,
+   param_value.empty() ? NULL : _value.front(), 
_value_size);
+  if (!param_value.empty())
+extStr = std::string(_value.front(), param_value_size-1);
+  // cl_intel_motion_estimation depends on cl_intel_accelerator, so we only 
need to check one
+  if (strstr(extStr.c_str(), "cl_intel_motion_estimation") == NULL) {
+printf("No cl_intel_motion_estimation, Skip!");
+return;
+  }
   char* built_in_kernel_names;
   size_t built_in_kernels_size;
   cl_int err = CL_SUCCESS;
@@ -21,7 +34,8 @@ void builtin_kernel_block_motion_estimat
   if (strstr(built_in_kernel_names, "block_motion_estimate_intel") == NULL)
   {
 free(built_in_kernel_names);
-return;
+printf("Can't find block_motion_estimate_intel built-in kernel");
+OCL_ASSERT(0);
   }
 
   cl_program built_in_prog = clCreateProgramWithBuiltInKernels(ctx, 1, 
, built_in_kernel_names, );
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -287,7 +287,8 @@ set (utests_sources
   multi_queue_events.cpp
   compiler_mix.cpp
   compiler_math_3op.cpp
-  compiler_bsort.cpp)
+  compiler_bsort.cpp
+  builtin_kernel_block_motion_estimate_intel.cpp)
 
 if (LLVM_VERSION_NODOT VERSION_GREATER 34)
   SET(utests_sources
@@ -328,7 +329,6 @@ else(GEN_PCI_ID)
 endif(GEN_PCI_ID)
 
 if (NOT_BUILD_STAND_ALONE_UTEST)
-  SET(utests_sources ${utests_sources} 
builtin_kernel_block_motion_estimate_intel.cpp)
   ADD_CUSTOM_TARGET(kernel_bin.bin DEPENDS ${kernel_bin}.bin)
 endif (NOT_BUILD_STAND_ALONE_UTEST)
 

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH] Add clGetKernelSubGroupInfoKHR to _cl_icd_dispatch table

2016-10-11 Thread Rebecca N. Palmer

Now I see that the problem is the API cannot be used through ICD, but in 
OpenCL1.2 it is a vendor extension.
Beignet may consider it a vendor extension, but ocl-icd doesn't: 
anything with a KHR or EXT name goes through the "check the dispatch 
table first" lookup process, whether or not it exists in the version of 
OpenCL the ICD says it supports.



Should we use the ICD OpenCL2.0 API table of it?

If you mean my patch, yes.


Or maybe we should not use clGetExtensionFunctionForPlatform or 
clGetExtensionFunction to get the API clGetKernelSubGroupInfoKHR?
The alternative of just calling clGetKernelSubGroupInfoKHR without such 
a lookup also goes through the address table when used via ICD, so it 
will have the same problem...plus the additional problem of being a 
compile-time error on older versions of ocl-icd that don't know about 
this function.  (If you want to see the code that does this, build 
ocl-icd then look in ocl_icd_loader_gen.c)


If you want the warning to go away, you'd need to update Beignet's 
headers to 2.0+ (we already do that in Debian (for different reasons), 
it doesn't change the 1.2 version Beignet reports via clGetDeviceInfo) 
*and* use a stricter check (OCL_ICD_IDENTIFIED_FUNCTIONS > 120), or 
suppress it via compiler option (though the latter might also suppress 
the warning if you accidentally put a function in the wrong slot - I 
haven't tried).


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH] Add clGetKernelSubGroupInfoKHR to _cl_icd_dispatch table

2016-10-10 Thread Rebecca N. Palmer

This patch causes a warning:
/home/yr /beignet/src/cl_khr_icd.c:187:3: warning: initialization from 
incompatible pointer type
   clGetKernelSubGroupInfoKHR,

/home/yr/maintain/beignet/src/cl_khr_icd.c:187:3: warning: (near initialization 
for ‘cl_khr_icd_dispatch.clUnknown136’)


I see that as well, but it does work (with both old and new ocl-icd).


clGetKernelSubGroupInfoKHR is available from OpenCL2.0, also need OpenCL 
version check in beignet?

BTW: ocl-icd 2.2.8's OCL_ICD_IDENTIFIED_FUNCTIONS is 128, is it a typo?


#ifdef CL_VERSION_* checks in current Beignet always give 1.2, the 
version of Beignet's copy of CL/cl.h, but what we actually care about 
here is the version ocl-icd (the ICD loader) was built with: the #if 
(OCL_ICD_IDENTIFIED_FUNCTIONS > 110) is an "OpenCL >=2.0" check for 
that.  (I got the idea from pocl, 
https://anonscm.debian.org/cgit/collab-maint/pocl.git/tree/lib/CL/clGetPlatformIDs.c)


This matters because the ICD loader has a table of function names 
(function_description), while the ICD has a table of function addresses 
(_cl_icd_dispatch) **but no way to tell the loader how long this table 
is**, and the ICD loader's clGetExtensionFunctionAddressForPlatform does 
"search the name table for the requested name, if found return that slot 
in the address table (even if that's NULL or off-the-end garbage), if 
not found in the name table ask the ICD's 
clGetExtensionFunctionAddressForPlatform":

https://www.khronos.org/registry/cl/sdk/2.1/docs/man/xhtml/clGetExtensionFunctionAddressForPlatform.html
https://forge.imag.fr/plugins/scmgit/cgi-bin/gitweb.cgi?p=ocl-icd/ocl-icd.git;a=blob;f=icd_generator.rb;h=d299dbd252ac4b25e62d2e44c0c40dcd179ca3f6;hb=HEAD#l576

Hence, functions that are in the ICD loader's name table but not the 
ICD's address table are impossible to use via ICD, and trying to do so 
may even crash.


The reverse, i.e. having functions in the address table but not the name 
table, is harmless as long as you don't overflow the table's total 
length (now OCL_ICD_IDENTIFIED_FUNCTIONS+50 but was +20 pre-2.0, 
https://forge.imag.fr/plugins/scmgit/cgi-bin/gitweb.cgi?p=ocl-icd/ocl-icd.git;a=blob;f=icd_generator.rb;h=d299dbd252ac4b25e62d2e44c0c40dcd179ca3f6;hb=HEAD#l72); 
hence my use of one #if (OCL_ICD_IDENTIFIED_FUNCTIONS > 110) for the 
whole OpenCL 2.0 block, rather than separate ones for 2.0-core 
(positions 123-135, not currently implemented in Beignet, 
OCL_ICD_IDENTIFIED_FUNCTIONS>110) and clGetKernelSubGroupInfoKHR 
(position 136, >120).


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH] Utests: Allow testing cl_intel_accelerator via ICD

2016-10-10 Thread Rebecca N. Palmer
> [ Chuanbo ] This utest test intel extension cl_intel_accelerator and 
> cl_intel_motion_estimation, so it's not beignet specific,
> but it's intel specific. You can add code such as:
>   if (!cl_check_accelerator() && !cl_check_motion_estimation()) 
> The implementation of cl_check_accelerator and cl_check_motion_estimation are 
> similar as cl_check_subgroups.

I was going for "not finding this on a beignet device means extension
lookup is broken (which would be a bug), not finding it on other
devices is normal"; this version keeps that while allowing the test
to run on non-beignet devices that do have it, but I haven't had time
to test it (or to check whether we already have a test for extension
lookup).

diff --git a/utests/builtin_kernel_block_motion_estimate_intel.cpp 
b/utests/builtin_kernel_block_motion_estimate_intel.cpp
index 5a48753..59927d2 100644
--- a/utests/builtin_kernel_block_motion_estimate_intel.cpp
+++ b/utests/builtin_kernel_block_motion_estimate_intel.cpp
@@ -40,8 +40,13 @@ void builtin_kernel_block_motion_estimate_intel(void)
   oclCreateAcceleratorIntel  = 
(OCLCREATEACCELERATORINTEL*)clGetExtensionFunctionAddress("clCreateAcceleratorINTEL");
 #endif
   if(!oclCreateAcceleratorIntel){
+// This is expected on non-Intel devices, but a bug (broken extension 
lookup) on Intel devices
+if (!cl_check_beignet()) {
+  printf("Not beignet device , Skip!");
+  return;
+}
 fprintf(stderr, "Failed to get extension clCreateImageFromLibvaIntel\n");
-exit(1);
+OCL_ASSERT(0);
   }
   cl_accelerator_intel accel = oclCreateAcceleratorIntel(ctx, 
CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL,sizeof(cl_motion_estimation_desc_intel),
 , );
   OCL_ASSERT(accel != NULL);
@@ -123,7 +128,7 @@ void builtin_kernel_block_motion_estimate_intel(void)
 #endif
   if(!oclReleaseAcceleratorIntel){
 fprintf(stderr, "Failed to get extension clCreateImageFromLibvaIntel\n");
-exit(1);
+OCL_ASSERT(0);
   }
   oclReleaseAcceleratorIntel(accel);
   clReleaseProgram(built_in_prog);

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Utests: respect existing C/CXXFLAGS

2016-10-08 Thread Rebecca N. Palmer
This extends 4be3664 to the test suite.

Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>
---
 utests/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index a5d8d40..8b33666 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -26,8 +26,8 @@ if (NOT NOT_BUILD_STAND_ALONE_UTEST)
   # Threads
   Find_Package(Threads)
 
-  set (CMAKE_CXX_FLAGS "${CMAKE_C_CXX_FLAGS} -std=c++0x -Wno-invalid-offsetof")
-  set (CMAKE_C_FLAGS "${CMAKE_C_CXX_FLAGS}")
+  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_C_CXX_FLAGS} -std=c++0x 
-Wno-invalid-offsetof")
+  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_CXX_FLAGS}")
   set (CMAKE_CXX_FLAGS_DEBUG  "-O0 -g -DGBE_DEBUG=1")
   set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
   set (CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG -DGBE_DEBUG=0")
-- 
2.1.4


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Utests: Allow testing cl_intel_accelerator via ICD

2016-10-08 Thread Rebecca N. Palmer
Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>
---
 utests/CMakeLists.txt | 4 ++--
 utests/builtin_kernel_block_motion_estimate_intel.cpp | 4 
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index f94256c..a5d8d40 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -287,7 +287,8 @@ set (utests_sources
   multi_queue_events.cpp
   compiler_mix.cpp
   compiler_math_3op.cpp
-  compiler_bsort.cpp)
+  compiler_bsort.cpp
+  builtin_kernel_block_motion_estimate_intel.cpp)
 
 if (LLVM_VERSION_NODOT VERSION_GREATER 34)
   SET(utests_sources
@@ -328,7 +329,6 @@ else(GEN_PCI_ID)
 endif(GEN_PCI_ID)
 
 if (NOT_BUILD_STAND_ALONE_UTEST)
-  SET(utests_sources ${utests_sources} 
builtin_kernel_block_motion_estimate_intel.cpp)
   ADD_CUSTOM_TARGET(kernel_bin.bin DEPENDS ${kernel_bin}.bin)
 endif (NOT_BUILD_STAND_ALONE_UTEST)
 
diff --git a/utests/builtin_kernel_block_motion_estimate_intel.cpp 
b/utests/builtin_kernel_block_motion_estimate_intel.cpp
index 15bf761..a0f8302 100644
--- a/utests/builtin_kernel_block_motion_estimate_intel.cpp
+++ b/utests/builtin_kernel_block_motion_estimate_intel.cpp
@@ -8,6 +8,10 @@ OCLRELEASEACCELERATORINTEL * oclReleaseAcceleratorIntel = NULL;
 
 void builtin_kernel_block_motion_estimate_intel(void)
 {
+  if (!cl_check_beignet()) {
+printf("Not beignet device , Skip!");
+return;
+  }
   char* built_in_kernel_names;
   size_t built_in_kernels_size;
   cl_int err = CL_SUCCESS;
-- 
2.1.4


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Utests: Don't end an all-tests run when one test fails

2016-10-08 Thread Rebecca N. Palmer
Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>
---
 utests/builtin_global_linear_id.cpp   | 2 +-
 utests/builtin_global_size.cpp| 2 +-
 utests/builtin_kernel_block_motion_estimate_intel.cpp | 4 ++--
 utests/builtin_local_size.cpp | 2 +-
 utests/builtin_num_groups.cpp | 2 +-
 utests/runtime_climage_from_boname.cpp| 2 +-
 utests/runtime_flat_address_space.cpp | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/utests/builtin_global_linear_id.cpp 
b/utests/builtin_global_linear_id.cpp
index 24e1d2e..3e92518 100644
--- a/utests/builtin_global_linear_id.cpp
+++ b/utests/builtin_global_linear_id.cpp
@@ -62,7 +62,7 @@ static void builtin_global_linear_id(void)
 if (err != CL_SUCCESS)
 {
   printf("Error: Failed to execute kernel! %d\n", err);
-  exit(1);
+  OCL_ASSERT(0);
 }
 
 clFinish(queue);
diff --git a/utests/builtin_global_size.cpp b/utests/builtin_global_size.cpp
index a2ec24a..51ad054 100644
--- a/utests/builtin_global_size.cpp
+++ b/utests/builtin_global_size.cpp
@@ -74,7 +74,7 @@ static void builtin_global_size(void)
   if (err != CL_SUCCESS)
   {
 printf("Error: Failed to write to source array!\n");
-exit(1);
+OCL_ASSERT(0);
   }
 
   // Run the kernel
diff --git a/utests/builtin_kernel_block_motion_estimate_intel.cpp 
b/utests/builtin_kernel_block_motion_estimate_intel.cpp
index 5a48753..15bf761 100644
--- a/utests/builtin_kernel_block_motion_estimate_intel.cpp
+++ b/utests/builtin_kernel_block_motion_estimate_intel.cpp
@@ -41,7 +41,7 @@ void builtin_kernel_block_motion_estimate_intel(void)
 #endif
   if(!oclCreateAcceleratorIntel){
 fprintf(stderr, "Failed to get extension clCreateImageFromLibvaIntel\n");
-exit(1);
+OCL_ASSERT(0);
   }
   cl_accelerator_intel accel = oclCreateAcceleratorIntel(ctx, 
CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL,sizeof(cl_motion_estimation_desc_intel),
 , );
   OCL_ASSERT(accel != NULL);
@@ -123,7 +123,7 @@ void builtin_kernel_block_motion_estimate_intel(void)
 #endif
   if(!oclReleaseAcceleratorIntel){
 fprintf(stderr, "Failed to get extension clCreateImageFromLibvaIntel\n");
-exit(1);
+OCL_ASSERT(0);
   }
   oclReleaseAcceleratorIntel(accel);
   clReleaseProgram(built_in_prog);
diff --git a/utests/builtin_local_size.cpp b/utests/builtin_local_size.cpp
index 491175d..a55769b 100644
--- a/utests/builtin_local_size.cpp
+++ b/utests/builtin_local_size.cpp
@@ -59,7 +59,7 @@ static void builtin_local_size(void)
   if (err != CL_SUCCESS)
   {
 printf("Error: Failed to write to source array!\n");
-exit(1);
+OCL_ASSERT(0);
   }
 
   // Run the kernel
diff --git a/utests/builtin_num_groups.cpp b/utests/builtin_num_groups.cpp
index 832766e..764c70b 100644
--- a/utests/builtin_num_groups.cpp
+++ b/utests/builtin_num_groups.cpp
@@ -56,7 +56,7 @@ static void builtin_num_groups(void)
   if (err != CL_SUCCESS)
   {
 printf("Error: Failed to write to source array!\n");
-exit(1);
+OCL_ASSERT(0);
   }
 
   // Run the kernel
diff --git a/utests/runtime_climage_from_boname.cpp 
b/utests/runtime_climage_from_boname.cpp
index 2160886..a228c97 100644
--- a/utests/runtime_climage_from_boname.cpp
+++ b/utests/runtime_climage_from_boname.cpp
@@ -161,7 +161,7 @@ void runtime_climage_from_boname(void)
 #endif
   if(!oclCreateImageFromLibvaIntel){
 fprintf(stderr, "Failed to get extension clCreateImageFromLibvaIntel\n");
-exit(1);
+OCL_ASSERT(0);
   }
   cl_mem dst = oclCreateImageFromLibvaIntel(ctx, , NULL);
 
diff --git a/utests/runtime_flat_address_space.cpp 
b/utests/runtime_flat_address_space.cpp
index cf94cf5..c2d25de 100644
--- a/utests/runtime_flat_address_space.cpp
+++ b/utests/runtime_flat_address_space.cpp
@@ -59,7 +59,7 @@ main(int argc, char *argv[])
 for (uint32_t i = 0; dst_buffer && i < n; ++i)
   if (dst_buffer[i] != int(i)) {
 fprintf(stderr, "run-time flat address space failed\n");
-exit(-1);
+OCL_ASSERT(0);
   }
 clEnqueueUnmapMemObject(queue, dst[j], dst_buffer, 0, NULL, NULL);
   }
-- 
2.1.4


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Utests: use clGetExtensionFunctionAddressForPlatform

2016-10-08 Thread Rebecca N. Palmer
This is required to find KHR extensions via ICD, as the loader needs to
know which ICD to send the request to.

If the function is not found, fail the test instead of crashing.
---
 utests/utest_helper.cpp | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/utests/utest_helper.cpp b/utests/utest_helper.cpp
index d12dccf..d3fc069 100644
--- a/utests/utest_helper.cpp
+++ b/utests/utest_helper.cpp
@@ -891,7 +891,11 @@ int cl_check_subgroups(void)
 return 0;
   }
   if(utestclGetKernelSubGroupInfoKHR == NULL)
-utestclGetKernelSubGroupInfoKHR  = (clGetKernelSubGroupInfoKHR_cb*) 
clGetExtensionFunctionAddress("clGetKernelSubGroupInfoKHR");
+utestclGetKernelSubGroupInfoKHR  = (clGetKernelSubGroupInfoKHR_cb*) 
clGetExtensionFunctionAddressForPlatform(platform,"clGetKernelSubGroupInfoKHR");
+  if(utestclGetKernelSubGroupInfoKHR == NULL) {
+printf("Can't find clGetKernelSubGroupInfoKHR");
+OCL_ASSERT(0);
+  }
   return 1;
 }
 
-- 
2.1.4


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Docs: Spelling and grammar fixes

2016-10-08 Thread Rebecca N. Palmer
Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com>
---
 docs/Beignet.mdwn   | 20 ++--
 docs/howto/cross-compiler-howto.mdwn|  4 ++--
 docs/howto/stand-alone-utest-howto.mdwn |  8 
 utests/builtin_global_linear_id.cpp |  2 +-
 4 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/docs/Beignet.mdwn b/docs/Beignet.mdwn
index 8f6f95c..64d33dc 100644
--- a/docs/Beignet.mdwn
+++ b/docs/Beignet.mdwn
@@ -16,7 +16,7 @@ News
 Prerequisite
 
 
-The project depends on the following external libaries:
+The project depends on the following external libraries:
 
 - libdrm libraries (libdrm and libdrm\_intel)
 - Various LLVM components
@@ -33,7 +33,7 @@ you can still link to the beignet OpenCL library. You can 
find the beignet/libcl
 in your system's library installation directories.
 
 Note that the compiler depends on LLVM (Low-Level Virtual Machine project), 
and the
-project normally support 3 latest LLVM released version.
+project normally supports the 3 latest LLVM released versions.
 Right now, the code has been compiled with LLVM 3.6, 3.7 and 3.8. With older
 version LLVM from 3.3, build still support, but no full tests cover.
 
@@ -48,11 +48,11 @@ A simple command to install all the above dependencies for 
ubuntu or debian is:
 
 **The recommended LLVM/CLANG version is 3.6 and/or 3.7**
 
-Based on our test result, LLVM 3.6 and 3.7 has best pass rate on all the test 
suites. Compare
+Based on our test result, LLVM 3.6 and 3.7 has the best pass rate on all the 
test suites. Compared
 to LLVM 3.6 and 3.7, if you used LLVM 3.8, you should pay attention to float 
immediate. For example,
-if you use 1.0 in the kernel, LLVM 3.6 will treate it as 1.0f, a single float, 
because the project
-don't support double float. but LLVM 3.8 will treate it as 1.0, a double foat, 
at the last it may cause
-error. So we recommend use 1.0f instead of 1.0 if you don't need double float.
+if you use 1.0 in the kernel, LLVM 3.6 will treat it as 1.0f, a single float, 
because the project
+doesn't support double float. but LLVM 3.8 will treat it as 1.0, a double 
float, at the last it may cause
+error. So we recommend using 1.0f instead of 1.0 if you don't need double 
float.
 
 For LLVM 3.4 and 3.5, Beignet still support them, but it may be limited to 
support the
 build and major functions.
@@ -112,12 +112,12 @@ It installs the OCL icd vendor files to 
/etc/OpenCL/vendors, if the system suppo
 
 `> make package`
 
-It packages the driver binaries, you may copy the package to another 
machine with simillar system.
+It packages the driver binaries, you may copy the package to another 
machine with similar system.
 
 How to run
 --
 
-After build and install of beignet, you may need to check whether it works on 
your
+After building and installing Beignet, you may need to check whether it works 
on your
 platform. Beignet also produces various tests to ensure the compiler and the 
run-time
 consistency. This small test framework uses a simple c++ registration system to
 register all the unit tests.
@@ -173,7 +173,7 @@ Known Issues
 
   `# echo -n 0 > /sys/module/i915/parameters/enable_hangcheck`
 
-  But this command is a little bit dangerous, as if your kernel really hang, 
then the gpu will lock up
+  But this command is a little bit dangerous, as if your kernel really hangs, 
then the GPU will lock up
   forever until a reboot.
 
 * "Beignet: self-test failed" and almost all unit tests fail.
@@ -207,7 +207,7 @@ Known Issues
 
   `# export OCL_STRICT_CONFORMANCE=0`.
 
-  This would lost some precision but gain performance.
+  This loses some precision but gains performance.
 
 * cl\_khr\_gl\_sharing.
   This extension highly depends on mesa support. It seems that mesa would not 
provide
diff --git a/docs/howto/cross-compiler-howto.mdwn 
b/docs/howto/cross-compiler-howto.mdwn
index d541816..a8a696d 100644
--- a/docs/howto/cross-compiler-howto.mdwn
+++ b/docs/howto/cross-compiler-howto.mdwn
@@ -2,7 +2,7 @@ Cross Compiler HowTo
 
 
 Beignet supports both PC devices with full profile and embedded/handheld
-devices with embeded profile. This document describes how to build Beignet
+devices with embedded profile. This document describes how to build Beignet
 and OpenCL kernels for a target machine (embedded/handheld devices) in a
 host machine with the help of cross compiler, and also the large-size-reduced
 Beignet driver package for the target machine.
@@ -65,7 +65,7 @@ provide only the OpenCL runtime library without OpenCL 
compiler, and only the
 executable binary kernel is supported on such devices.
 
 It means that just distribute libcl.so and libgbeinterp.so (~320k in total 
after strip)
-are enough for OpenCL embeded profile in the target machine. The whole Beignet
+are enough for OpenCL embedded profile in the target machine. The whole Beignet
 driver set can be separated into several packages for dif

Re: [Beignet] [PATCH] Fix build with latest libdrm

2016-10-08 Thread Rebecca N. Palmer

+  if (drm_intel_get_pooled_eu(driver->fd) >= 0) {


Shouldn't this be >0, not >=0? The libdrm commit message ( 
https://cgit.freedesktop.org/mesa/drm/commit/intel/intel_bufmgr_gem.c?id=98887140e343493f01be7a1dec721c024bcf72c7 
) says 0 means not supported.


(>=0 works for me with libdrm 2.4.71, but on a 3.16 kernel these 
functions are always going to fail...)


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Utest: test pow, not powr, on negative x

2016-09-25 Thread Rebecca N. Palmer
powr(x,y) is explicitly undefined for negative x; on my hardware,
it happens to be correct in default mode, but acts like pow(abs(x),y)
in fast (OCL_STRICT_CONFORMANCE=0) mode, failing the test.

Signed-off-by: Rebecca Palmer 

diff --git a/utests/utest_math_gen.py b/utests/utest_math_gen.py
index a4bfd51..9ef75d5 100755
--- a/utests/utest_math_gen.py
+++ b/utests/utest_math_gen.py
@@ -447,14 +447,20 @@ static float minmag(float x, float y){
   nextafterUtests = 
func('nextafter','nextafterf',[nextafter_input_type1,nextafter_input_type2],nextafter_output_type,[nextafter_input_values1,nextafter_input_values2],'0
 * FLT_ULP')
   
   # gentype pow(gentype x, gentype y)
-  pow_base_values = base_input_values1
+  pow_base_values = [80, -80, 3.14, -3.14, 0.5, 1, 
-3,-4,2,0.0,-0.0,1500.24,-1500.24]
   pow_input_values1 = []
   pow_input_values2 = []
   
pow_input_values1,pow_input_values2=gene2ValuesLoop(pow_input_values1,pow_input_values2,pow_base_values)
   pow_input_type1 = ['float','float2','float4','float8','float16']
   pow_input_type2 = ['float','float2','float4','float8','float16']
   pow_output_type = ['float','float2','float4','float8','float16']
-  powUtests = 
func('pow','powf',[pow_input_type1,pow_input_type2],pow_output_type,[pow_input_values1,pow_input_values2],'16
 * FLT_ULP')
+  pow_cpu_func='''
+static float pow_utest(float x, float y){
+if ((x == 0.0f) && (y == -INFINITY))
+return INFINITY;
+return pow(x,y);
+} '''
+  powUtests = 
func('pow','pow_utest',[pow_input_type1,pow_input_type2],pow_output_type,[pow_input_values1,pow_input_values2],'16
 * FLT_ULP',pow_cpu_func)
   
   # floatn pown(floatn x, intn y)
   pown_input_values1 = 
[FLT_MAX_POSI,FLT_MIN_NEGA,FLT_MIN_POSI,FLT_MAX_NEGA,80, -80, 3.14, -3.14, 0.5, 
1, 0.0,1500.24,-1500.24]
@@ -469,7 +475,7 @@ static float pown(float x, int y){
   pownUtests = 
func('pown','pown',[pown_input_type1,pown_input_type2],pown_output_type,[pown_input_values1,pown_input_values2],'16
 * FLT_ULP', pown_cpu_func)
   
   # gentype powr(gentype x, gentype y)
-  powr_input_values1 = [80, -80, 3.14, 1, 1.257, +0.0, -0.0, +0.0, -0.0, +0.0, 
-0.0, +1, +1, -80, +0.0, -0.0, +0.0, -0.0, 'INFINITY','INFINITY', +1, +1, +0.0, 
2.5,' NAN', 'NAN', 'NAN']
+  powr_input_values1 = [80, 80, 3.14, 1, 1.257, +0.0, -0.0, +0.0, -0.0, +0.0, 
-0.0, +1, +1, 80, +0.0, -0.0, +0.0, -0.0, 'INFINITY','INFINITY', +1, +1, +0.0, 
2.5,' NAN', 'NAN', 'NAN']
   powr_input_values2 = [5.5, 6,7, +0.0, -0.0, -1, -15.67, '-INFINITY', 
'-INFINITY', 1,  -2.7, 10.5, 3.1415, 3.5, -0.0, -0.0, +0.0, +0.0, +0.0, -0.0, 
'INFINITY', '-INFINITY', 'NAN', 'NAN', -1.5, +0.0, 1.5]
   powr_input_type1 = ['float','float2','float4','float8','float16']
   powr_input_type2 = ['float','float2','float4','float8','float16']

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] Status of LLVM 3.9 support?

2016-09-25 Thread Rebecca N. Palmer
A patch set for supporting LLVM 3.9 was posted here last month
https://lists.freedesktop.org/archives/beignet/2016-August/007843.html
, but not pushed; why?

Debian mesa plans to switch to 3.9 
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=836913
and we would normally follow.

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] few questions on Beignet-OpenCL-ICD v1.2.0 regarding X11 and kernel patches

2016-09-21 Thread Rebecca N. Palmer
I just built beignet in a chroot with no X11 installed; it didn't have 
that string, and worked (when run as root - for ordinary users, it 
failed with "/dev/dri/card0 not authenticated", but this is expected).


Debian's (with-X) beignet has that string and a few more (__cxx11, 
x11_display, x11_screen, x11_dpy).


Did you do the no-X compile by an explicit option, or by not installing 
the X headers (libx11-dev, libxext-dev, libxfixes-dev)?


Which clinfo is this (there are at least 3 implementations), and can you 
get a backtrace of the error (gdb clinfo)?


beignet doesn't need any kernel patches on recent kernels (>=4.2; check 
with uname -r), and in the known cases where it does, the error is 
"Beignet: self-test failed" not yours.


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] cl_get_gt_device(): error while running OPenCL on Kabylake

2016-07-16 Thread Rebecca N. Palmer

cl_get_gt_device(): error, unknown device: 5916
[...] Is there a support missing for
Kabylake/.?


Yes - that error means "Beignet does not recognize this device, so will 
not try to use it" (from 
https://cgit.freedesktop.org/beignet/tree/src/cl_device_id.c#n597 ; the 
supported devices list is in 
https://cgit.freedesktop.org/beignet/tree/src/cl_device_data.h ).


___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH] Report build failures in backend to the build log

2016-07-12 Thread Rebecca N. Palmer
I never got a reply to this; is there something wrong with it (i.e. 
should I stop including it in Debian's beignet) or are you just not 
interested in this feature?


In the case I tested, the assert happens in LLVM 3.5 but not 3.7 (which 
instead returns the error "function with no prototype cannot use the 
spir_function calling convention"), but I don't know whether this 
applies to all missing-function errors.


On 06/06/16 23:37, Rebecca N. Palmer wrote:

As noted at llvm_gen_backend:94, we currently lack a mechanism for
reporting failures in backend (beignet-managed) compiler passes to the
build log, and instead print the error to stderr and assert-fail.

This patch creates such a mechanism, and uses it for "function not found".
Please note that it has not had much testing yet.

Points for discussion/improvement:
-Does not currently print source location information.
-Are there other assertions that can be triggered by invalid input, and hence
should also use this?

Signed-off-by: Rebecca Palmer <rebecca_pal...@zoho.com>

diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 4f8167c..44cb60b 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -122,7 +122,7 @@ namespace gbe {
   bool Program::buildFromLLVMFile(const char *fileName, const void* module, 
std::string &error, int optLevel) {
 ir::Unit *unit = new ir::Unit();
 llvm::Module * cloned_module = NULL;
-bool ret = true;
+bool ret = false;
 if(module){
 #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8
   cloned_module = llvm::CloneModule((llvm::Module*)module).release();
@@ -133,7 +133,7 @@ namespace gbe {
 bool strictMath = true;
 if (fast_relaxed_math || !OCL_STRICT_CONFORMANCE)
   strictMath = false;
-if (llvmToGen(*unit, fileName, module, optLevel, strictMath, 
OCL_PROFILING_LOG) == false) {
+if (llvmToGen(*unit, fileName, module, optLevel, strictMath, 
OCL_PROFILING_LOG, error) == false) {
   if (fileName)
 error = std::string(fileName) + " not found";
   delete unit;
@@ -146,15 +146,19 @@ namespace gbe {
   unit = new ir::Unit();
   if(cloned_module){
 //suppose file exists and llvmToGen will not return false.
-llvmToGen(*unit, fileName, cloned_module, 0, strictMath, 
OCL_PROFILING_LOG);
+llvmToGen(*unit, fileName, cloned_module, 0, strictMath, 
OCL_PROFILING_LOG, error);
   }else{
 //suppose file exists and llvmToGen will not return false.
-llvmToGen(*unit, fileName, module, 0, strictMath, OCL_PROFILING_LOG);
+llvmToGen(*unit, fileName, module, 0, strictMath, OCL_PROFILING_LOG, 
error);
   }
 }
-assert(unit->getValid());
-if (!this->buildFromUnit(*unit, error))
-  ret = false;
+if(unit->getValid()){
+  std::string error2;
+  if (this->buildFromUnit(*unit, error2)){
+ret = true;
+  }
+  error = error + error2;
+}
 delete unit;
 if(cloned_module){
   delete (llvm::Module*) cloned_module;
diff --git a/backend/src/llvm/llvm_gen_backend.cpp 
b/backend/src/llvm/llvm_gen_backend.cpp
index acad1b2..bb00aab 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -513,6 +513,7 @@ namespace gbe
 Function *Func;
 const Module *TheModule;
 int btiBase;
+bool has_errors;
 /*! legacyMode is for hardware before BDW,
  * which do not support stateless memory access */
 bool legacyMode;
@@ -528,6 +529,7 @@ namespace gbe
 LI(0),
 TheModule(0),
 btiBase(BTI_RESERVED_NUM),
+has_errors(false),
 legacyMode(true)
 {
 #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=7
@@ -2940,6 +2942,9 @@ namespace gbe
 pass = PASS_EMIT_REGISTERS;
 for (inst_iterator I = inst_begin(), E = inst_end(); I != E; ++I)
   visit(*I);
+
+// Abort if this found an error (otherwise emitBasicBlock will assert)
+if(has_errors){return;}

 // First create all the labels (one per block) ...
 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
@@ -3749,11 +3754,8 @@ namespace gbe
 break;
   case GEN_OCL_NOT_FOUND:
   default:
-std::cerr << "Caller instruction: " << std::endl;
-I.dump();
-std::cerr << "Callee function: " << std::endl;
-Callee->dump();
-GBE_ASSERT(0);
+has_errors = true;
+Func->getContext().emitError(&I,"function '" + fnName + "' not found or 
cannot be inlined");
 };
   }

diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
index 41723d1..02a69ec 100644
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -26,6 +26,8 @@

 #include "llvm/llvm_gen_backend.hpp"
 #include "llvm/llvm

[Beignet] Pointer load/store - obsolete documentation?

2016-06-09 Thread Rebecca N. Palmer
The documentation says "an unsupported feature which is to store/load 
pointers to/from memory[...]We plan to fix it in next major release 
1.1.0" (docs/Beignet.mdwn:216-219)


Given that 1.1.0 has now been released, and does appear to fix this 
(creating an array of pointers works in 1.1.2 but not in 1.0.3; 
https://cgit.freedesktop.org/beignet/commit/?id=48e22ae4536fbf944f39ab13e8f1133d3f5edcc3), 
should this note now be removed?


(This is the test case I used: would you like it making into a utest?)
/*output in 1.1.2:
built 0 built 0 kernel created 00 returned (7, 8, 7)
1 returned (2, 7, 6)
2 returned (0, 1, 0)

output in 1.0.3:
store i32 7, i32 addrspace(1)* %26, align 4, !tbaa !11
built 0 Illegal pointer which is not from a valid memory space.
Aborting...
*/

#include <stdio.h>
#include <CL/cl.h>
int main(int argc,char** argv)
{
  cl_platform_id platforms[4];
  cl_uint num_platforms;
  cl_device_id device;
  cl_int status, ret;
  cl_context ctx;
  cl_command_queue queue;
  cl_program program,program2;
  cl_kernel kernel;
  cl_mem buffer[3];
  cl_event kernel_finished;
  FILE *f;
  size_t n = 3,program_length;
  int i;
  cl_int test_data[3][3] = {{3, 8, 5},{2, 4, 6},{0, 1, 0}};
  const char *kernel_source="__kernel void pointer_store_load"
  "(__global int *p0, __global int *p1, __global int *p2)"
  "{__global int * __private a[2];int i;"
  "i=get_global_id(0);a[0]=p0;a[1]=p1;a[p2[i]][i]=7;}";
  char *build_log;
  ret = 2;
  build_log=calloc(1001,1);
  clGetPlatformIDs(4,platforms,&num_platforms);
  for(i=0;i

[Beignet] [PATCH] Report build failures in backend to the build log

2016-06-06 Thread Rebecca N. Palmer
As noted at llvm_gen_backend:94, we currently lack a mechanism for
reporting failures in backend (beignet-managed) compiler passes to the
build log, and instead print the error to stderr and assert-fail.

This patch creates such a mechanism, and uses it for "function not found".
Please note that it has not had much testing yet.

Points for discussion/improvement:
-Does not currently print source location information.
-Are there other assertions that can be triggered by invalid input, and hence
should also use this?

Signed-off-by: Rebecca Palmer 

diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 4f8167c..44cb60b 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -122,7 +122,7 @@ namespace gbe {
   bool Program::buildFromLLVMFile(const char *fileName, const void* module, 
std::string &error, int optLevel) {
 ir::Unit *unit = new ir::Unit();
 llvm::Module * cloned_module = NULL;
-bool ret = true;
+bool ret = false;
 if(module){
 #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8
   cloned_module = llvm::CloneModule((llvm::Module*)module).release();
@@ -133,7 +133,7 @@ namespace gbe {
 bool strictMath = true;
 if (fast_relaxed_math || !OCL_STRICT_CONFORMANCE)
   strictMath = false;
-if (llvmToGen(*unit, fileName, module, optLevel, strictMath, 
OCL_PROFILING_LOG) == false) {
+if (llvmToGen(*unit, fileName, module, optLevel, strictMath, 
OCL_PROFILING_LOG, error) == false) {
   if (fileName)
 error = std::string(fileName) + " not found";
   delete unit;
@@ -146,15 +146,19 @@ namespace gbe {
   unit = new ir::Unit();
   if(cloned_module){
 //suppose file exists and llvmToGen will not return false.
-llvmToGen(*unit, fileName, cloned_module, 0, strictMath, 
OCL_PROFILING_LOG);
+llvmToGen(*unit, fileName, cloned_module, 0, strictMath, 
OCL_PROFILING_LOG, error);
   }else{
 //suppose file exists and llvmToGen will not return false.
-llvmToGen(*unit, fileName, module, 0, strictMath, OCL_PROFILING_LOG);
+llvmToGen(*unit, fileName, module, 0, strictMath, OCL_PROFILING_LOG, 
error);
   }
 }
-assert(unit->getValid());
-if (!this->buildFromUnit(*unit, error))
-  ret = false;
+if(unit->getValid()){
+  std::string error2;
+  if (this->buildFromUnit(*unit, error2)){
+ret = true;
+  }
+  error = error + error2;
+}
 delete unit;
 if(cloned_module){
   delete (llvm::Module*) cloned_module;
diff --git a/backend/src/llvm/llvm_gen_backend.cpp 
b/backend/src/llvm/llvm_gen_backend.cpp
index acad1b2..bb00aab 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -513,6 +513,7 @@ namespace gbe
 Function *Func;
 const Module *TheModule;
 int btiBase;
+bool has_errors;
 /*! legacyMode is for hardware before BDW,
  * which do not support stateless memory access */
 bool legacyMode;
@@ -528,6 +529,7 @@ namespace gbe
 LI(0),
 TheModule(0),
 btiBase(BTI_RESERVED_NUM),
+has_errors(false),
 legacyMode(true)
 {
 #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=7
@@ -2940,6 +2942,9 @@ namespace gbe
 pass = PASS_EMIT_REGISTERS;
 for (inst_iterator I = inst_begin(), E = inst_end(); I != E; ++I)
   visit(*I);
+
+// Abort if this found an error (otherwise emitBasicBlock will assert)
+if(has_errors){return;}
 
 // First create all the labels (one per block) ...
 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
@@ -3749,11 +3754,8 @@ namespace gbe
 break;
   case GEN_OCL_NOT_FOUND:
   default:
-std::cerr << "Caller instruction: " << std::endl;
-I.dump();
-std::cerr << "Callee function: " << std::endl;
-Callee->dump();
-GBE_ASSERT(0);
+has_errors = true;
+Func->getContext().emitError(&I,"function '" + fnName + "' not found 
or cannot be inlined");
 };
   }
 
diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
index 41723d1..02a69ec 100644
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -26,6 +26,8 @@
 
 #include "llvm/llvm_gen_backend.hpp"
 #include "llvm/llvm_to_gen.hpp"
+#include 
+#include 
 #include "sys/cvar.hpp"
 #include "sys/platform.hpp"
 #include "ir/unit.hpp"
@@ -249,8 +251,36 @@ namespace gbe
   BVAR(OCL_OUTPUT_LLVM_AFTER_LINK, false);
   BVAR(OCL_OUTPUT_LLVM_AFTER_GEN, false);
 
+  class gbeDiagnosticContext
+  {
+  public:
+gbeDiagnosticContext() : _str(""), messages(_str), printer(messages), 
_has_errors(false) {}
+void process(const llvm::DiagnosticInfo &diagnostic)
+{
+  if (diagnostic.getSeverity() != DS_Remark) { // avoid noise from 
function inlining remarks
+diagnostic.print(printer);
+  }
+  if (diagnostic.getSeverity() == DS_Error) {
+ 

[Beignet] [PATCH] FindLLVM: allow LLVM/Clang 3.7

2016-04-24 Thread Rebecca N. Palmer
As beignet now works with LLVM/Clang 3.7, accept this version
when searching for llvm-config.

Signed-off-by: Rebecca Palmer 

--- a/CMake/FindLLVM.cmake
+++ b/CMake/FindLLVM.cmake
@@ -8,12 +8,12 @@
 # LLVM_FOUND   - True if llvm found.
 if (LLVM_INSTALL_DIR)
   find_program(LLVM_CONFIG_EXECUTABLE
-   NAMES llvm-config-35 llvm-config-3.5 llvm-config-36 
llvm-config-3.6 llvm-config-33 llvm-config-3.3 llvm-config-34 llvm-config-3.4 
llvm-config
+   NAMES llvm-config-35 llvm-config-3.5 llvm-config-36 
llvm-config-3.6 llvm-config-37 llvm-config-3.7 llvm-config-33 llvm-config-3.3 
llvm-config-34 llvm-config-3.4 llvm-config
DOC "llvm-config executable"
PATHS ${LLVM_INSTALL_DIR} NO_DEFAULT_PATH)
 else (LLVM_INSTALL_DIR)
   find_program(LLVM_CONFIG_EXECUTABLE
-   NAMES llvm-config-35 llvm-config-3.5 llvm-config-36 
llvm-config-3.6 llvm-config-33 llvm-config-3.3 llvm-config-34 llvm-config-3.4 
llvm-config
+   NAMES llvm-config-35 llvm-config-3.5 llvm-config-36 
llvm-config-3.6 llvm-config-37 llvm-config-3.7 llvm-config-33 llvm-config-3.3 
llvm-config-34 llvm-config-3.4 llvm-config
DOC "llvm-config executable")
 endif (LLVM_INSTALL_DIR)
 

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] cl_khr_fp64 on OpenCL 1.2+

2016-03-15 Thread Rebecca N. Palmer

CMake Error: The following variables are used in this project, but they
are set to NOTFOUND.
 Please set them or make sure they are set and tested correctly in the
CMake files:
 CLANG_LIB
[...]
Please let me know what I'm missing.


libclang-3.7-dev ?

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] Unrecoverable system lockup when allocating too much memory

2015-11-10 Thread Rebecca N. Palmer

On 10/11/15 02:37, Zou, Nanhai wrote:

looks like something related to drm driver bo management.

does
export bo_reuse=0
help?

No; if anything, it makes it fill the memory _faster_.

(All of this is in 1.1.1; I haven't tried it in master, at least not 
recently.)




Thanks
Zou Nanhai



-Original Message-
From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
Rebecca N. Palmer
Sent: Saturday, November 07, 2015 6:06 AM
To: beignet@lists.freedesktop.org
Subject: Re: [Beignet] Unrecoverable system lockup when allocating too much
memory


(The example in
https://bugs.launchpad.net/ubuntu/+source/pyopencl/+bug/1354086 no
longer hangs, so the "rapidly allocating and freeing pyopencl objects
doesn't actually free the memory" aspect has evidently been fixed, but
keeping too many objects for the available memory still does hang.


On further investigation, it isn't that simple:

-Create and keep (large_array_test.py): With swap, creates the objects in
swap (becoming slow but not actually hanging), then errors out on trying to do
arithmetic on them.  Without swap, errors out in object creation, but not
before the kernel's OOM killer terminates a few pieces of the desktop
(according to the log, it thinks my test is only using ~50MB, so it (like
gnome-system-monitor) evidently can't see GPU memory use).
-Rapid create-then-free (arraybug_test.py) using the result (naive
pyopencl.clmath): Doesn't hang, but may segfault on exit.  (Kernel log:
python3[8857]: segfault at 20 ip 7fa5caac3c84 sp 7ffca782b2a0 error
4 in libpthread-2.21.so[7fa5caaba000+18000] ; I can't be more specific because
it doesn't crash under gdb).
-Rapid create-then-free throwing away the result (this would be a bug in a real
program, but probably shouldn't hang the entire system): With swap, starts
using it, hence becoming slow but not actually hanging (though I haven't taken
it to the point of actually filling the swap).
Without swap, hangs (at least screen output does: desktop clock frozen, mouse
pointer minimally responsive), with disk activity (!);
Alt+SysRq+s,u,b restarts, but usually doesn't write the log).


___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] Unrecoverable system lockup when allocating too much memory

2015-11-06 Thread Rebecca N. Palmer

(The example in
https://bugs.launchpad.net/ubuntu/+source/pyopencl/+bug/1354086 no
longer hangs, so the "rapidly allocating and freeing pyopencl objects
doesn't actually free the memory" aspect has evidently been fixed, but
keeping too many objects for the available memory still does hang.


On further investigation, it isn't that simple:

-Create and keep (large_array_test.py): With swap, creates the objects 
in swap (becoming slow but not actually hanging), then errors out on 
trying to do arithmetic on them.  Without swap, errors out in object 
creation, but not before the kernel's OOM killer terminates a few pieces 
of the desktop (according to the log, it thinks my test is only using 
~50MB, so it (like gnome-system-monitor) evidently can't see GPU memory 
use).
-Rapid create-then-free (arraybug_test.py) using the result (naive 
pyopencl.clmath): Doesn't hang, but may segfault on exit.  (Kernel log: 
python3[8857]: segfault at 20 ip 7fa5caac3c84 sp 7ffca782b2a0 
error 4 in libpthread-2.21.so[7fa5caaba000+18000] ; I can't be more 
specific because it doesn't crash under gdb).
-Rapid create-then-free throwing away the result (this would be a bug in 
a real program, but probably shouldn't hang the entire system): With 
swap, starts using it, hence becoming slow but not actually hanging 
(though I haven't taken it to the point of actually filling the swap). 
Without swap, hangs (at least screen output does: desktop clock frozen, 
mouse pointer minimally responsive), with disk activity (!); 
Alt+SysRq+s,u,b restarts, but usually doesn't write the log).
#!/usr/bin/env python3
#Depends: python3-pyopencl python3-numpy
from __future__ import division,print_function
import pyopencl
import pyopencl.tools
import pyopencl.array
import numpy as np
import time
import pyopencl.clmath
import gc
import sys
pad=['']
def trace_c(frame,event,arg):
if event=='return':
pad[0]=pad[0][0:-1]
if event!='line':#never actually hits c_call,c_return
print(pad[0],frame.f_code.co_name,frame.f_code.co_filename[len("/home/rnpalmer/.local/lib/python2.7/site-packages/"):],frame.f_code.co_firstlineno,event)
try:
print(arg.__name__)
except AttributeError:
pass
if event=='call':
pad[0]=pad[0]+' '
return trace_c
#sys.settrace(trace_c)
ctx=pyopencl.create_some_context()
cq0=pyopencl.CommandQueue(ctx)
#mpool=pyopencl.tools.MemoryPool(pyopencl.tools.ImmediateAllocator(cq0))#using this avoids the hang
a=np.random.randn(1e6).astype(np.dtype('float32'))
aCL=pyopencl.array.to_device(cq0,a)#doesn't help: ,allocator=pyopencl.tools.ImmediateAllocator(cq0))
f1=pyopencl.elementwise.ElementwiseKernel(ctx,pyopencl.tools.dtype_to_ctype(aCL.dtype)+" *a,"+pyopencl.tools.dtype_to_ctype(aCL.dtype)+" *b","b[i]=cos(a[i])+sin(a[i])+sqrt(a[i])","cossinsqrt")
bCL=aCL+1
n=0
b=np.cos(a)+np.sin(a)+np.sqrt(a)
bCL=pyopencl.clmath.cos(aCL)+pyopencl.clmath.sin(aCL)+pyopencl.clmath.sqrt(aCL)
s=""
bCLlist=[]
print("gc:",gc.collect())
while s=="":
#bCL=aCL._new_with_changes(None,None)#(used by empty_like,etc.)doesn't hang within 10,000
#bCL=aCL._new_like_me()#(used by +,etc.)doesn't hang within 10,000
#bCL=pyopencl.array.zeros_like(aCL)#doesn't hang within 10,000
#bCL=pyopencl.array.empty_like(aCL)#doesn't hang within 5500
#bCL=pyopencl.array.empty_like(aCL);f1(aCL,bCL).wait()#doesn't hang within 8000
#bCL=pyopencl.array.empty_like(aCL);f2=pyopencl.elementwise.ElementwiseKernel(ctx,pyopencl.tools.dtype_to_ctype(bCL.dtype)+" *a,"+pyopencl.tools.dtype_to_ctype(bCL.dtype)+" *b","b[i]=cos(a[i])+sin(a[i])+sqrt(a[i])","cossinsqrt");f2(aCL,bCL).wait()#doesn't hang within 5500
#cCL=pyopencl.array.to_device(cq0,a);bCL=pyopencl.array.to_device(cq0,a);f1(cCL,bCL).wait()#doesn't hang within 5000
#bCL=pyopencl.array.empty_like(aCL);f1(bCL,aCL).wait()#doesn't hang within 5500
#bCL=pyopencl.array.to_device(cq0,a)#doesn't hang within 10,000
bCL=pyopencl.array.to_device(cq0,a);pyopencl.enqueue_copy(cq0,bCL.data,aCL.data)#hangs after ~700, visible to System Monitor#Xenial: starts swapping (temporary freezes) after ~700, erratic displayed RAM usage, reached ~4500/15GB without a full hang; with swap disabled, hangs after ~700
#bCL=pyopencl.array.to_device(cq0,a);pyopencl.enqueue_copy(cq0,bCL.data,aCL.data).wait()#doesn't hang within 8000
#bCL=pyopencl.array.to_device(cq0,a);cCL=pyopencl.array.to_device(cq0,a);pyopencl.enqueue_copy(cq0,bCL.data,cCL.data)#hangs after ~350
#cCL=pyopencl.array.to_device(cq0,a);pyopencl.enqueue_copy(cq0,bCL.data,cCL.data)#hangs after ~700, visible to System Monitor and Valgrind
#cCL=pyopencl.array.to_device(cq0,a);pyopencl.enqueue_copy(cq0,bCL.data,cCL.data).wait()#doesn't hang within 5000
#bCL=pyopencl.array.to_device(cq0,a);cCL=pyopencl.array.to_device(cq0,a);bCL=cCL+cCL#hangs after ~350, partly visible to System Monitor and Valgrind

Re: [Beignet] Unrecoverable system lockup when allocating too much memory

2015-11-05 Thread Rebecca N. Palmer

Would this be better if you turn off the overcommit via proc fs?


Only if you also disable any swap space ( sudo swapoff -a && sudo sh -c 
"echo -n 2 > /proc/sys/vm/overcommit_memory" #warning, this may itself 
crash your desktop); if I disable overcommit but leave swap on, I get a 
hang with the following trace.


(The example in 
https://bugs.launchpad.net/ubuntu/+source/pyopencl/+bug/1354086 no 
longer hangs, so the "rapidly allocating and freeing pyopencl objects 
doesn't actually free the memory" aspect has evidently been fixed, but 
keeping too many objects for the available memory still does hang. 
Though for me, SysRq still works.)


Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.597991] Purging GPU 
memory, 0 bytes freed, 5685248 bytes still pinned.
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598134] Xorg invoked 
oom-killer: gfp_mask=0x0, order=0, oom_score_adj=0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598136] Xorg cpuset=/ 
mems_allowed=0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598140] CPU: 3 PID: 823 
Comm: Xorg Not tainted 3.16.0-4-amd64 #1 Debian 3.16.7-ckt11-1+deb8u5
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598142] Hardware name: 
TOSHIBA SATELLITE PRO C50-A-1E4/PT10F, BIOS 1.20 09/04/2013
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598144]   
8150b4c5 880036dc69a0 81509127
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598147]  0056c000 
880149207b30 880149207c08 
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598149]  8800a8b0a000 
a058c2ab 88014920 0100

Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598152] Call Trace:
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598159] 
[] ? dump_stack+0x41/0x51
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598163] 
[] ? dump_header+0x76/0x1e8
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598182] 
[] ? i915_gem_shrinker_oom+0x15b/0x1c0 [i915]
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598186] 
[] ? oom_kill_process+0x21d/0x370
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598189] 
[] ? find_lock_task_mm+0x3d/0x90
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598191] 
[] ? out_of_memory+0x473/0x4b0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598194] 
[] ? pagefault_out_of_memory+0x6f/0x80
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598198] 
[] ? __do_page_fault+0x3c5/0x4f0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598201] 
[] ? do_mmap_pgoff+0x2e9/0x3b0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598205] 
[] ? dput+0x9e/0x170
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598207] 
[] ? do_vfs_ioctl+0x2cf/0x4b0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598211] 
[] ? page_fault+0x28/0x30

Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598212] Mem-Info:
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598214] Node 0 DMA per-cpu:
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598215] CPU0: hi: 
0, btch:   1 usd:   0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598217] CPU1: hi: 
0, btch:   1 usd:   0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598218] CPU2: hi: 
0, btch:   1 usd:   0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598219] CPU3: hi: 
0, btch:   1 usd:   0

Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598220] Node 0 DMA32 per-cpu:
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598222] CPU0: hi: 
186, btch:  31 usd: 133
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598223] CPU1: hi: 
186, btch:  31 usd: 168
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598224] CPU2: hi: 
186, btch:  31 usd: 184
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598226] CPU3: hi: 
186, btch:  31 usd: 180
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598226] Node 0 Normal 
per-cpu:
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598228] CPU0: hi: 
186, btch:  31 usd: 125
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598229] CPU1: hi: 
186, btch:  31 usd: 166
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598230] CPU2: hi: 
186, btch:  31 usd: 197
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598232] CPU3: hi: 
186, btch:  31 usd: 146
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598236] active_anon:61164 
inactive_anon:263991 isolated_anon:0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598236] 
active_file:142392 inactive_file:107249 isolated_file:0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598236]  unevictable:0 
dirty:129 writeback:0 unstable:0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598236]  free:364625 
slab_reclaimable:23144 slab_unreclaimable:7171
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598236]  mapped:38107 
shmem:264190 pagetables:4104 bounce:0

Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598236]  free_cma:0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598239] Node 0 DMA 
free:15612kB min:272kB low:340kB high:408kB active_anon:0kB 
inactive_anon:56kB active_file:140kB inactive_file:0kB unevictable:0kB 
isolated(anon):0kB 

Re: [Beignet] Fwd: [Bug 1277925] New: clinfo: Failed to release test userptr object! (9) i915 kernel driver may not be sane!

2015-11-04 Thread Rebecca N. Palmer
Specifically (and assuming it is the same bug on Ivy Bridge and 
Haswell), fixed in git master by 
http://cgit.freedesktop.org/beignet/commit/?id=f48b4f6766fcaa193652fcbe6ea0bb29f92e45aa 
, still present in 1.1.x.


___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] Unrecoverable system lockup when allocating too much memory

2015-11-04 Thread Rebecca N. Palmer
This has been the case for some time 
(https://bugs.launchpad.net/ubuntu/+source/pyopencl/+bug/1354086); given 
that I've also occasionally seen it in graphics-only use, the root cause 
may be further down the stack.


___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH v3] GBE: Don't read past end of printf format string

2015-11-03 Thread Rebecca N. Palmer
When p == end (the null terminator byte), don't try to read p + 1:
as this is outside the string, it might be a '%' from a different
object (causing __parse_printf_state(end + 2, end, ...) to be called,
which will fail), or an invalid address.

Signed-off-by: Rebecca Palmer 
---
 backend/src/llvm/llvm_printf_parser.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/src/llvm/llvm_printf_parser.cpp 
b/backend/src/llvm/llvm_printf_parser.cpp
index bdaed8a..f427107 100644
--- a/backend/src/llvm/llvm_printf_parser.cpp
+++ b/backend/src/llvm/llvm_printf_parser.cpp
@@ -229,7 +229,7 @@ again:
 printf("string end with %%\n");
 goto error;
   }
-  if (*(p + 1) == '%') { // %%
+  if (p + 1 < end && *(p + 1) == '%') { // %%
 p += 2;
 goto again;
   }

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] Icedove mangling patches

2015-11-02 Thread Rebecca N. Palmer

On 03/11/15 01:50, Pan, Xiuli wrote:

LGTM, but the commit line is too long,
When it was written, the longest line was 66, but something appears to 
have joined it back together; does


https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/email-clients.txt#n208 
wrote:

- Set "mailnews.send_plaintext_flowed" to "false"

- Set "mailnews.wraplength" from "72" to "0"


not work any more?

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH v2] GBE: Don't read past end of printf format string

2015-11-02 Thread Rebecca N. Palmer
When p==end (the null terminator byte), don't try to read p+1
(outside the string, so might be an invalid address or a '%' from
a different object).

Signed-off-by: Rebecca Palmer 

diff --git a/backend/src/llvm/llvm_printf_parser.cpp 
b/backend/src/llvm/llvm_printf_parser.cpp
index bdaed8a..f427107 100644
--- a/backend/src/llvm/llvm_printf_parser.cpp
+++ b/backend/src/llvm/llvm_printf_parser.cpp
@@ -229,7 +229,7 @@ again:
 printf("string end with %%\n");
 goto error;
   }
-  if (*(p + 1) == '%') { // %%
+  if (p + 1 < end && *(p + 1) == '%') { // %%
 p += 2;
 goto again;
   }

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] Does LLVM 3.6 still hit a bug?

2015-11-01 Thread Rebecca N. Palmer
When was this workaround done (in particular, is beignet 1.1.1 
affected)?  As the Khronos test suite is non-public, I can't test this 
myself.


Debian have now announced an intention to remove LLVM 3.5 
(https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=803643).


An alternative option (which I haven't tested yet but appears to be 
working in Fedora) would be to apply 27522f9..2af7dea and go straight to 
LLVM 3.7.


On 09/10/15 06:56, Zou, Nanhai wrote:

It's a bug related to conditional compare.
The bug will affect the float saturate implementation in Beignet, we have 
worked around it.
The bug was exposed by a subcase in Khronos OpenCL conformance test, we will 
try to isolate the bug to report to llvm.

Thanks
Zou Nanhai



-Original Message-
From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
Rebecca N. Palmer
Sent: Thursday, October 01, 2015 1:32 AM
To: beignet@lists.freedesktop.org
Subject: [Beignet] Does LLVM 3.6 still hit a bug?

Debian are planning to switch their default LLVM/Clang to 3.6 soon.  Is it still
the case that

The recommended LLVM/CLANG version is 3.5 and/or 3.6. Based on our test

result, LLVM 3.5 has best pass rate on all the test suites. Compare to LLVM 3.5,
LLVM 3.6 has slightly lower pass rate(caused by one front end bug at clang 3.6)
but has better performance (3% to 5% up).
(http://www.freedesktop.org/wiki/Software/Beignet/)?  Where can I find
code to test for this bug (the test suite doesn't)?

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Don't read past end of printf format string

2015-11-01 Thread Rebecca N. Palmer
Reading p+1 when p==end is an out of bounds read.

Signed-off-by: Rebecca Palmer 

---
(Found by valgrind while investigating #90472; probably not the
actual cause of that crash, but still a bug.)

diff --git a/backend/src/llvm/llvm_printf_parser.cpp 
b/backend/src/llvm/llvm_printf_parser.cpp
index bdaed8a..f427107 100644
--- a/backend/src/llvm/llvm_printf_parser.cpp
+++ b/backend/src/llvm/llvm_printf_parser.cpp
@@ -229,7 +229,7 @@ again:
 printf("string end with %%\n");
 goto error;
   }
-  if (*(p + 1) == '%') { // %%
+  if (p + 1 < end && *(p + 1) == '%') { // %%
 p += 2;
 goto again;
   }

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] Does LLVM 3.6 still hit a bug?

2015-09-30 Thread Rebecca N. Palmer
Debian are planning to switch their default LLVM/Clang to 3.6 soon.  Is 
it still the case that

"The recommended LLVM/CLANG version is 3.5 and/or 3.6. Based on our test result, 
LLVM 3.5 has best pass rate on all the test suites. Compare to LLVM 3.5, LLVM 
3.6 has slightly lower pass rate(caused by one front end bug at clang 3.6) but 
has better performance (3% to 5% up)."
(http://www.freedesktop.org/wiki/Software/Beignet/)?  Where can I find 
code to test for this bug (the test suite doesn't)?


___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] Still getting "Failed to release test userptr object! (9) i915 kernel driver may not be sane!"

2015-09-19 Thread Rebecca N. Palmer
I also have an i5-3230M, and get the same error in my build chroots 
(Debian sid = libdrm 2.4.64) but not my base system (Debian jessie = 
libdrm 2.4.58); I have not attempted to determine whether the problem is 
the version or being in a chroot.


___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 1/2] Add a sanity test in clGetDeviceIDs

2015-05-16 Thread Rebecca N. Palmer
Run a small __local-using kernel in clGetDeviceIDs; if this returns
the wrong result, return CL_DEVICE_NOT_FOUND.
---

> just check kernel version is not
> an ideal method for those unofficial kernels with back porting patches.
> Then we have the following open questions in my mind:
>
>   How do we check whether the i915 KMD support secure batch buffer
>   execution if the batch buffer pass the cmd parser check under
>   full-ppgtt mode in UMD?
>
>   How do we check whether the i915 KMD support secure batch buffer
>   execution with aliasing ppgtt after the merging of the patch
>   "drm/i915: Arm cmd parser with aliasing ppgtt only" in UMD?

As far as I can see, there's no way to tell in advance (except
unreliably with a global version check) whether __local-using batches
will be accepted...so the easiest solution is probably to just try
running one and see what result we get.

diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 6aa6b3b..218b7a5 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -545,6 +545,74 @@ skl_gt4_break:
   return ret;
 }
 
+/* Runs a small kernel to check that the device works; returns
+ * 0 for success, 1 for silently wrong result, 2 for error */
+LOCAL cl_int
+cl_self_test(cl_device_id device)
+{
+  cl_int status, ret;
+  cl_context ctx;
+  cl_command_queue queue;
+  cl_program program;
+  cl_kernel kernel;
+  cl_mem buffer;
+  cl_event kernel_finished;
+  size_t n = 3;
+  cl_int test_data[3] = {3, 7, 5};
+  const char* kernel_source = __kernel void self_test(__global int *buf) {
+__local int tmp[3];
+tmp[get_local_id(0)] = buf[get_local_id(0)];
+barrier(CLK_LOCAL_MEM_FENCE);
+buf[get_global_id(0)] = tmp[2 - get_local_id(0)] + buf[get_global_id(0)];
+  }; // using __local to catch the no SLM on Haswell problem
+  ret = 2;
+  ctx = clCreateContext(NULL, 1, device, NULL, NULL, status);
+  if (status == CL_SUCCESS) {
+queue = clCreateCommandQueue(ctx, device, 0, status);
+if (status == CL_SUCCESS) {
+  program = clCreateProgramWithSource(ctx, 1, kernel_source, NULL, 
status);
+  if (status == CL_SUCCESS) {
+status = clBuildProgram(program, 1, device, , NULL, NULL);
+if (status == CL_SUCCESS) {
+  kernel = clCreateKernel(program, self_test, status);
+  if (status == CL_SUCCESS) {
+buffer = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR, n*4, test_data, 
status);
+if (status == CL_SUCCESS) {
+  status = clSetKernelArg(kernel, 0, sizeof(cl_mem), buffer);
+  if (status == CL_SUCCESS) {
+status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, n, 
n, 0, NULL, kernel_finished);
+if (status == CL_SUCCESS) {
+  status = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, n*4, 
test_data, 1, kernel_finished, NULL);
+  if (status == CL_SUCCESS) {
+if (test_data[0] == 8  test_data[1] == 14  
test_data[2] == 8){
+  ret = 0;
+} else {
+  ret = 1;
+  printf(Beignet: self-test failed: (3, 7, 5) + (5, 7, 3) 
returned (%i, %i, %i)\n
+  See README.md or 
http://www.freedesktop.org/wiki/Software/Beignet/\n;,
+  test_data[0], test_data[1], test_data[2]);
+}
+  }
+}
+  }
+}
+clReleaseMemObject(buffer);
+  }
+  clReleaseKernel(kernel);
+}
+  }
+  clReleaseProgram(program);
+}
+clReleaseCommandQueue(queue);
+  }
+  clReleaseContext(ctx);
+  if (ret == 2) {
+printf(Beignet: self-test failed: error %i\n
+See README.md or http://www.freedesktop.org/wiki/Software/Beignet/\n;, 
status);
+  }
+  return ret;
+}
+
 LOCAL cl_int
 cl_get_device_ids(cl_platform_idplatform,
   cl_device_typedevice_type,
@@ -556,6 +624,20 @@ cl_get_device_ids(cl_platform_idplatform,
 
   /* Do we have a usable device? */
   device = cl_get_gt_device();
+  if (device  cl_self_test(device)) {
+int disable_self_test = 0;
+// can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++
+const char *env = getenv(OCL_IGNORE_SELF_TEST);
+if (env != NULL) {
+  sscanf(env, %i, disable_self_test);
+}
+if (disable_self_test) {
+  printf(Beignet: Warning - overriding self-test failure\n);
+} else {
+  printf(Beignet: disabling non-working device\n);
+  device = 0;
+}
+  }
   if (!device) {
 if (num_devices)
   *num_devices = 0;
diff --git a/utests/setenv.sh.in b/utests/setenv.sh.in
index ac06b10..67e3bf1 100644
--- a/utests/setenv.sh.in
+++ b/utests/setenv.sh.in
@@ -6,3 +6,5 @@ export OCL_PCH_PATH=@LOCAL_OCL_PCH_OBJECT@
 export OCL_KERNEL_PATH=@CMAKE_CURRENT_SOURCE_DIR@/../kernels
 export OCL_GBE_PATH=@LOCAL_GBE_OBJECT_DIR@
 export OCL_INTERP_PATH=@LOCAL_INTERP_OBJECT_DIR@
+#disable self-test so we can get something more 

Re: [Beignet] [PATCH 1/2] Add a sanity test in clGetDeviceIDs + [PATCH 2/2] Docs: update/clarify Haswell issues

2015-05-16 Thread Rebecca N. Palmer
Sorry, both of those should have been

Signed-off-by: Rebecca Palmer rebecca_pal...@zoho.com

As usual, I can only test on Ivy Bridge, so someone should
probably check that they actually catch the
no-__local-on-Haswell bug.

On 16/05/15 18:48, Rebecca N. Palmer wrote:
 Run a small __local-using kernel in clGetDeviceIDs; if this returns
 the wrong result, return CL_DEVICE_NOT_FOUND.
 ---
 
 just check kernel version is not
 an ideal method for those unofficial kernels with back porting patches. Then 
 we have the
 following open questions in my mind:

   How do we check whether the i915 KMD support secure batch buffer execution 
 if the batch
   buffer pass the cmd parser check under full-ppgtt mode in UMD?

   How do we check whether the i915 KMD support secure batch buffer execution 
 with aliasing
   ppgtt after the merging of the patch drm/i915: Arm cmd parser with 
 aliasing ppgtt only in UMD?
 
 As far as I can see, there's no way to tell in advance (except
 unreliably with a global version check) whether __local-using batches
 will be accepted...so the easiest solution is probably to just try
 running one and see what result we get.
 
 diff --git a/src/cl_device_id.c b/src/cl_device_id.c
 index 6aa6b3b..218b7a5 100644
 --- a/src/cl_device_id.c
 +++ b/src/cl_device_id.c
 @@ -545,6 +545,74 @@ skl_gt4_break:
return ret;
  }
  
 +/* Runs a small kernel to check that the device works; returns
 + * 0 for success, 1 for silently wrong result, 2 for error */
 +LOCAL cl_int
 +cl_self_test(cl_device_id device)
 +{
 +  cl_int status, ret;
 +  cl_context ctx;
 +  cl_command_queue queue;
 +  cl_program program;
 +  cl_kernel kernel;
 +  cl_mem buffer;
 +  cl_event kernel_finished;
 +  size_t n = 3;
 +  cl_int test_data[3] = {3, 7, 5};
 +  const char* kernel_source = __kernel void self_test(__global int *buf) {
 +__local int tmp[3];
 +tmp[get_local_id(0)] = buf[get_local_id(0)];
 +barrier(CLK_LOCAL_MEM_FENCE);
 +buf[get_global_id(0)] = tmp[2 - get_local_id(0)] + 
 buf[get_global_id(0)];
 +  }; // using __local to catch the no SLM on Haswell problem
 +  ret = 2;
 +  ctx = clCreateContext(NULL, 1, device, NULL, NULL, status);
 +  if (status == CL_SUCCESS) {
 +queue = clCreateCommandQueue(ctx, device, 0, status);
 +if (status == CL_SUCCESS) {
 +  program = clCreateProgramWithSource(ctx, 1, kernel_source, NULL, 
 status);
 +  if (status == CL_SUCCESS) {
 +status = clBuildProgram(program, 1, device, , NULL, NULL);
 +if (status == CL_SUCCESS) {
 +  kernel = clCreateKernel(program, self_test, status);
 +  if (status == CL_SUCCESS) {
 +buffer = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR, n*4, 
 test_data, status);
 +if (status == CL_SUCCESS) {
 +  status = clSetKernelArg(kernel, 0, sizeof(cl_mem), buffer);
 +  if (status == CL_SUCCESS) {
 +status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, n, 
 n, 0, NULL, kernel_finished);
 +if (status == CL_SUCCESS) {
 +  status = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, 
 n*4, test_data, 1, kernel_finished, NULL);
 +  if (status == CL_SUCCESS) {
 +if (test_data[0] == 8  test_data[1] == 14  
 test_data[2] == 8){
 +  ret = 0;
 +} else {
 +  ret = 1;
 +  printf(Beignet: self-test failed: (3, 7, 5) + (5, 7, 
 3) returned (%i, %i, %i)\n
 +  See README.md or 
 http://www.freedesktop.org/wiki/Software/Beignet/\n;,
 +  test_data[0], test_data[1], test_data[2]);
 +}
 +  }
 +}
 +  }
 +}
 +clReleaseMemObject(buffer);
 +  }
 +  clReleaseKernel(kernel);
 +}
 +  }
 +  clReleaseProgram(program);
 +}
 +clReleaseCommandQueue(queue);
 +  }
 +  clReleaseContext(ctx);
 +  if (ret == 2) {
 +printf(Beignet: self-test failed: error %i\n
 +See README.md or http://www.freedesktop.org/wiki/Software/Beignet/\n;, 
 status);
 +  }
 +  return ret;
 +}
 +
  LOCAL cl_int
  cl_get_device_ids(cl_platform_idplatform,
cl_device_typedevice_type,
 @@ -556,6 +624,20 @@ cl_get_device_ids(cl_platform_idplatform,
  
/* Do we have a usable device? */
device = cl_get_gt_device();
 +  if (device  cl_self_test(device)) {
 +int disable_self_test = 0;
 +// can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++
 +const char *env = getenv(OCL_IGNORE_SELF_TEST);
 +if (env != NULL) {
 +  sscanf(env, %i, disable_self_test);
 +}
 +if (disable_self_test) {
 +  printf(Beignet: Warning - overriding self-test failure\n);
 +} else {
 +  printf(Beignet: disabling non-working device\n);
 +  device = 0;
 +}
 +  }
if (!device) {
  if (num_devices)
*num_devices = 0;
 diff --git

Re: [Beignet] printf not working (or any thing for that matter)

2015-05-15 Thread Rebecca N. Palmer
"Nothing works on Haswell-ULT" is a known bug, fixed by 
http://cgit.freedesktop.org/beignet/commit/?id=83f8739b6fc4893fac60145326052ccb5cf653dc

(Should that also be applied to the 1.0 branch?)

If you prefer not to recompile beignet, a workaround is:
sudo echo 0 > /sys/module/i915/parameters/enable_cmd_parser

Note that even with this fixed, the __local memory space will not work; 
see https://01.org/zh/beignet/downloads/linux-kernel-patch-hsw-support 
if you need that.


___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] Copyright problem in backend/src/ir/structural_analysis.*

2015-05-14 Thread Rebecca N. Palmer
Since commit d47f6dd8f308323919d2acb0c1b9f562c084866c, beignet has 
included backend/src/ir/structural_analysis.* based on gpuocelot.


gpuocelot's license was then BSD plus "you must obey US export 
restrictions" (even if you are not in the US).  This is non-free by 
Debian's definition[1,2], and means that beignet as a whole is no longer 
LGPL.


gpuocelot has since added a top-level LICENSE notice without the 
problematic clause[3], but also still has ocelot/COPYING with it[4], 
making its actual license unclear; the obvious next step would be to ask 
them for clarification.


This is currently being discussed in Debian at [5].

[1] https://lists.debian.org/debian-legal/2015/04/msg00030.html
[2] https://people.debian.org/~bap/dfsg-faq part 12p
[3] https://github.com/gtcasl/gpuocelot/blob/master/LICENSE
[4] https://github.com/gtcasl/gpuocelot/blob/master/ocelot/COPYING
[5] 
http://lists.alioth.debian.org/pipermail/pkg-opencl-devel/Week-of-Mon-20150511/date.html


___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] Haswell issues

2015-04-29 Thread Rebecca N. Palmer

> After revert the beignet's atomic in l3 patch [9e8874c ?], the 
> i915.enable_ppgtt=2 should work for HSW platform.

If it's that easy to really fix this, I agree we should do so; I'd taken 
the absence of anything other than "patch the kernel" in the 
documentation to mean that no other fix was known.



> Daniel already submitted a patchset to address part of this issue.

http://lists.freedesktop.org/archives/intel-gfx/2015-April/064635.html ?


> could you help to identify the first kernel version which this enable_ppgtt=2 
> works for beignet?

Looks like 
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=78a423772d08eb5a048765a883b5b5a308ea0d0f 
but I don't have the hardware to try it.


___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH] utests: fix test case builtin_tgamma.

2015-04-29 Thread Rebecca N. Palmer

On 29/04/15 08:54, Zhigang Gong wrote:

The patch LGTM too, but the patch seems broken for me, could you check
and send a new version?

Don't know what's wrong with this one: what error do you get?

Compare with tgamma instead of tgammaf for better accuracy.
Include negative inputs, and handle the resulting denormals.
Print maximum error found.

Signed-off-by: Rebecca Palmer rebecca_pal...@zoho.com

diff --git a/utests/builtin_tgamma.cpp b/utests/builtin_tgamma.cpp
index 47cc5f4..b7db69b 100644
--- a/utests/builtin_tgamma.cpp
+++ b/utests/builtin_tgamma.cpp
@@ -20,10 +20,15 @@ void builtin_tgamma(void)
   if (env_strict == NULL || strcmp(env_strict, 0) == 0)
 ULPSIZE_FACTOR = 1.;
 
-  for (int j = 0; j  1024; j ++) {

+  cl_device_fp_config fp_config;
+  clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(cl_device_fp_config), 
fp_config, 0);
+  bool denormals_supported = fp_config  CL_FP_DENORM;
+  float max_ulp = 0, max_ulp_at = 0;
+
+  for (int j = 0; j  128; j ++) {
 OCL_MAP_BUFFER(0);
 for (int i = 0; i  n; ++i) {
-  src[i] = ((float*)buf_data[0])[i] = (j*n+i+1) * 0.001f;
+  src[i] = ((float*)buf_data[0])[i] = j - 64 + i*0.001f;
 }
 OCL_UNMAP_BUFFER(0);
 
@@ -32,7 +37,14 @@ void builtin_tgamma(void)

 OCL_MAP_BUFFER(1);
 float *dst = (float*)buf_data[1];
 for (int i = 0; i  n; ++i) {
-  float cpu = tgammaf(src[i]);
+  float cpu = tgamma(src[i]);
+  if (!denormals_supported  std::fpclassify(cpu)==FP_SUBNORMAL  
dst[i]==0) {
+cpu = 0;
+  }
+  if (fabsf(cpu - dst[i])  cl_FLT_ULP(cpu) * max_ulp) {
+max_ulp = fabsf(cpu - dst[i]) / cl_FLT_ULP(cpu);
+max_ulp_at = src[i];
+  }
   if (isinf(cpu)) {
 OCL_ASSERT(isinf(dst[i]));
   } else if (fabsf(cpu - dst[i]) = cl_FLT_ULP(cpu) * ULPSIZE_FACTOR) {
@@ -42,6 +54,7 @@ void builtin_tgamma(void)
 }
 OCL_UNMAP_BUFFER(1);
   }
+  printf(max error=%f ulp at x=%f , max_ulp, max_ulp_at);
 }
 
 MAKE_UTEST_FROM_FUNCTION(builtin_tgamma);
Compare with tgamma instead of tgammaf for better accuracy.
Include negative inputs, and handle the resulting denormals.
Print maximum error found.

Signed-off-by: Rebecca Palmer rebecca_pal...@zoho.com

diff --git a/utests/builtin_tgamma.cpp b/utests/builtin_tgamma.cpp
index 47cc5f4..b7db69b 100644
--- a/utests/builtin_tgamma.cpp
+++ b/utests/builtin_tgamma.cpp
@@ -20,10 +20,15 @@ void builtin_tgamma(void)
   if (env_strict == NULL || strcmp(env_strict, 0) == 0)
 ULPSIZE_FACTOR = 1.;
 
-  for (int j = 0; j  1024; j ++) {
+  cl_device_fp_config fp_config;
+  clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(cl_device_fp_config), fp_config, 0);
+  bool denormals_supported = fp_config  CL_FP_DENORM;
+  float max_ulp = 0, max_ulp_at = 0;
+
+  for (int j = 0; j  128; j ++) {
 OCL_MAP_BUFFER(0);
 for (int i = 0; i  n; ++i) {
-  src[i] = ((float*)buf_data[0])[i] = (j*n+i+1) * 0.001f;
+  src[i] = ((float*)buf_data[0])[i] = j - 64 + i*0.001f;
 }
 OCL_UNMAP_BUFFER(0);
 
@@ -32,7 +37,14 @@ void builtin_tgamma(void)
 OCL_MAP_BUFFER(1);
 float *dst = (float*)buf_data[1];
 for (int i = 0; i  n; ++i) {
-  float cpu = tgammaf(src[i]);
+  float cpu = tgamma(src[i]);
+  if (!denormals_supported  std::fpclassify(cpu)==FP_SUBNORMAL  dst[i]==0) {
+cpu = 0;
+  }
+  if (fabsf(cpu - dst[i])  cl_FLT_ULP(cpu) * max_ulp) {
+max_ulp = fabsf(cpu - dst[i]) / cl_FLT_ULP(cpu);
+max_ulp_at = src[i];
+  }
   if (isinf(cpu)) {
 OCL_ASSERT(isinf(dst[i]));
   } else if (fabsf(cpu - dst[i]) = cl_FLT_ULP(cpu) * ULPSIZE_FACTOR) {
@@ -42,6 +54,7 @@ void builtin_tgamma(void)
 }
 OCL_UNMAP_BUFFER(1);
   }
+  printf(max error=%f ulp at x=%f , max_ulp, max_ulp_at);
 }
 
 MAKE_UTEST_FROM_FUNCTION(builtin_tgamma);
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH] Allow building with Python 3

2015-04-29 Thread Rebecca N. Palmer

> -61,[8] +62[,8] means the following block adds one more line:

No, that means the _preceding_ block(s) add one more line, which they do.


>   raise
>   all_vector = 1,2,3,4,8,16

There was a space-only line between these: has the email system lost it?

Make the build scripts work in both Python 2 and Python 3.
(CMake prefers Python 2 if both are available, but will use
Python 3 if only it is installed.)

Signed-off-by: Rebecca Palmer rebecca_pal...@zoho.com

diff --git a/backend/src/libocl/script/gen_vector.py 
b/backend/src/libocl/script/gen_vector.py
index ffc573a..92582f5 100755
--- a/backend/src/libocl/script/gen_vector.py
+++ b/backend/src/libocl/script/gen_vector.py
@@ -20,13 +20,14 @@
 
 # This file is to generate inline code to lower down those builtin

 # vector functions to scalar functions.
+from __future__ import print_function
 import re
 import sys
 import os
 
 if len(sys.argv) != 4:

-print Invalid argument {0}.format(sys.argv)
-print use {0} spec_file_name output_file_name 
just_proto.format(sys.argv[0])
+print(Invalid argument {0}.format(sys.argv))
+print(use {0} spec_file_name output_file_name 
just_proto.format(sys.argv[0]))
 raise
 
 all_vector = 1,2,3,4,8,16

@@ -61,8 +62,8 @@ all_type = all_int_type + all_float_type
 
 # all vector/scalar types

 for t in all_type:
-exec {0}n = [\{0}n\, gen_vector_type([\{0}\])].format(t)
-exec s{0} = [\{0}\, gen_vector_type([\{0}\], [1])].format(t)
+exec({0}n = [\{0}n\, gen_vector_type([\{0}\])].format(t))
+exec(s{0} = [\{0}\, gen_vector_type([\{0}\], [1])].format(t))
 
 # Predefined type sets according to the Open CL spec.

 math_gentype = [math_gentype, gen_vector_type(all_float_type)]
@@ -124,8 +125,8 @@ def check_type(types):
 for t in types:
 memspace, t = stripMemSpace(t)
 if not t in type_dict:
-print t
-raise found invalid type.
+print(t)
+raise TypeError(found invalid type.)
 
 def match_unsigned(dtype):

 if dtype[0] == 'float':
@@ -187,8 +188,8 @@ def fixup_type(dstType, srcType, n):
 if (len(dstType) == len(srcType)):
 return dstType[n]
 
-print dstType, srcType

-raise type mispatch
+print(dstType, srcType)
+raise TypeError(type mispatch)
 
 class builtinProto():

 valueTypeStr = 
@@ -226,7 +227,7 @@ class builtinProto():
 
 def init_from_line(self, t):

 self.append('//{0}'.format(t))
-line = filter(None, re.split(',| |\(', t.rstrip(')\n')))
+line = [_f for _f in re.split(',| |\(', t.rstrip(')\n')) if _f]
 self.paramCount = 0
 stripped = 0
 memSpace = ''
@@ -310,7 +311,7 @@ class builtinProto():
 vtype = fixup_type(vtypeSeq, ptypeSeqs[n], i)
 if vtype[1] != ptype[1]:
 if ptype[1] != 1:
-raise parameter is not a scalar but has different width 
with result value.
+raise TypeError(parameter is not a scalar but has 
different width with result value.)
 if isPointer(ptype):
 formatStr += ''
 formatStr += 'param{0}'.format(n)
@@ -333,7 +334,7 @@ class builtinProto():
 
 def output(self):

 for line in self.outputStr:
-print line
+print(line)
 
 def output(self, outFile):

 for line in self.outputStr:
diff --git a/utests/utest_generator.py b/utests/utest_generator.py
index 510c41a..7d2d3a0 100644
--- a/utests/utest_generator.py
+++ b/utests/utest_generator.py
@@ -1,4 +1,5 @@
 #!/usr/bin/python
+from __future__ import print_function
 import os,sys,re
 
 FLT_MAX_POSI='0x1.fep127f'

@@ -326,7 +327,7 @@ which can print more values and information to assist 
debuging the issue.
 file_object.close()
 
   def nameForCmake(self,content,namesuffix):

-print(generated/%s_%s.cpp%(self.fileName,namesuffix)),
+print(generated/%s_%s.cpp%(self.fileName,namesuffix),end= )
 
   def utestFunc(self,index):

 funcLines=[]
Make the build scripts work in both Python 2 and Python 3.
(CMake prefers Python 2 if both are available, but will use
Python 3 if only it is installed.)

Signed-off-by: Rebecca Palmer rebecca_pal...@zoho.com

diff --git a/backend/src/libocl/script/gen_vector.py b/backend/src/libocl/script/gen_vector.py
index ffc573a..92582f5 100755
--- a/backend/src/libocl/script/gen_vector.py
+++ b/backend/src/libocl/script/gen_vector.py
@@ -20,13 +20,14 @@
 
 # This file is to generate inline code to lower down those builtin
 # vector functions to scalar functions.
+from __future__ import print_function
 import re
 import sys
 import os
 
 if len(sys.argv) != 4:
-print Invalid argument {0}.format(sys.argv)
-print use {0} spec_file_name output_file_name just_proto.format(sys.argv[0])
+print(Invalid argument {0}.format(sys.argv))
+print(use {0} spec_file_name output_file_name just_proto.format(sys.argv[0]))
 raise

  1   2   >