Re: [Beignet] [PATCH 5/5] Enable OpenCL 2.0 only where supported

2017-02-08 Thread Yang, Rong R
Because we use -cl-std=CL1.2 by default when OpenCL 2.0 is enabled, I prefer to
always report address_bits = 32 for now.
The OpenCL spec allows only one address_bits value per device, but GEN9 now
supports both 32-bit and 64-bit addressing, so there is no way to comply with
the spec.
I think we could change GEN9's address size to 64 bits for both OpenCL 1.2 and
OpenCL 2.0 once there is no obvious performance drop.

Another issue is that beignet's OpenCL 2.0 support does not work on i386 systems
yet, so maybe we also need to set CAN_OPENCL_20 to OFF on i386 systems.

We also need to update the README after this patch is merged.

The other part of the patchset LGTM. 

> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Xiuli Pan
> Sent: Tuesday, January 24, 2017 16:48
> To: beignet@lists.freedesktop.org
> Cc: Pan, Xiuli 
> Subject: [Beignet] [PATCH 5/5] Enable OpenCL 2.0 only where supported
> 
> From: Pan Xiuli 
> 
> This allows a single beignet binary to both offer 2.0 where available, and 
> still
> work on older hardware.
> V2: Default to 1.2 when -cl-std is not set (required by the OpenCL spec,
> and also likely to be faster).
> V3: Only enable OpenCL 2.0 when llvm version is 39.
> 
> Contributor: Rebecca N. Palmer 
> Signed-off-by: Pan Xiuli 
> ---
>  CMakeLists.txt  | 39 +--
>  backend/src/backend/program.cpp | 19 ++-
>  src/cl_device_data.h|  2 ++
>  src/cl_gen9_device.h|  2 ++
>  src/cl_gt_device.h  | 12 ++--
>  src/cl_platform_id.c|  2 +-
>  src/cl_platform_id.h|  6 --
>  7 files changed, 54 insertions(+), 28 deletions(-)
> 
> diff --git a/CMakeLists.txt b/CMakeLists.txt index 59abc45..75af35e 100644
> --- a/CMakeLists.txt
> +++ b/CMakeLists.txt
> @@ -231,20 +231,15 @@ IF (EXPERIMENTAL_DOUBLE)
>ADD_DEFINITIONS(-DENABLE_FP64)
>  ENDIF(EXPERIMENTAL_DOUBLE)
> 
> -OPTION(ENABLE_OPENCL_20 "Enable opencl 2.0 support" OFF) -IF
> (ENABLE_OPENCL_20)
> -  Find_Program(LSPCI lspci)
> -  IF (NOT LSPCI)
> -MESSAGE(FATAL_ERROR "Looking for lspci - not found")
> -  ENDIF (NOT LSPCI)
> -  EXECUTE_PROCESS(COMMAND
> "${CMAKE_CURRENT_SOURCE_DIR}/GetGenID.sh"
> -  RESULT_VARIABLE SUPPORT_OCL20_DEVICE
> -  OUTPUT_VARIABLE PCI_ID_NOT_USED)
> -
> -  IF (NOT SUPPORT_OCL20_DEVICE EQUAL 1)
> -MESSAGE(FATAL_ERROR "Only SKL and newer devices support OpenCL
> 2.0 now, your device don't support.")
> -  ENDIF (NOT SUPPORT_OCL20_DEVICE EQUAL 1)
> +SET(CAN_OPENCL_20 ON)
> +IF (NOT HAVE_DRM_INTEL_BO_SET_SOFTPIN)
> +  SET(CAN_OPENCL_20 OFF)
> +ENDIF (NOT HAVE_DRM_INTEL_BO_SET_SOFTPIN) IF
> (LLVM_VERSION_NODOT
> +VERSION_LESS 39)
> +  SET(CAN_OPENCL_20 OFF)
> +ENDIF (LLVM_VERSION_NODOT VERSION_LESS 39)
> 
> +IF (ENABLE_OPENCL_20)
>IF (NOT HAVE_DRM_INTEL_BO_SET_SOFTPIN)
>  MESSAGE(FATAL_ERROR "Please update libdrm to version 2.4.66 or later
> to enable OpenCL 2.0.")
>ENDIF (NOT HAVE_DRM_INTEL_BO_SET_SOFTPIN) @@ -252,9 +247,25 @@
> IF (ENABLE_OPENCL_20)
>IF (LLVM_VERSION_NODOT VERSION_LESS 39)
>  MESSAGE(FATAL_ERROR "Please update LLVM to version 3.9 or later to
> enable OpenCL 2.0.")
>ENDIF (LLVM_VERSION_NODOT VERSION_LESS 39)
> +ENDIF(ENABLE_OPENCL_20)
> 
> +IF (DEFINED ENABLE_OPENCL_20)
> +  IF (ENABLE_OPENCL_20 AND CAN_OPENCL_20)
> +SET(CAN_OPENCL_20 ON)
> +  ELSE(ENABLE_OPENCL_20 AND CAN_OPENCL_20)
> +SET(CAN_OPENCL_20 OFF)
> +  ENDIF (ENABLE_OPENCL_20 AND CAN_OPENCL_20) ENDIF (DEFINED
> +ENABLE_OPENCL_20)
> +
> +OPTION(ENABLE_OPENCL_20 "Enable opencl 2.0 support"
> ${CAN_OPENCL_20})
> +
> +IF (CAN_OPENCL_20)
> +  SET (ENABLE_OPENCL_20 ON)
> +  MESSAGE(STATUS "Building with OpenCL 2.0.")
>ADD_DEFINITIONS(-DENABLE_OPENCL_20)
> -ENDIF(ENABLE_OPENCL_20)
> +ELSE (CAN_OPENCL_20)
> +  MESSAGE(STATUS "Building with OpenCL 1.2.")
> +ENDIF(CAN_OPENCL_20)
> 
>  set (LIBCL_DRIVER_VERSION_MAJOR 1)
>  set (LIBCL_DRIVER_VERSION_MINOR 4)
> diff --git a/backend/src/backend/program.cpp
> b/backend/src/backend/program.cpp index 85d0aa9..09c79d8 100644
> --- a/backend/src/backend/program.cpp
> +++ b/backend/src/backend/program.cpp
> @@ -31,6 +31,7 @@
>  #include "ir/value.hpp"
>  #include "ir/unit.hpp"
>  #include "ir/printf.hpp"
> +#include "src/cl_device_data.h"
> 
>  #ifdef GBE_COMPILER_AVAILABLE
>  #include "llvm/llvm_to_gen.hpp"
> @@ -855,6 +856,7 @@ namespace gbe {
>   size_t *errSize,
>   uint32_t )
>{
> +uint32_t maxoclVersion = oclVersion;
>  std::string pchFileName;
>  bool findPCH = false;
>  #if defined(__ANDROID__)
> @@ -1022,15 +1024,9 @@ EXTEND_QUOTE:
>  }
> 
>  if (useDefaultCLCVersion) {
> -#ifdef ENABLE_OPENCL_20
> -  clOpt.push_back("-D__OPENCL_C_VERSION__=200");
> -  

[Beignet] [PATCH 3/4] Add a mem stream class to support ELF write.

2017-02-08 Thread junyan . he
From: Junyan He 

The ELF writer needs a working seekp, which sstream does not provide.
Implement wmemstreambuf to back an in-memory ostream that supports
seeking beyond the current range.

Signed-off-by: Junyan He 
---
 backend/src/CMakeLists.txt  |   1 +
 backend/src/backend/gen_program_elf.cpp | 124 
 2 files changed, 125 insertions(+)
 create mode 100644 backend/src/backend/gen_program_elf.cpp

diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index 7c1f4db..90db85b 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -128,6 +128,7 @@ set (GBE_SRC
 backend/gen9_context.hpp
 backend/gen9_context.cpp
 backend/gen_program.cpp
+backend/gen_program_elf.cpp
 backend/gen_program.hpp
 backend/gen_program.h
 backend/gen7_instruction.hpp
diff --git a/backend/src/backend/gen_program_elf.cpp 
b/backend/src/backend/gen_program_elf.cpp
new file mode 100644
index 000..efd45fe
--- /dev/null
+++ b/backend/src/backend/gen_program_elf.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see .
+ *
+ */
+#include "ocl_common_defines.h"
+#include "elfio/elfio.hpp"
+#include "backend/gen_program.hpp"
+#include "backend/gen_program.hpp"
+#include 
+#include 
+#include 
+
+namespace gbe
+{
+/* The ELF writer needs a working seekp, which sstream cannot provide here,
+   and we do not want an ofstream, which would create a real file. We only
+   want to keep the ELF image in memory, so implement a simple write-only
+   streambuf whose malloc'ed buffer doubles in size when it runs out. */
+class wmemstreambuf : public std::streambuf
+{
+public:
+  wmemstreambuf(size_t size) : max_writed(0) // allocate `size` zeroed bytes and install them as the put area
+  {
+buf_ = static_cast(::malloc(size)); // NOTE(review): cast target type is missing in this text, and the malloc result is not checked
+memset(buf_, 0, size);
+buf_size_ = size;
+setbuf(buf_, buf_size_);
+  }
+  ~wmemstreambuf() // frees the buffer; pointers returned by getcontent() must not be used afterwards
+  {
+if (buf_)
+  ::free(buf_);
+  }
+
+  char *getcontent(size_t _sz) // returns the start of the internal buffer
+  {
+total_sz = max_writed; // NOTE(review): total_sz is not declared here (parameter reads `_sz`); presumably a stripped out-parameter receiving the written size - confirm
+return buf_;
+  }
+
+protected:
+  char *buf_; // malloc/realloc'ed storage, released in the destructor
+  std::streamsize buf_size_; // current size of buf_ in bytes
+  std::streamsize max_writed; // highest end offset reached by xsputn() so far
+
+  virtual std::streambuf *setbuf(char *s, std::streamsize n) // make [s, s + n) the put area
+  {
+auto const begin(s);
+auto const end(s + n);
+setp(begin, end); // also moves the write pointer back to `begin`
+return this;
+  }
+
+  virtual std::streampos seekpos(std::streampos pos, // absolute seek of the write position, growing the buffer if needed
+ std::ios_base::openmode which =
+   ::std::ios_base::in | ::std::ios_base::out)
+  {
+if (which != std::ios_base::out) { // only pure output seeks are accepted; the default argument (in | out) lands here
+  assert(0);
+  return pos_type(off_type(-1));
+}
+
+if (pos >= epptr() - pbase()) { // target is at or past the end of the put area
+  auto old_size = buf_size_;
+  while (buf_size_ < pos) { // NOTE(review): cannot terminate if buf_size_ is 0
+buf_size_ *= 2;
+  }
+
+  buf_ = static_cast(::realloc(buf_, buf_size_)); // NOTE(review): realloc result is not checked
+  memset(buf_ + old_size, 0, buf_size_ - old_size); // zero only the newly added tail
+  setbuf(buf_, buf_size_); // re-install the put area; write pointer is back at the start
+} else {
+  setp(pbase(), epptr()); // rewind the write pointer so the pbump() below is absolute
+}
+
+pbump(pos); // advance from the start of the buffer to `pos`
+return pos;
+  }
+
+  virtual int sync() { return 0; } // nothing to flush
+  virtual int overflow(int c) { return c; }; // NOTE(review): returns c without storing it or growing the buffer
+
+  virtual std::streamsize xsgetn(const char *s, std::streamsize count) // reading is unsupported. NOTE(review): std::streambuf::xsgetn takes char*, so this signature does not override it - confirm intent
+  {
+assert(0);
+return traits_type::eof();
+  }
+
+  virtual std::streamsize xsputn(const char *s, std::streamsize const count) // copy `count` bytes at the write position, growing the buffer if needed
+  {
+if (epptr() - pptr() < count) { // not enough room left in the put area
+  auto old_pos = pptr() - pbase(); // remember the write offset; setbuf() below resets it
+  while (buf_size_ < (pptr() - pbase()) + count) { // NOTE(review): cannot terminate if buf_size_ is 0
+buf_size_ *= 2;
+  }
+  buf_ = static_cast(::realloc(buf_, buf_size_)); // NOTE(review): realloc result is not checked
+  memset(buf_ + old_pos, 0, buf_size_ - old_pos); // zeroes from the old write offset to the new end of the buffer
+  setbuf(buf_, buf_size_);
+  pbump(old_pos); // restore the write offset
+}
+
+std::memcpy(pptr(), s, count);
+if (pptr() - pbase() + count > max_writed) // keep the high-water mark of written bytes
+  max_writed = pptr() - pbase() + count;
+
+pbump(count); // step past the bytes just copied
+
+return count;
+  }
+};
+} /* namespace gbe */
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 2/4] Modify the elfio lib, make it generate memory image.

2017-02-08 Thread junyan . he
From: Junyan He 

We do not want to write the ELF binary directly to a file.
We prefer to keep it in memory and analyse the ELF image
at runtime.

Signed-off-by: Junyan He 
---
 backend/src/elfio/elfio.hpp| 37 ++---
 backend/src/elfio/elfio_header.hpp |  1 -
 2 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/backend/src/elfio/elfio.hpp b/backend/src/elfio/elfio.hpp
index 3a86c93..7b9501c 100644
--- a/backend/src/elfio/elfio.hpp
+++ b/backend/src/elfio/elfio.hpp
@@ -148,14 +148,7 @@ class elfio
 }
 
 
//--
-bool save( const std::string& file_name )
-{
-std::ofstream f( file_name.c_str(), std::ios::out | std::ios::binary );
-
-if ( !f ) {
-return false;
-}
-
+bool save( std::ostream ) {
 bool is_still_good = true;
 
 // Define layout specific header fields
@@ -175,15 +168,29 @@ class elfio
 is_still_good = is_still_good && layout_sections_without_segments();
 is_still_good = is_still_good && layout_section_table();
 
-is_still_good = is_still_good && save_header( f );
-is_still_good = is_still_good && save_sections( f );
-is_still_good = is_still_good && save_segments( f );
+is_still_good = is_still_good && save_header( stream );
+is_still_good = is_still_good && save_sections( stream );
+is_still_good = is_still_good && save_segments( stream );
 
-f.close();
 
 return is_still_good;
 }
 
+bool save( const std::string& file_name )
+{
+    // Open the target file for binary output; give up if that fails.
+    std::ofstream out( file_name.c_str(), std::ios::out | std::ios::binary );
+    if ( !out ) {
+        return false;
+    }
+
+    // Delegate to the stream overload, then close the file explicitly
+    // before handing the overload's result back to the caller.
+    const bool saved = save( out );
+    out.close();
+    return saved;
+}
+
 
//--
 // ELF header access functions
 ELFIO_HEADER_ACCESS_GET( unsigned char, class  );
@@ -435,13 +442,13 @@ class elfio
 }
 
 
//--
-bool save_header( std::ofstream& f )
+bool save_header( std::ostream& f )
 {
 return header->save( f );
 }
 
 
//--
-bool save_sections( std::ofstream& f )
+bool save_sections( std::ostream& f )
 {
 for ( unsigned int i = 0; i < sections_.size(); ++i ) {
 section *sec = sections_.at(i);
@@ -456,7 +463,7 @@ class elfio
 }
 
 
//--
-bool save_segments( std::ofstream& f )
+bool save_segments( std::ostream& f )
 {
 for ( unsigned int i = 0; i < segments_.size(); ++i ) {
 segment *seg = segments_.at(i);
diff --git a/backend/src/elfio/elfio_header.hpp 
b/backend/src/elfio/elfio_header.hpp
index d689a88..16e1dee 100644
--- a/backend/src/elfio/elfio_header.hpp
+++ b/backend/src/elfio/elfio_header.hpp
@@ -111,7 +111,6 @@ template< class T > class elf_header_impl : public 
elf_header
 {
 stream.seekp( 0 );
 stream.write( reinterpret_cast(  ), sizeof( header 
) );
-
 return stream.good();
 }
 
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 4/4] Add GenProgram::toBinaryFormat to generate ELF format binary.

2017-02-08 Thread junyan . he
From: Junyan He 

Add this function to generate a standard ELF-format binary.
All the verbose information we need at runtime will be stored
in the .note.gpu_info section, so that we can cleanly separate
the runtime from the compiler.

Signed-off-by: Junyan He 
---
 backend/src/backend/gen_program.hpp |   7 +
 backend/src/backend/gen_program_elf.cpp | 357 
 backend/src/backend/program.hpp |   2 +
 backend/src/gbe_bin_interpreter.cpp |   1 +
 4 files changed, 367 insertions(+)

diff --git a/backend/src/backend/gen_program.hpp 
b/backend/src/backend/gen_program.hpp
index ff756e0..8963c38 100644
--- a/backend/src/backend/gen_program.hpp
+++ b/backend/src/backend/gen_program.hpp
@@ -33,6 +33,8 @@
 struct GenInstruction;
 namespace gbe
 {
+  class GenProgramElfContext;
+
   /*! Describe a compiled kernel */
   class GenKernel : public Kernel
   {
@@ -58,6 +60,9 @@ namespace gbe
   /*! Describe a compiled program */
   class GenProgram : public Program
   {
+  protected:
+GenProgramElfContext* elf_ctx;
+
   public:
 /*! Create an empty program */
 GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = 
NULL, const char* asm_fname = NULL, uint32_t fast_relaxed_math = 0) :
@@ -70,6 +75,8 @@ namespace gbe
 virtual void CleanLlvmResource(void);
 /*! Implements base class */
 virtual Kernel *compileKernel(const ir::Unit , const std::string 
, bool relaxMath, int profiling);
+/*! Generate binary format */
+virtual void *toBinaryFormat(size_t _size);
 /*! Allocate an empty kernel. */
 virtual Kernel *allocateKernel(const std::string ) {
   return GBE_NEW(GenKernel, name, deviceID);
diff --git a/backend/src/backend/gen_program_elf.cpp 
b/backend/src/backend/gen_program_elf.cpp
index efd45fe..0440e81 100644
--- a/backend/src/backend/gen_program_elf.cpp
+++ b/backend/src/backend/gen_program_elf.cpp
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+using namespace std;
 
 namespace gbe
 {
@@ -121,4 +122,360 @@ protected:
 return count;
   }
 };
+
+using namespace ELFIO;
+
+class GenProgramElfContext // ELF writer state (sections + accessors) for one GenProgram
+{
+public:
+  enum { // note type values; 0, 1, 2 already have meanings
+    GEN_NOTE_TYPE_GPU_INFO = 3,
+    GEN_NOTE_TYPE_CL_ARG_INFO = 4,
+    GEN_NOTE_TYPE_CL_WORKGROUP_SIZE = 5,
+  };
+
+  GenProgram &genProg;                // program this context was constructed with
+  void emitOneKernel(GenKernel &kernel); // defined out of class
+  elfio writer;                       // ELFIO writer that the sections below are added to
+  section *text_sec;                  // ".text"
+  section *sym_sec;                   // ".symtab"
+  section *strtab_sec;                // ".strtab"
+  section *ker_info_sec;              // ".note.gpu_info"
+  section *rodata_sec;                // left NULL by the constructor
+  symbol_section_accessor *syma;      // accessor created over sym_sec
+  string_section_accessor *stra;      // accessor created over strtab_sec
+  note_section_accessor *note_writer; // accessor created over ker_info_sec
+  Elf32_Word sym_num;                 // starts at 0
+  Elf64_Word bitcode_offset;          // starts at 0
+
+  ~GenProgramElfContext(void) // releases the three accessors allocated with GBE_NEW in the constructor
+  {
+    if (syma)
+      GBE_DELETE(syma);
+    if (stra)
+      GBE_DELETE(stra);
+    if (note_writer)
+      GBE_DELETE(note_writer);
+  }
+
+  GenProgramElfContext(GenProgram &prog) : genProg(prog), text_sec(NULL),
+      sym_sec(NULL), strtab_sec(NULL), ker_info_sec(NULL), rodata_sec(NULL),
+      syma(NULL), stra(NULL), note_writer(NULL), sym_num(0),
+      bitcode_offset(0) // sets up the ELF header, the four sections and their accessors
+  {
+    writer.create(ELFCLASS64, ELFDATA2LSB);
+    writer.set_os_abi(ELFOSABI_LINUX);
+    writer.set_type(ET_REL);
+    writer.set_machine(EM_INTEL205); // TODO: Some value of Intel GPU;
+
+    // Create code section
+    text_sec = writer.sections.add(".text");
+    text_sec->set_type(SHT_PROGBITS);
+    text_sec->set_flags(SHF_ALLOC | SHF_EXECINSTR);
+    text_sec->set_addr_align(4);
+
+    // Create string table section
+    strtab_sec = writer.sections.add(".strtab");
+    strtab_sec->set_type(SHT_STRTAB);
+    strtab_sec->set_addr_align(1);
+
+    // Create symbol table section (linked to the string table above)
+    sym_sec = writer.sections.add(".symtab");
+    sym_sec->set_type(SHT_SYMTAB);
+    sym_sec->set_addr_align(0x4);
+    sym_sec->set_entry_size(writer.get_default_entry_size(SHT_SYMTAB));
+    sym_sec->set_link(strtab_sec->get_index());
+    sym_sec->set_info(0x01);
+
+    // Create kernel info section
+    ker_info_sec = writer.sections.add(".note.gpu_info");
+    ker_info_sec->set_type(SHT_NOTE);
+    ker_info_sec->set_flags(SHF_ALLOC); // flag the note section itself; calling this on text_sec would overwrite its SHF_ALLOC | SHF_EXECINSTR
+    ker_info_sec->set_addr_align(0x04);
+
+    // Create string table writer
+    stra = GBE_NEW(string_section_accessor, strtab_sec);
+    // Create symbol table writer
+    syma = GBE_NEW(symbol_section_accessor, writer, sym_sec);
+    // Create note writer
+    note_writer = GBE_NEW(note_section_accessor, writer, ker_info_sec);
+  }
+};
+
+void GenProgramElfContext::emitOneKernel(GenKernel )
+{
+  assert(text_sec != NULL);
+  assert(sym_sec != NULL);
+  assert(text_sec != NULL);
+  assert(syma != NULL);
+  assert(stra != NULL);
+
+  sym_num++;
+
+  // Add the kernel's bitcode to .text section
+  text_sec->append_data(kernel.getCode(), kernel.getCodeSize());
+  // Add the kernel func as a symbol
+  syma->add_symbol(*stra,