Re: [Beignet] [Patch V2] intel: Export pooled EU and min no. of eus in a pool.

2016-07-15 Thread Chris Wilson
On Fri, Jul 15, 2016 at 11:37:41AM +0100, Arun Siluvery wrote:
> On 15/07/2016 08:08, Yang Rong wrote:
> >Update kernel interface with new I915_GETPARAM ioctl entries for
> >pooled EU and min no. of eus in a pool. Add a wrapping function
> >for each parameter. Userspace drivers need these values when deciding
> >the thread count. This kernel enabled pooled eu by default for BXT
> >and for fused down 2x6 parts it is advised to turn it off.
> >
> >But there is another HW issue in these parts (fused
> >down 2x6 parts) before C0 that requires Pooled EU to be enabled as a
> >workaround. In this case the pool configuration changes depending upon
> >which subslice is disabled and the no. of eus in a pool is different,
> >so userspace needs to know the min no. of eus in a pool.
> >
> >V2: use return value as the query results.
> > ret < 0 on error, ret = 0 when not supported, and ret > 0 indicates
> > the query result. (Chris)
> >
> >Signed-off-by: Yang Rong 
> >---
> 
> [+ chris, intel-gfx]
> 
> 
> regards
> Arun
> 
> >  include/drm/i915_drm.h   |  2 ++
> >  intel/intel_bufmgr.h |  3 +++
> >  intel/intel_bufmgr_gem.c | 32 
> >  3 files changed, 37 insertions(+)
> >
> >diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> >index c4ce6b2..eb611a7 100644
> >--- a/include/drm/i915_drm.h
> >+++ b/include/drm/i915_drm.h
> >@@ -357,6 +357,8 @@ typedef struct drm_i915_irq_wait {
> >  #define I915_PARAM_HAS_GPU_RESET35
> >  #define I915_PARAM_HAS_RESOURCE_STREAMER 36
> >  #define I915_PARAM_HAS_EXEC_SOFTPIN 37
> >+#define I915_PARAM_HAS_POOLED_EU 38
> >+#define I915_PARAM_MIN_EU_IN_POOL39
> >
> >  typedef struct drm_i915_getparam {
> > __s32 param;
> >diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
> >index a1abbcd..96a4d9d 100644
> >--- a/intel/intel_bufmgr.h
> >+++ b/intel/intel_bufmgr.h
> >@@ -273,6 +273,9 @@ int drm_intel_get_reset_stats(drm_intel_context *ctx,
> >  int drm_intel_get_subslice_total(int fd, unsigned int *subslice_total);
> >  int drm_intel_get_eu_total(int fd, unsigned int *eu_total);
> >
> >+int drm_intel_get_pooled_eu(int fd);
> >+int drm_intel_get_min_eu_in_pool(int fd);
> >+
> >  /** @{ Compatibility defines to keep old code building despite the symbol 
> > rename
> >   * from dri_* to drm_intel_*
> >   */
> >diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> >index 0a4012b..4d9899d 100644
> >--- a/intel/intel_bufmgr_gem.c
> >+++ b/intel/intel_bufmgr_gem.c
> >@@ -3237,6 +3237,38 @@ drm_intel_get_eu_total(int fd, unsigned int *eu_total)
> > return 0;
> >  }
> >
> >+int
> >+drm_intel_get_pooled_eu(int fd)
> >+{
> >+drm_i915_getparam_t gp;
> >+int ret;
> >+
> >+memclear(gp);
> >+gp.param = I915_PARAM_HAS_POOLED_EU;
> >+gp.value = 
> >+ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, );
> >+if (ret)
> >+return -errno;

Do I need to point out how the above is broken?

if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
return -errno;
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [Patch V2] intel: Export pooled EU and min no. of eus in a pool.

2016-07-15 Thread Arun Siluvery

On 15/07/2016 08:08, Yang Rong wrote:

Update kernel interface with new I915_GETPARAM ioctl entries for
pooled EU and min no. of eus in a pool. Add a wrapping function
for each parameter. Userspace drivers need these values when deciding
the thread count. This kernel enabled pooled eu by default for BXT
and for fused down 2x6 parts it is advised to turn it off.

But there is another HW issue in these parts (fused
down 2x6 parts) before C0 that requires Pooled EU to be enabled as a
workaround. In this case the pool configuration changes depending upon
which subslice is disabled and the no. of eus in a pool is different,
so userspace needs to know the min no. of eus in a pool.

V2: use return value as the query results.
 ret < 0 on error, ret = 0 when not supported, and ret > 0 indicates
 the query result. (Chris)

Signed-off-by: Yang Rong 
---


[+ chris, intel-gfx]


regards
Arun


  include/drm/i915_drm.h   |  2 ++
  intel/intel_bufmgr.h |  3 +++
  intel/intel_bufmgr_gem.c | 32 
  3 files changed, 37 insertions(+)

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index c4ce6b2..eb611a7 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -357,6 +357,8 @@ typedef struct drm_i915_irq_wait {
  #define I915_PARAM_HAS_GPU_RESET   35
  #define I915_PARAM_HAS_RESOURCE_STREAMER 36
  #define I915_PARAM_HAS_EXEC_SOFTPIN37
+#define I915_PARAM_HAS_POOLED_EU 38
+#define I915_PARAM_MIN_EU_IN_POOL39

  typedef struct drm_i915_getparam {
__s32 param;
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index a1abbcd..96a4d9d 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -273,6 +273,9 @@ int drm_intel_get_reset_stats(drm_intel_context *ctx,
  int drm_intel_get_subslice_total(int fd, unsigned int *subslice_total);
  int drm_intel_get_eu_total(int fd, unsigned int *eu_total);

+int drm_intel_get_pooled_eu(int fd);
+int drm_intel_get_min_eu_in_pool(int fd);
+
  /** @{ Compatibility defines to keep old code building despite the symbol 
rename
   * from dri_* to drm_intel_*
   */
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 0a4012b..4d9899d 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -3237,6 +3237,38 @@ drm_intel_get_eu_total(int fd, unsigned int *eu_total)
return 0;
  }

+int
+drm_intel_get_pooled_eu(int fd)
+{
+   drm_i915_getparam_t gp;
+   int ret;
+
+   memclear(gp);
+   gp.param = I915_PARAM_HAS_POOLED_EU;
+   gp.value = 
+   ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, );
+   if (ret)
+   return -errno;
+
+   return ret;
+}
+
+int
+drm_intel_get_min_eu_in_pool(int fd)
+{
+   drm_i915_getparam_t gp;
+   int ret;
+
+   memclear(gp);
+   gp.param = I915_PARAM_MIN_EU_IN_POOL;
+   gp.value = 
+   ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, );
+   if (ret)
+   return -errno;
+
+   return ret;
+}
+
  /**
   * Annotate the given bo for use in aub dumping.
   *



___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH] Runtime: Add CL base object for all cl objects.

2016-07-15 Thread Simon Richter
Hi,

On 14.07.2016 10:15, junyan...@inbox.com wrote:

> The runtime code is a little verbose in its CL object handling.
> Every CL object should have a reference, a lock to protect itself
> and an ICD dispatcher. We can organize them into a struct and place
> it at the beginning of each CL object.

Does that mean that only a single call to DEFINE_ICD() and SET_ICD()
remains? If so, can/should these be inlined?

   Simon




signature.asc
Description: OpenPGP digital signature
___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH V2 5/6] Backend: Add intel_sub_group_shuffle_down/up/xor

2016-07-15 Thread Yang, Rong R
The first 4 patches LGTM; I will push them.

For the intel_sub_group_shuffle_down/up implementation, we'd better handle it
at the OpenCL C level.

> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Xiuli Pan
> Sent: Thursday, July 7, 2016 11:10
> To: beignet@lists.freedesktop.org
> Cc: Pan, Xiuli 
> Subject: [Beignet] [PATCH V2 5/6] Backend: Add
> intel_sub_group_shuffle_down/up/xor
> 
> From: Pan Xiuli 
> 
> Using a function shuffle delta for down/up, using some flags for current and
> down/up src switch. The flags and index are precalculated in libocl.
> The shuffle delta only handle flag mask the dst with different src.
> Using the old shuffle with xor for shuffle_xor.
> 
> Signed-off-by: Pan Xiuli 
> ---
>  backend/src/backend/gen_insn_selection.cpp | 65
> ++
>  backend/src/ir/instruction.cpp | 44 
>  backend/src/ir/instruction.hpp |  9 +
>  backend/src/ir/instruction.hxx |  1 +
>  backend/src/libocl/script/ocl_simd.def |  9 +
>  backend/src/libocl/tmpl/ocl_simd.tmpl.cl   | 34 
>  backend/src/libocl/tmpl/ocl_simd.tmpl.h|  9 +
>  backend/src/llvm/llvm_gen_backend.cpp  | 13 ++
>  backend/src/llvm/llvm_gen_ocl_function.hxx |  2 +
>  9 files changed, 186 insertions(+)
> 
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index e342161..7b646e0 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -6738,6 +6738,70 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>  DECL_CTOR(MediaBlockWriteInstruction, 1, 1);
>};
> 
> +  /*! SIMD shuffle delta pattern */
> +  DECL_PATTERN(SimdShuffleDeltaInstruction)
> +  {
> +bool emitOne(Selection::Opaque , const
> ir::SimdShuffleDeltaInstruction , bool ) const
> +{
> +  using namespace ir;
> +  const GenRegister dst = sel.selReg(insn.getDst(0), TYPE_U32);
> +  const GenRegister srcx = sel.selReg(insn.getSrc(0), TYPE_U32);
> +  const GenRegister srcy = sel.selReg(insn.getSrc(1), TYPE_U32);
> +  const GenRegister index = sel.selReg(insn.getSrc(2), TYPE_U32);
> +  const GenRegister inRange = sel.selReg(insn.getSrc(3), TYPE_U32);
> +  const GenRegister constZero = GenRegister::immud(0);;
> +  const GenRegister shiftL = sel.selReg(sel.reg(FAMILY_DWORD),
> TYPE_U32);
> +  bool hasShiftL = false;
> +
> +  sel.push();
> +  sel.curr.predicate = GEN_PREDICATE_NONE;
> +  /* First shuffle for srcx */
> +  if (sel.isScalarReg(insn.getSrc(0))) {
> +sel.MOV(dst, srcx);
> +  } else {
> +if (index.file == GEN_IMMEDIATE_VALUE) {
> +  sel.push();
> +  uint32_t offset = index.value.ud % sel.curr.execWidth;
> +  GenRegister reg = GenRegister::subphysicaloffset(srcx, offset);
> +  reg.vstride = GEN_VERTICAL_STRIDE_0;
> +  reg.hstride = GEN_HORIZONTAL_STRIDE_0;
> +  reg.width = GEN_WIDTH_1;
> +  sel.MOV(dst, reg);
> +  sel.push();
> +} else {
> +  sel.SHL(shiftL, index, GenRegister::immud(0x2));
> +  hasShiftL = true;
> +  sel.SIMD_SHUFFLE(dst, srcx, shiftL);
> +}
> +  }
> +  sel.curr.flag = 0;
> +  sel.curr.subFlag = 1;
> +  sel.CMP(GEN_CONDITIONAL_EQ, inRange, constZero);
> +  sel.curr.predicate = GEN_PREDICATE_NORMAL;
> +  /* Now shuffle for srcy */
> +  if (sel.isScalarReg(insn.getSrc(1))) {
> +sel.MOV(dst, srcy);
> +  } else {
> +if (index.file == GEN_IMMEDIATE_VALUE) {
> +  sel.push();
> +  uint32_t offset = index.value.ud % sel.curr.execWidth;
> +  GenRegister reg = GenRegister::subphysicaloffset(srcy, offset);
> +  reg.vstride = GEN_VERTICAL_STRIDE_0;
> +  reg.hstride = GEN_HORIZONTAL_STRIDE_0;
> +  reg.width = GEN_WIDTH_1;
> +  sel.MOV(dst, reg);
> +  sel.pop();
> +} else {
> +  if (!hasShiftL)
> +sel.SHL(shiftL, index, GenRegister::immud(0x2));
> +  sel.SIMD_SHUFFLE(dst, srcy, shiftL);
> +}
> +  }
> +  sel.pop();
> +  return true;
> +}
> +DECL_CTOR(SimdShuffleDeltaInstruction, 1, 1);  };
> 
>/*! Sort patterns */
>INLINE bool cmp(const SelectionPattern *p0, const SelectionPattern *p1)
> { @@ -6782,6 +6846,7 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>  this->insert();
>  this->insert();
>  this->insert();
> +this->insert();
> 
>  // Sort all the patterns with the number of instructions they output
>  for (uint32_t op = 0; op < ir::OP_INVALID; ++op) diff --git
> a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index
> ed64580..a274626 100644
> --- 

[Beignet] cl_get_gt_device(): error while running OpenCL on Kabylake

2016-07-15 Thread Mahesh Kumar
Hi, I get the following error when I try to run some OpenCL benchmarks on
Kabylake. How can I fix this?

cl_get_gt_device(): error, unknown device: 5916 
error calling clGetDeviceIDs 

The same works on older Intel families. Is support for Kabylake missing?
___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet