[Mesa-dev] [Bug 100091] Failure to create folder for on-disk shader cache

2017-03-20 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=100091

--- Comment #27 from Timothy Arceri  ---
https://patchwork.freedesktop.org/series/21582/
(In reply to John from comment #26)
> Timothy, do you have any patch you'd like me to test?
> 
> Thanks!
> John

This should do it:

https://patchwork.freedesktop.org/series/21582/

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] util/disk_cache: write cache entry keys to file header

2017-03-20 Thread Timothy Arceri
This can be used to deal with key hash collisions from different
versions (should we find that to actually happen) and to find
which mesa version produced the cache entry.
---

 I considered creating the cache key blob at cache creation time but since
 we would want a blob with the null terminator dropped for the hash and one
 that includes the terminator for reading the strings from the header, I
 decided just to create it on the fly to keep the code easier to follow.  

 src/util/disk_cache.c | 91 +++
 1 file changed, 85 insertions(+), 6 deletions(-)

diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index 599e58f..f2d67c9 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -737,33 +737,60 @@ create_put_job(struct disk_cache *cache, const cache_key 
key,
 }
 
 static void
 destroy_put_job(void *job, int thread_index)
 {
if (job) {
   free(job);
}
 }
 
+static size_t
+cache_keys_size(struct disk_cache *cache)
+{
+   /* The 2 is for the terminating null bytes of the strings */
+   return cache->timestamp_size + cache->gpu_name_size +
+  sizeof(cache->ptr_size) + 2;
+}
+
+static uint8_t *
+generate_cache_keys_blob(struct disk_cache *cache, size_t ck_size)
+{
+   uint8_t *key_blob = malloc(ck_size);
+   if (!key_blob)
+  return NULL;
+
+   size_t ts_size = cache->timestamp_size + 1;
+   memcpy(key_blob, cache->timestamp, ts_size);
+
+   size_t gn_size = cache->gpu_name_size + 1;
+   memcpy(key_blob + ts_size, cache->gpu_name, gn_size);
+
+   memcpy(key_blob + ts_size + gn_size, >ptr_size, sizeof(uint8_t));
+
+   return key_blob;
+}
+
 struct cache_entry_file_data {
uint32_t crc32;
uint32_t uncompressed_size;
 };
 
 static void
 cache_put(void *job, int thread_index)
 {
assert(job);
 
int fd = -1, fd_final = -1, err, ret;
unsigned i = 0;
char *filename = NULL, *filename_tmp = NULL;
+   uint8_t *key_blob = NULL;
struct disk_cache_put_job *dc_job = (struct disk_cache_put_job *) job;
 
filename = get_cache_file(dc_job->cache, dc_job->key);
if (filename == NULL)
   goto done;
 
/* If the cache is too large, evict something else first. */
while (*dc_job->cache->size + dc_job->size > dc_job->cache->max_size &&
   i < 8) {
   evict_lru_item(dc_job->cache);
@@ -808,23 +835,41 @@ cache_put(void *job, int thread_index)
 */
fd_final = open(filename, O_RDONLY | O_CLOEXEC);
if (fd_final != -1) {
   unlink(filename_tmp);
   goto done;
}
 
/* OK, we're now on the hook to write out a file that we know is
 * not in the cache, and is also not being written out to the cache
 * by some other process.
-*
-* Create CRC of the data and store at the start of the file. We will
-* read this when restoring the cache and use it to check for corruption.
+*/
+
+   /* Write the key_blob, this can be used to find information about the
+* mesa version that produced the entry or deal with hash collisions,
+* should that ever become a real problem.
+*/
+   size_t ck_size = cache_keys_size(dc_job->cache);
+   key_blob = generate_cache_keys_blob(dc_job->cache, ck_size);
+   if (!key_blob) {
+  unlink(filename_tmp);
+  goto done;
+   }
+
+   ret = write_all(fd, key_blob, ck_size);
+   if (ret == -1) {
+  unlink(filename_tmp);
+  goto done;
+   }
+
+   /* Create CRC of the data. We will read this when restoring the cache and
+* use it to check for corruption.
 */
struct cache_entry_file_data cf_data;
cf_data.crc32 = util_hash_crc32(dc_job->data, dc_job->size);
cf_data.uncompressed_size = dc_job->size;
 
size_t cf_data_size = sizeof(cf_data);
ret = write_all(fd, _data, cf_data_size);
if (ret == -1) {
   unlink(filename_tmp);
   goto done;
@@ -839,35 +884,37 @@ cache_put(void *job, int thread_index)
if (file_size == 0) {
   unlink(filename_tmp);
   goto done;
}
ret = rename(filename_tmp, filename);
if (ret == -1) {
   unlink(filename_tmp);
   goto done;
}
 
-   file_size += cf_data_size;
+   file_size += cf_data_size + ck_size;
p_atomic_add(dc_job->cache->size, file_size);
 
  done:
if (fd_final != -1)
   close(fd_final);
/* This close finally releases the flock, (now that the final file
 * has been renamed into place and the size has been added).
 */
if (fd != -1)
   close(fd);
if (filename_tmp)
   free(filename_tmp);
if (filename)
   free(filename);
+   if (key_blob)
+  free(key_blob);
 }
 
 void
 disk_cache_put(struct disk_cache *cache, const cache_key key,
const void *data, size_t size)
 {
struct disk_cache_put_job *dc_job =
   create_put_job(cache, key, data, size);
 
if (dc_job) {
@@ -936,32 +983,64 @@ disk_cache_get(struct disk_cache *cache, const cache_key 
key, size_t *size)
if (fd == -1)
   goto fail;
 
if (fstat(fd, ) == -1)
   goto fail;
 

[Mesa-dev] [PATCH 1/3] util/disk_cache: hash timestamps into the cache keys

2017-03-20 Thread Timothy Arceri
From: Grazvydas Ignotas 

Instead of using a directory, hash the timestamps into the cache keys
themselves. Since there is no more timestamp directory, there is no more
need for deleting the cache of other mesa versions and we rely on
eviction to clean up the old cache entries. This solves the problem of
using several incarnations of disk_cache at the same time, where one
deletes a directory belonging to the other, like when both OpenGL and
gallium nine are used simultaneously (or several different mesa
installations).

v2: using additional blob instead of trying to clone sha1 state

v3: (Timothy Arceri) don't use an opaque data type to store
timestamp.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100091
Signed-off-by: Grazvydas Ignotas 
---
 src/compiler/glsl/tests/cache_test.c | 10 +++---
 src/util/disk_cache.c| 70 ++--
 2 files changed, 24 insertions(+), 56 deletions(-)

diff --git a/src/compiler/glsl/tests/cache_test.c 
b/src/compiler/glsl/tests/cache_test.c
index b1b3c33..b604943 100644
--- a/src/compiler/glsl/tests/cache_test.c
+++ b/src/compiler/glsl/tests/cache_test.c
@@ -121,21 +121,21 @@ remove_entry(const char *path,
 static int
 rmrf_local(const char *path)
 {
if (path == NULL || *path == '\0' || *path != '.')
   return -1;
 
return nftw(path, remove_entry, 64, FTW_DEPTH | FTW_PHYS | FTW_MOUNT);
 }
 
 static void
-check_timestamp_and_gpu_id_directories_created(char *cache_dir)
+check_directories_created(char *cache_dir)
 {
bool sub_dirs_created = false;
 
char buf[PATH_MAX];
if (getcwd(buf, PATH_MAX)) {
   char *full_path = NULL;
   if (asprintf(_path, "%s%s", buf, ++cache_dir) != -1 ) {
  struct stat sb;
  if (stat(full_path, ) != -1 && S_ISDIR(sb.st_mode))
 sub_dirs_created = true;
@@ -177,48 +177,48 @@ test_disk_cache_create(void)
 
/* Test with XDG_CACHE_HOME set */
setenv("XDG_CACHE_HOME", CACHE_TEST_TMP "/xdg-cache-home", 1);
cache = disk_cache_create("test", "make_check");
expect_null(cache, "disk_cache_create with XDG_CACHE_HOME set with"
"a non-existing parent directory");
 
/* Create string with expected directory hierarchy */
char expected_dir_h[255];
sprintf(expected_dir_h, "%s%s%s", CACHE_TEST_TMP "/xdg-cache-home/mesa/",
-   get_arch_bitness_str(), "/make_check/test");
+   get_arch_bitness_str(), "/test");
 
mkdir(CACHE_TEST_TMP, 0755);
cache = disk_cache_create("test", "make_check");
expect_non_null(cache, "disk_cache_create with XDG_CACHE_HOME set");
 
-   check_timestamp_and_gpu_id_directories_created(expected_dir_h);
+   check_directories_created(expected_dir_h);
 
disk_cache_destroy(cache);
 
/* Test with MESA_GLSL_CACHE_DIR set */
err = rmrf_local(CACHE_TEST_TMP);
expect_equal(err, 0, "Removing " CACHE_TEST_TMP);
 
setenv("MESA_GLSL_CACHE_DIR", CACHE_TEST_TMP "/mesa-glsl-cache-dir", 1);
cache = disk_cache_create("test", "make_check");
expect_null(cache, "disk_cache_create with MESA_GLSL_CACHE_DIR set with"
"a non-existing parent directory");
 
sprintf(expected_dir_h, "%s%s%s", CACHE_TEST_TMP
"/mesa-glsl-cache-dir/mesa/", get_arch_bitness_str(),
-   "/make_check/test");
+   "/test");
 
mkdir(CACHE_TEST_TMP, 0755);
cache = disk_cache_create("test", "make_check");
expect_non_null(cache, "disk_cache_create with MESA_GLSL_CACHE_DIR set");
 
-   check_timestamp_and_gpu_id_directories_created(expected_dir_h);
+   check_directories_created(expected_dir_h);
 
disk_cache_destroy(cache);
 }
 
 static bool
 does_cache_contain(struct disk_cache *cache, cache_key key)
 {
void *result;
 
result = disk_cache_get(cache, key, NULL);
diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index dd3cadb..eb17206 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -71,20 +71,24 @@ struct disk_cache {
size_t index_mmap_size;
 
/* Pointer to total size of all objects in cache (within index_mmap) */
uint64_t *size;
 
/* Pointer to stored keys, (within index_mmap). */
uint8_t *stored_keys;
 
/* Maximum size of all cached objects (in bytes). */
uint64_t max_size;
+
+   /* Driver cache keys. */
+   char *timestamp;
+   size_t timestamp_size;
 };
 
 struct disk_cache_put_job {
struct util_queue_fence fence;
 
struct disk_cache *cache;
 
cache_key key;
 
/* Copy of cache data to be compressed and written. */
@@ -147,80 +151,36 @@ concatenate_and_mkdir(void *ctx, const char *path, const 
char *name)
   return NULL;
 
new_path = ralloc_asprintf(ctx, "%s/%s", path, name);
 
if (mkdir_if_needed(new_path) == 0)
   return new_path;
else
   return NULL;
 }
 
-static int
-remove_dir(const char *fpath, const struct stat *sb,
-   int typeflag, struct FTW *ftwbuf)
-{
-   if 

[Mesa-dev] [PATCH 2/3] util/disk_cache: hash pointer size and gpu name into cache keys

2017-03-20 Thread Timothy Arceri
From: Grazvydas Ignotas 

This allows us to get rid of the arch and gpu name directories.

v2: (Timothy Arceri) don't use an opaque data type to store
pointer size and gpu name.

Signed-off-by: Grazvydas Ignotas 
---
 src/compiler/glsl/tests/cache_test.c | 13 ++-
 src/util/disk_cache.c| 44 +++-
 src/util/disk_cache.h| 17 --
 3 files changed, 20 insertions(+), 54 deletions(-)

diff --git a/src/compiler/glsl/tests/cache_test.c 
b/src/compiler/glsl/tests/cache_test.c
index b604943..2302f44 100644
--- a/src/compiler/glsl/tests/cache_test.c
+++ b/src/compiler/glsl/tests/cache_test.c
@@ -174,51 +174,42 @@ test_disk_cache_create(void)
expect_non_null(cache, "disk_cache_create with no environment variables");
 
disk_cache_destroy(cache);
 
/* Test with XDG_CACHE_HOME set */
setenv("XDG_CACHE_HOME", CACHE_TEST_TMP "/xdg-cache-home", 1);
cache = disk_cache_create("test", "make_check");
expect_null(cache, "disk_cache_create with XDG_CACHE_HOME set with"
"a non-existing parent directory");
 
-   /* Create string with expected directory hierarchy */
-   char expected_dir_h[255];
-   sprintf(expected_dir_h, "%s%s%s", CACHE_TEST_TMP "/xdg-cache-home/mesa/",
-   get_arch_bitness_str(), "/test");
-
mkdir(CACHE_TEST_TMP, 0755);
cache = disk_cache_create("test", "make_check");
expect_non_null(cache, "disk_cache_create with XDG_CACHE_HOME set");
 
-   check_directories_created(expected_dir_h);
+   check_directories_created(CACHE_TEST_TMP "/xdg-cache-home/mesa");
 
disk_cache_destroy(cache);
 
/* Test with MESA_GLSL_CACHE_DIR set */
err = rmrf_local(CACHE_TEST_TMP);
expect_equal(err, 0, "Removing " CACHE_TEST_TMP);
 
setenv("MESA_GLSL_CACHE_DIR", CACHE_TEST_TMP "/mesa-glsl-cache-dir", 1);
cache = disk_cache_create("test", "make_check");
expect_null(cache, "disk_cache_create with MESA_GLSL_CACHE_DIR set with"
"a non-existing parent directory");
 
-   sprintf(expected_dir_h, "%s%s%s", CACHE_TEST_TMP
-   "/mesa-glsl-cache-dir/mesa/", get_arch_bitness_str(),
-   "/test");
-
mkdir(CACHE_TEST_TMP, 0755);
cache = disk_cache_create("test", "make_check");
expect_non_null(cache, "disk_cache_create with MESA_GLSL_CACHE_DIR set");
 
-   check_directories_created(expected_dir_h);
+   check_directories_created(CACHE_TEST_TMP "/mesa-glsl-cache-dir/mesa");
 
disk_cache_destroy(cache);
 }
 
 static bool
 does_cache_contain(struct disk_cache *cache, cache_key key)
 {
void *result;
 
result = disk_cache_get(cache, key, NULL);
diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index eb17206..599e58f 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -75,20 +75,23 @@ struct disk_cache {
 
/* Pointer to stored keys, (within index_mmap). */
uint8_t *stored_keys;
 
/* Maximum size of all cached objects (in bytes). */
uint64_t max_size;
 
/* Driver cache keys. */
char *timestamp;
size_t timestamp_size;
+   char *gpu_name;
+   size_t gpu_name_size;
+   uint8_t ptr_size;
 };
 
 struct disk_cache_put_job {
struct util_queue_fence fence;
 
struct disk_cache *cache;
 
cache_key key;
 
/* Copy of cache data to be compressed and written. */
@@ -151,43 +154,20 @@ concatenate_and_mkdir(void *ctx, const char *path, const 
char *name)
   return NULL;
 
new_path = ralloc_asprintf(ctx, "%s/%s", path, name);
 
if (mkdir_if_needed(new_path) == 0)
   return new_path;
else
   return NULL;
 }
 
-static char *
-create_mesa_cache_dir(void *mem_ctx, const char *path, const char *gpu_name)
-{
-   char *new_path = concatenate_and_mkdir(mem_ctx, path, "mesa");
-   if (new_path == NULL)
-  return NULL;
-
-   /* Create a parent architecture directory so that we don't remove cache
-* files for other architectures. In theory we could share the cache
-* between architectures but we have no way of knowing if they were created
-* by a compatible Mesa version.
-*/
-   new_path = concatenate_and_mkdir(mem_ctx, new_path, get_arch_bitness_str());
-   if (new_path == NULL)
-  return NULL;
-
-   new_path = concatenate_and_mkdir(mem_ctx, new_path, gpu_name);
-   if (new_path == NULL)
-  return NULL;
-
-   return new_path;
-}
-
 struct disk_cache *
 disk_cache_create(const char *gpu_name, const char *timestamp)
 {
void *local;
struct disk_cache *cache = NULL;
char *path, *max_size_str;
uint64_t max_size;
int fd = -1;
struct stat sb;
struct statvfs vfs = { 0 };
@@ -210,33 +190,33 @@ disk_cache_create(const char *gpu_name, const char 
*timestamp)
 *
 *   $MESA_GLSL_CACHE_DIR
 *   $XDG_CACHE_HOME/mesa
 *   /.cache/mesa
 */
path = getenv("MESA_GLSL_CACHE_DIR");
if (path) {
   if (mkdir_if_needed(path) == -1)
  goto fail;
 
-  path = 

Re: [Mesa-dev] [RFC libdrm 0/2] Replace the build system with meson

2017-03-20 Thread Jonathan Gray
On Mon, Mar 20, 2017 at 11:30:25AM -0700, Matt Turner wrote:
> On Mon, Mar 20, 2017 at 6:55 AM, Emil Velikov  
> wrote:
> > Seems like we ended up all over the place, so let me try afresh.
> >
> > Above all:
> >  - Saying "I don't care" about your users is arrogant - let us _not_
> > do that, please ?
> 
> Let's be honest, the OpenBSD is subjecting itself to some pretty
> arbitrary restrictions caused including Mesa in its core: 10+ year old
> GCC, non-GNU Make, and now no Meson. I don't believe either FreeBSD or
> NetBSD keep Mesa as part of the core operating system, and as such
> don't suffer from these problems.
> 
> For better or worse, they have made their choices and they get to live
> with them. We are not beholden to them.

This isn't a situation like OpenSSH where people explicitly go out of
their way to provide support for and test multiple systems and add
support for horrible things like PAM.  It is more along the lines of
considering integrating patches sent by others to make code build.

> 
> > Even Linux distribution maintainers have responded that "upstream does
> > not care us", which is indicative that we should be more careful what
> > we say.
> 
> Citation needed.
> 
> > For the rest - we're dealing with two orthogonal issues here:
> >
> > * Multiple build systems
> > I believe we'll all agree that I might be the person who's been in all
> > the build systems the most.
> > Yes I _would_ _love_ to drop it all but we simply _cannot_ do that yet:
> 
> No one is advocating dropping all of the existing build systems yet.
> 
> This patch is an RFC for a smaller project to start the discussion about Mesa.
> 
> >  - [currently] there is no viable solution for Android
> 
> Acknowledged. Dylan is going to see if this is something that can be
> solved in upstream Meson.
> 
> >  - dropping the Autotools will lead to OpenBSD and NetBSD having to
> > write one from scratch, IIRC Solaris/FreeBSD and others are in similar
> > boat.
> 
> Solaris is a closed source operating system whose developers do not
> contribute to the project. We do not need to base our decisions on
> them.

So Mesa will remove support for libglvnd then?  I don't see a lot of
open source non-Mesa alternatives for libGL.

Oh and the mingw, windows and macos support can go as well, great!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC libdrm 0/2] Replace the build system with meson

2017-03-20 Thread Jonathan Gray
On Tue, Mar 21, 2017 at 08:28:22AM +1100, Timothy Arceri wrote:
> 
> 
> On 21/03/17 06:39, Emil Velikov wrote:
> > On 20 March 2017 at 18:30, Matt Turner  wrote:
> > > On Mon, Mar 20, 2017 at 6:55 AM, Emil Velikov  
> > > wrote:
> > > > Seems like we ended up all over the place, so let me try afresh.
> > > > 
> > > > Above all:
> > > >  - Saying "I don't care" about your users is arrogant - let us _not_
> > > > do that, please ?
> > > 
> > > Let's be honest, the OpenBSD is subjecting itself to some pretty
> > > arbitrary restrictions caused including Mesa in its core: 10+ year old
> > > GCC,
> > IIRC Brian was using old MinGW GCC, which was one of the blockers - it
> > wasn't OpenBSD to blame here ;-)
> 
> Sorry Emil I probably wasn't clear in our discussion. I sent out patches to
> switch to GCC 4.8 last Sept (I believe this was needed by RHEL6) [1].
> 
> Brain jumped in and said "I'm still using the MinGW gcc 4.6 compiler. I'd
> rather not go through the upgrade hassle if I don't have to."
> 
> Followed by Jose "We're internally building and shipping Mesa compiled with
> GCC 4.4 (more specifically 4.4.3).
> 
> It's fine if you require GCC 4.8 on automake, but please leave support
> for GCC 4.4.x in SCons."
> 
> By this point I got bored and moved on. But OpenBSDs GCC is a fork with
> various features backported, from what I understand Mesa would not build on
> a real GCC 4.2 release and we should not be using it as a min version. IMO
> if OpenBSD want to maintain a GCC fork they can handle a patch to downgrade
> the min GCC version.
> 
> I believe Jonathan would like us to stick with 4.2 as min but is prepared to
> deal with it if we move on.

I would like to see Mesa test features it uses in configure rather than
arbitrary versions that are what a certain linux distribution ships with.
The zlib change for instance didn't reference any specific problems with
older versions or interfaces required from newer versions.

We have one platform using clang/lld now (arm64) and are likely to move
others in future where possible.  libtool has to be patched and the Mesa
configure script regenerated to make this work or Mesa won't build due
to libtool.m4 looking for specific strings in ld -v produced by bfd
binutils or gold...

And yes if you change the configure script to check for a newer version
I'll revert it locally like I did with the zlib one.

As I get the impression no one cares about patches for older GCC I've
not been sending them to the list, ie

commit d3d340d6026e516cc405a2eb1d925a7a7a467480
Author: Jonathan Gray 
Date:   Thu Mar 16 00:30:07 2017 +1100

i965: don't use designated array initialisation

Don't use a form of designated array initialisation that breaks gcc 4.2.1.

compiler/brw_vec4_gs_visitor.cpp:589: error: expected primary-expression 
before '[' token
compiler/brw_vec4_gs_visitor.cpp:590: error: expected primary-expression 
before '[' token
compiler/brw_vec4_gs_visitor.cpp:591: error: expected primary-expression 
before '[' token
compiler/brw_vec4_gs_visitor.cpp:592: error: expected primary-expression 
before '[' token
compiler/brw_vec4_gs_visitor.cpp:593: error: expected primary-expression 
before '[' token
compiler/brw_vec4_gs_visitor.cpp:594: error: expected primary-expression 
before '[' token
compiler/brw_vec4_gs_visitor.cpp:595: error: expected primary-expression 
before '[' token
compiler/brw_vec4_gs_visitor.cpp:596: error: expected primary-expression 
before '[' token
compiler/brw_vec4_gs_visitor.cpp:597: error: expected primary-expression 
before '[' token
compiler/brw_vec4_gs_visitor.cpp:598: error: expected primary-expression 
before '[' token
compiler/brw_vec4_gs_visitor.cpp:599: error: expected primary-expression 
before '[' token
compiler/brw_vec4_gs_visitor.cpp:600: error: expected primary-expression 
before '[' token
compiler/brw_vec4_gs_visitor.cpp:601: error: expected primary-expression 
before '[' token
compiler/brw_vec4_gs_visitor.cpp:602: error: expected primary-expression 
before '[' token

Signed-off-by: Jonathan Gray 

diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp 
b/src/intel/compiler/brw_vec4_gs_visitor.cpp
index 4a8b5be30e..e7a502306e 100644
--- a/src/intel/compiler/brw_vec4_gs_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp
@@ -585,23 +585,6 @@ vec4_gs_visitor::gs_end_primitive()
emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
 }
 
-static const GLuint gl_prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = {
-   [GL_POINTS] =_3DPRIM_POINTLIST,
-   [GL_LINES] = _3DPRIM_LINELIST,
-   [GL_LINE_LOOP] = _3DPRIM_LINELOOP,
-   [GL_LINE_STRIP] = _3DPRIM_LINESTRIP,
-   [GL_TRIANGLES] = _3DPRIM_TRILIST,
-   [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
-   [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
-   [GL_QUADS] = _3DPRIM_QUADLIST,
-   [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
-   

Re: [Mesa-dev] [PATCH] i965: Shut up major()/minor() warnings.

2017-03-20 Thread Jonathan Gray
On Mon, Mar 20, 2017 at 04:05:04PM -0700, Kenneth Graunke wrote:
> Recent glibc generates this warning:
> 
> brw_performance_query.c:1648:13: warning: In the GNU C Library, "minor" is 
> defined
>  by . For historical compatibility, it is
>  currently defined by  as well, but we plan to
>  remove this soon. To use "minor", include 
>  directly. If you did not intend to use a system-defined macro
>  "minor", you should undefine it after including .
> 
> min = minor(sb.st_rdev);
> 
> So, include sys/sysmacros.h to shut up the warning.

This header is only present on glibc so it should be gated by an ifdef
or it will break all other systems.

minor()/major() are documented as needing sys/types.h and there is no
sys/sysmacros.h on OpenBSD for instance.

> ---
>  src/mesa/drivers/dri/i965/brw_performance_query.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
> b/src/mesa/drivers/dri/i965/brw_performance_query.c
> index 2e04e091d29..17e0d6f0cdf 100644
> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
> @@ -42,6 +42,7 @@
>  #include 
>  #include 
>  
> +#include 
>  #include 
>  #include 
>  #include 
> -- 
> 2.12.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] [v2] i965: Handle X-tiled modifier

2017-03-20 Thread Ben Widawsky
This doesn't really "do" anything because the default tiling for the
winsys buffer is X tiled. We do however want the X tiled modifier to
work correctly from the API perspective, which would imply that if you
set this modifier, and later do a get_modifier, you get back at least X
tiled.

Cc: Kristian Høgsberg 
Signed-off-by: Ben Widawsky 
Acked-by: Daniel Stone 
---
 src/mesa/drivers/dri/i965/intel_screen.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index e4f858ed33..9d458c707a 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -519,12 +519,14 @@ intel_destroy_image(__DRIimage *image)
 enum modifier_priority {
MODIFIER_PRIORITY_INVALID = 0,
MODIFIER_PRIORITY_LINEAR,
+   MODIFIER_PRIORITY_X,
MODIFIER_PRIORITY_Y,
 };
 
 const uint64_t priority_to_modifier[] = {
[MODIFIER_PRIORITY_INVALID] = DRM_FORMAT_MOD_INVALID,
[MODIFIER_PRIORITY_LINEAR] = DRM_FORMAT_MOD_LINEAR,
+   [MODIFIER_PRIORITY_X] = I915_FORMAT_MOD_X_TILED,
[MODIFIER_PRIORITY_Y] = I915_FORMAT_MOD_Y_TILED,
 };
 
@@ -540,6 +542,9 @@ select_best_modifier(struct gen_device_info *devinfo,
   case I915_FORMAT_MOD_Y_TILED:
  prio = MAX2(prio, MODIFIER_PRIORITY_Y);
  break;
+  case I915_FORMAT_MOD_X_TILED:
+ prio = MAX2(prio, MODIFIER_PRIORITY_X);
+ break;
   case DRM_FORMAT_MOD_LINEAR:
  prio = MAX2(prio, MODIFIER_PRIORITY_LINEAR);
  break;
@@ -577,6 +582,9 @@ intel_create_image_common(__DRIscreen *dri_screen,
 
uint64_t modifier = select_best_modifier(>devinfo, modifiers, 
count);
switch (modifier) {
+   case I915_FORMAT_MOD_X_TILED:
+  assert(tiling == I915_TILING_X);
+  break;
case DRM_FORMAT_MOD_LINEAR:
   tiling = I915_TILING_NONE;
   break;
@@ -641,6 +649,7 @@ intel_create_image_with_modifiers(__DRIscreen *dri_screen,
for (int i = 0; i < count; i++) {
   switch (modifiers[i]) {
   case I915_FORMAT_MOD_Y_TILED:
+  case I915_FORMAT_MOD_X_TILED:
   case DRM_FORMAT_MOD_LINEAR:
  local_mods[local_count++] = modifiers[i];
  break;
-- 
2.12.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] [v2] i965: Handle Y-tiled modifier

2017-03-20 Thread Ben Widawsky
This patch begins introducing how we'll actually handle the potentially
many modifiers coming in from the API, how we'll store them, and the
structure in the code to support it.

Prior to this patch, the Y-tiled modifier would be entirely ignored. It
shouldn't actually be used until this point because we've not bumped the
DRIimage extension version (which is a requirement to use modifiers).

Measuring later in the series with kmscube:
Linear:
Read bandwidth: 1048.44 MiB/s
Write bandwidth: 1483.17 MiB/s

Y-tiled:
Read bandwidth: 471.13 MiB/s
Write bandwidth: 589.10 MiB/s

Similar functionality was introduced and then reverted here:

commit 6a0d036483caf87d43ebe2edd1905873446c9589
Author: Ben Widawsky 
Date:   Thu Apr 21 20:14:58 2016 -0700

i965: Always use Y-tiled buffers on SKL+

v2: Use last set bit instead of first set bit in modifiers to address
bug found by Daniel Stone.

v3: Use the new priority modifier selection thing. This nullifies the
bug fixed by v2 also.

Signed-off-by: Ben Widawsky 
Reviewed-by: Eric Engestrom 
Acked-by: Daniel Stone 
---
 src/gbm/backends/dri/gbm_dri.c   | 18 
 src/mesa/drivers/dri/i965/intel_screen.c | 36 
 2 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index a7ac149365..a78ea89fca 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -1143,6 +1143,15 @@ gbm_dri_bo_create(struct gbm_device *gbm,
  goto failed;
   }
 
+  if (modifiers) {
+ for (int i = 0; i < count; i++)
+if (modifiers[i] == DRM_FORMAT_MOD_INVALID) {
+   fprintf(stderr, "Invalid modifier passed in 
DRM_FORMAT_MOD_INVALID");
+   errno = EINVAL;
+   goto failed;
+}
+  }
+
   bo->image =
  dri->image->createImageWithModifiers(dri->screen,
   width, height,
@@ -1240,6 +1249,15 @@ gbm_dri_surface_create(struct gbm_device *gbm,
   return NULL;
}
 
+   if (modifiers) {
+  for (int i = 0; i < count; i++)
+ if (modifiers[i] == DRM_FORMAT_MOD_INVALID) {
+fprintf(stderr, "Invalid modifier passed in 
DRM_FORMAT_MOD_INVALID");
+errno = EINVAL;
+return NULL;
+ }
+   }
+
surf = calloc(1, sizeof *surf);
if (surf == NULL) {
   errno = ENOMEM;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 14e60ef1a1..e4f858ed33 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -23,6 +23,7 @@
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -518,11 +519,13 @@ intel_destroy_image(__DRIimage *image)
 enum modifier_priority {
MODIFIER_PRIORITY_INVALID = 0,
MODIFIER_PRIORITY_LINEAR,
+   MODIFIER_PRIORITY_Y,
 };
 
 const uint64_t priority_to_modifier[] = {
[MODIFIER_PRIORITY_INVALID] = DRM_FORMAT_MOD_INVALID,
[MODIFIER_PRIORITY_LINEAR] = DRM_FORMAT_MOD_LINEAR,
+   [MODIFIER_PRIORITY_Y] = I915_FORMAT_MOD_Y_TILED,
 };
 
 static uint64_t
@@ -530,11 +533,13 @@ select_best_modifier(struct gen_device_info *devinfo,
  const uint64_t *modifiers,
  const unsigned count)
 {
-
enum modifier_priority prio = MODIFIER_PRIORITY_INVALID;
 
for (int i = 0; i < count; i++) {
   switch (modifiers[i]) {
+  case I915_FORMAT_MOD_Y_TILED:
+ prio = MAX2(prio, MODIFIER_PRIORITY_Y);
+ break;
   case DRM_FORMAT_MOD_LINEAR:
  prio = MAX2(prio, MODIFIER_PRIORITY_LINEAR);
  break;
@@ -575,6 +580,9 @@ intel_create_image_common(__DRIscreen *dri_screen,
case DRM_FORMAT_MOD_LINEAR:
   tiling = I915_TILING_NONE;
   break;
+   case I915_FORMAT_MOD_Y_TILED:
+  tiling = I915_TILING_Y;
+  break;
case DRM_FORMAT_MOD_INVALID:
default:
  break;
@@ -626,8 +634,26 @@ intel_create_image_with_modifiers(__DRIscreen *dri_screen,
   const unsigned count,
   void *loaderPrivate)
 {
-   return intel_create_image_common(dri_screen, width, height, format, 0, NULL,
-0, loaderPrivate);
+   uint64_t local_mods[count];
+   int local_count = 0;
+
+   /* This compacts the actual modifiers to the ones we know how to handle */
+   for (int i = 0; i < count; i++) {
+  switch (modifiers[i]) {
+  case I915_FORMAT_MOD_Y_TILED:
+  case DRM_FORMAT_MOD_LINEAR:
+ local_mods[local_count++] = modifiers[i];
+ break;
+  case DRM_FORMAT_MOD_INVALID:
+ unreachable("Invalid modifiers specified\n");
+  default:
+ /* Modifiers from other vendors would land here. */
+ break;

[Mesa-dev] [PATCH 3/6] [v2] i965: Handle the linear fb modifier

2017-03-20 Thread Ben Widawsky
At image creation create a path for dealing with the linear modifier.
This works exactly like the old usage flags where __DRI_IMAGE_USE_LINEAR
was specified.

During development of this patch series, it was decided that a lack of
modifier was an insufficient way to express the required modifiers. As a
result, 0 was repurposed to mean a modifier for a LINEAR layout.

NOTE: This patch was added for v3 of the patch series.

v2: Rework the algorithm for modifier selection to go from a bitmask
based selection to this priority value.

Requested-by: Jason Ekstrand 
Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_screen.c | 50 +++-
 1 file changed, 42 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 2455649032..14e60ef1a1 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -45,6 +45,10 @@
 #define DRM_FORMAT_MOD_INVALID ((1ULL<<56) - 1)
 #endif
 
+#ifndef DRM_FORMAT_MOD_LINEAR
+#define DRM_FORMAT_MOD_LINEAR 0
+#endif
+
 static const __DRIconfigOptionsExtension brw_config_options = {
.base = { __DRI_CONFIG_OPTIONS, 1 },
.xml =
@@ -511,13 +515,36 @@ intel_destroy_image(__DRIimage *image)
free(image);
 }
 
+enum modifier_priority {
+   MODIFIER_PRIORITY_INVALID = 0,
+   MODIFIER_PRIORITY_LINEAR,
+};
+
+const uint64_t priority_to_modifier[] = {
+   [MODIFIER_PRIORITY_INVALID] = DRM_FORMAT_MOD_INVALID,
+   [MODIFIER_PRIORITY_LINEAR] = DRM_FORMAT_MOD_LINEAR,
+};
+
 static uint64_t
 select_best_modifier(struct gen_device_info *devinfo,
  const uint64_t *modifiers,
  const unsigned count)
 {
-   /* Modifiers are not supported by this DRI driver */
-   return DRM_FORMAT_MOD_INVALID;
+
+   enum modifier_priority prio = MODIFIER_PRIORITY_INVALID;
+
+   for (int i = 0; i < count; i++) {
+  switch (modifiers[i]) {
+  case DRM_FORMAT_MOD_LINEAR:
+ prio = MAX2(prio, MODIFIER_PRIORITY_LINEAR);
+ break;
+  case DRM_FORMAT_MOD_INVALID:
+  default:
+ unreachable("Invalid modifiers specified\n");
+  }
+   }
+
+   return priority_to_modifier[prio];
 }
 
 static __DRIimage *
@@ -530,7 +557,10 @@ intel_create_image_common(__DRIscreen *dri_screen,
 {
__DRIimage *image;
struct intel_screen *screen = dri_screen->driverPrivate;
-   uint32_t tiling;
+   /* Historically, X-tiled was the default, and so lack of modifier means
+* X-tiled.
+*/
+   uint32_t tiling = I915_TILING_X;
int cpp;
unsigned long pitch;
 
@@ -541,12 +571,15 @@ intel_create_image_common(__DRIscreen *dri_screen,
assert(!(use && count));
 
uint64_t modifier = select_best_modifier(&screen->devinfo, modifiers, 
count);
-   assert(modifier == DRM_FORMAT_MOD_INVALID);
+   switch (modifier) {
+   case DRM_FORMAT_MOD_LINEAR:
+  tiling = I915_TILING_NONE;
+  break;
+   case DRM_FORMAT_MOD_INVALID:
+   default:
+ break;
+   }
 
-   /* Historically, X-tiled was the default, and so lack of modifier means
-* X-tiled.
-*/
-   tiling = I915_TILING_X;
if (use & __DRI_IMAGE_USE_CURSOR) {
   if (width != 64 || height != 64)
 return NULL;
@@ -571,6 +604,7 @@ intel_create_image_common(__DRIscreen *dri_screen,
image->width = width;
image->height = height;
image->pitch = pitch;
+   image->modifier = modifier;
 
return image;
 }
-- 
2.12.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] [v2] i965: Enable modifier queries

2017-03-20 Thread Ben Widawsky
New to the patch series after reordering things for landing smaller
chunks.

This will essentially enable modifiers from clients that were just
enabled in previous patches. A client could use the modifiers by
setting all of them at create, but had no way to actually query them
after creating the surface (ie. stupid clients could be broken before
this patch, but in more ways than this).

Obviously, there are no modifiers being actually stored yet - so this
patch shouldn't do anything other than allow the API to get back 0 (or
the LINEAR modifier).

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_image.h  | 1 +
 src/mesa/drivers/dri/i965/intel_screen.c | 6 ++
 2 files changed, 7 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_image.h 
b/src/mesa/drivers/dri/i965/intel_image.h
index fd63919b2d..a362c76689 100644
--- a/src/mesa/drivers/dri/i965/intel_image.h
+++ b/src/mesa/drivers/dri/i965/intel_image.h
@@ -71,6 +71,7 @@ struct __DRIimageRec {
GLenum internal_format;
uint32_t dri_format;
GLuint format;
+   uint64_t modifier; /**< fb modifier (fourcc) */
uint32_t offset;
 
/*
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 1525b4c560..2455649032 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -632,6 +632,12 @@ intel_query_image(__DRIimage *image, int attrib, int 
*value)
case __DRI_IMAGE_ATTRIB_OFFSET:
   *value = image->offset;
   return true;
+   case __DRI_IMAGE_ATTRIB_MODIFIER_LOWER:
+  *value = (image->modifier & 0xffffffff);
+  return true;
+   case __DRI_IMAGE_ATTRIB_MODIFIER_UPPER:
+  *value = ((image->modifier >> 32) & 0xffffffff);
+  return true;
 
   default:
   return false;
-- 
2.12.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] [v2] i965: Enable modifier queries

2017-03-20 Thread Ben Widawsky
New to the patch series after reordering things for landing smaller
chunks.

This will essentially enable modifiers from clients that were just
enabled in previous patches. A client could use the modifiers by
setting all of them at create, but had no way to actually query them
after creating the surface (ie. stupid clients could be broken before
this patch, but in more ways than this).

Obviously, there are no modifiers being actually stored yet - so this
patch shouldn't do anything other than allow the API to get back 0 (or
the LINEAR modifier).

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_image.h  | 1 +
 src/mesa/drivers/dri/i965/intel_screen.c | 6 ++
 2 files changed, 7 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_image.h 
b/src/mesa/drivers/dri/i965/intel_image.h
index fd63919b2d..a362c76689 100644
--- a/src/mesa/drivers/dri/i965/intel_image.h
+++ b/src/mesa/drivers/dri/i965/intel_image.h
@@ -71,6 +71,7 @@ struct __DRIimageRec {
GLenum internal_format;
uint32_t dri_format;
GLuint format;
+   uint64_t modifier; /**< fb modifier (fourcc) */
uint32_t offset;
 
/*
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 1525b4c560..2455649032 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -632,6 +632,12 @@ intel_query_image(__DRIimage *image, int attrib, int 
*value)
case __DRI_IMAGE_ATTRIB_OFFSET:
   *value = image->offset;
   return true;
+   case __DRI_IMAGE_ATTRIB_MODIFIER_LOWER:
+  *value = (image->modifier & 0xffffffff);
+  return true;
+   case __DRI_IMAGE_ATTRIB_MODIFIER_UPPER:
+  *value = ((image->modifier >> 32) & 0xffffffff);
+  return true;
 
   default:
   return false;
-- 
2.12.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] configure.ac: Use POSIX word boundary regex.

2017-03-20 Thread Michel Dänzer
On 20/03/17 12:19 AM, Eric Engestrom wrote:
> On Sunday, 2017-03-19 13:44:30 +, Jan Beich wrote:
>> Vinson Lee  writes:
>>
>>> --e 's/[[[:space:]]]+-DNDEBUG\>//g' \
>>> --e 's/[[[:space:]]]+-D_GNU_SOURCE\>//g' \
>>> --e 's/[[[:space:]]]+-pedantic\>//g' \
>>> +-e 's/[[[:space:]]]+-DNDEBUG[[[:space:]]]//g' \
>>> +-e 's/[[[:space:]]]+-D_GNU_SOURCE[[[:space:]]]//g' \
>>> +-e 's/[[[:space:]]]+-pedantic[[[:space:]]]//g' \
>>
>> If you always expect whitespace on both sides but trim it the RE
>> may not match with more than one occurrence in sequence e.g.,
>>
>>  $ echo ' -D_GNU_SOURCE -DNDEBUG ' |
>>sed -E -e 's/[[:space:]]+-DNDEBUG[[:space:]]//g' \
>>   -e 's/[[:space:]]+-D_GNU_SOURCE[[:space:]]//g'
>>-D_GNU_SOURCE
>>
>> vs. (restore some whitespace)
>>
>>   $ echo ' -D_GNU_SOURCE -DNDEBUG ' |
>> sed -E -e 's/[[:space:]]+-DNDEBUG[[:space:]]/ /g' \
>>-e 's/[[:space:]]+-D_GNU_SOURCE[[:space:]]/ /g'
>>
> 
> Oh yes, very good point there!
> My r-b now stands with this fix applied :)
> 
> This will lead to a bunch of unnecessary whitespace in the middle, but
> I'm not sure I'd recommend addressing this:
> A simple 's/[[[:space:]]]+/ /g' at the end should trivially fix this,
> [...]

Another option would be to match the trailing whitespace as a
subexpression and add that back on the right hand side.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] configure.ac: Use POSIX word boundary regex.

2017-03-20 Thread Michel Dänzer

In addition to other comments, the Git shortlog no longer matches the
patch itself.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Select pipeline and emit state base address in Gen8+ HiZ ops.

2017-03-20 Thread Nanley Chery
On Mon, Mar 20, 2017 at 08:01:25PM -0700, Nanley Chery wrote:
> On Thu, Mar 16, 2017 at 05:34:13PM -0700, Kenneth Graunke wrote:
> > On Wednesday, March 8, 2017 10:27:20 AM PDT Nanley Chery wrote:
> > > On Wed, Mar 08, 2017 at 10:07:12AM -0800, Nanley Chery wrote:
> > > > On Wed, Mar 08, 2017 at 02:17:59AM -0800, Kenneth Graunke wrote:
> > > > > On Thursday, March 2, 2017 4:36:08 PM PST Nanley Chery wrote:
> > > > > > On Mon, Feb 06, 2017 at 03:55:49PM -0800, Kenneth Graunke wrote:
> > > > > > > If a HiZ op is the first thing in the batch, we should make sure
> > > > > > > to select the render pipeline and emit state base address before
> > > > > > > proceeding.
> > > > > > > 
> > > > > > > I believe 3DSTATE_WM_HZ_OP creates 3DPRIMITIVEs internally, and
> > > > > > > dispatching those on the GPGPU pipeline seems a bit sketchy.  I'm
> > > > > > 
> > > > > > Yes, it does seem like we currently allow HZ_OPs within a GPGPU
> > > > > > pipeline. This patch should fix that problem.
> > > > > > 
> > > > > > > not actually sure that STATE_BASE_ADDRESS is necessary, as the
> > > > > > > depth related commands use graphics addresses, not ones relative
> > > > > > > to the base address...but we're likely to set it as part of the
> > > > > > > next operation anyway, so we should just do it right away.
> > > > > > > 
> 
> Why should we do it right away if it will happen later on? I don't see
> why this part of the patch is necessary.
> 
> > > > > > 
> > > > > > I agree, re-emitting STATE_BASE_ADDRESS doesn't seem necessary. I 
> > > > > > think
> > > > > > we should drop this part of the patch and add it back in later if 
> > > > > > we get
> > > > > > some data that it's necessary. Leaving it there may be distracting 
> > > > > > to
> > > > > > some readers and the BDW PRM warns that it's an expensive command:
> > > > > > 
> > > > > > Execution of this command causes a full pipeline flush, thus its
> > > > > > use should be minimized for higher performance.
> > > > > 
> > > > > I think it should be basically free, actually.  We track a boolean,
> > > > > brw->batch.state_base_address_emitted, to avoid emitting it multiple
> > > > > times per batch.
> > > > > 
> > > > > Let's say the first thing in a fresh batch is a HiZ op, followed by
> > > > > normal drawing.  Previously, we'd do:
> > > > > 
> > > > > 1. HiZ op commands
> > > > > 2. STATE_BASE_ADDRESS (triggered by normal rendering upload)
> > > > > 3. rest of normal drawing commands
> > > > > 
> > > > > Now we'd do:
> > > > > 
> > > > > 1. STATE_BASE_ADDRESS (triggered by HiZ op)
> > > > > 2. HiZ op commands
> > > > > 3. normal drawing commands (second SBA is skipped)
> > > > > 
> > > > > In other words...we're just moving it a bit earlier.  I suppose there
> > > > > could be a batch containing only HiZ ops, at which point we'd pay for
> > > > > a single STATE_BASE_ADDRESS...but that seems really unlikely.
> > > > > 
> > > > 
> > > > Sorry for not stating it up front, but the special case you've mentioned
> > > > is exactly what I'd like not to hurt unnecessarily.
> > > > 
> > 
> > Why?  We really think there are going to be batches with only
> > 3DSTATE_WM_HZ_OP and no normal rendering or BLORP?  It sounds
> > really hypothetical to me.
> > 
> 
> I've commented on the performance implications of that snippet because
> it is the only functional change I can see from emitting SBA. That
> unfortunately seems to have distracted us from the more important
> question found above. Sorry about that.
> 
> > > Correct me if I'm wrong, but after thinking about it some more, it seems
> > > that performance wouldn't suffer by emitting the SBA since the pipeline
> > > was already flushed at the end of the preceding batch. It may also
> > > improve since the pipelined HiZ op will likely be followed by other
> > > pipelined commands. I'm not totally confident in my understanding on
> > > pipeline flushes by the way. Is this why you'd like to emit the SBA here?
> > > I think it's fine to leave it if we expound on the rationale.
> > 
> > Performance is not a motivation for this patch.  Having the GPU do
> > work without a pipeline selected or state base addresses in place seems
> > potentially dangerous.  I was hoping it would help with GPU hangs.  I'm
> > not certain that it does, and it might be safe to skip this, but it
> > seems like a lot of mental gymnastics to prove that it's safe for very
> > little upside.
> > 
> 
> I was only referring to the portion of the patch that emits SBA.
> 

Sorry, I definitely did not read this thoroughly enough. Please ignore
my earlier reply.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Select pipeline and emit state base address in Gen8+ HiZ ops.

2017-03-20 Thread Nanley Chery
On Thu, Mar 16, 2017 at 05:34:13PM -0700, Kenneth Graunke wrote:
> On Wednesday, March 8, 2017 10:27:20 AM PDT Nanley Chery wrote:
> > On Wed, Mar 08, 2017 at 10:07:12AM -0800, Nanley Chery wrote:
> > > On Wed, Mar 08, 2017 at 02:17:59AM -0800, Kenneth Graunke wrote:
> > > > On Thursday, March 2, 2017 4:36:08 PM PST Nanley Chery wrote:
> > > > > On Mon, Feb 06, 2017 at 03:55:49PM -0800, Kenneth Graunke wrote:
> > > > > > If a HiZ op is the first thing in the batch, we should make sure
> > > > > > to select the render pipeline and emit state base address before
> > > > > > proceeding.
> > > > > > 
> > > > > > I believe 3DSTATE_WM_HZ_OP creates 3DPRIMITIVEs internally, and
> > > > > > dispatching those on the GPGPU pipeline seems a bit sketchy.  I'm
> > > > > 
> > > > > Yes, it does seem like we currently allow HZ_OPs within a GPGPU
> > > > > pipeline. This patch should fix that problem.
> > > > > 
> > > > > > not actually sure that STATE_BASE_ADDRESS is necessary, as the
> > > > > > depth related commands use graphics addresses, not ones relative
> > > > > > to the base address...but we're likely to set it as part of the
> > > > > > next operation anyway, so we should just do it right away.
> > > > > > 

Why should we do it right away if it will happen later on? I don't see
why this part of the patch is necessary.

> > > > > 
> > > > > I agree, re-emitting STATE_BASE_ADDRESS doesn't seem necessary. I 
> > > > > think
> > > > > we should drop this part of the patch and add it back in later if we 
> > > > > get
> > > > > some data that it's necessary. Leaving it there may be distracting to
> > > > > some readers and the BDW PRM warns that it's an expensive command:
> > > > > 
> > > > >   Execution of this command causes a full pipeline flush, thus its
> > > > >   use should be minimized for higher performance.
> > > > 
> > > > I think it should be basically free, actually.  We track a boolean,
> > > > brw->batch.state_base_address_emitted, to avoid emitting it multiple
> > > > times per batch.
> > > > 
> > > > Let's say the first thing in a fresh batch is a HiZ op, followed by
> > > > normal drawing.  Previously, we'd do:
> > > > 
> > > > 1. HiZ op commands
> > > > 2. STATE_BASE_ADDRESS (triggered by normal rendering upload)
> > > > 3. rest of normal drawing commands
> > > > 
> > > > Now we'd do:
> > > > 
> > > > 1. STATE_BASE_ADDRESS (triggered by HiZ op)
> > > > 2. HiZ op commands
> > > > 3. normal drawing commands (second SBA is skipped)
> > > > 
> > > > In other words...we're just moving it a bit earlier.  I suppose there
> > > > could be a batch containing only HiZ ops, at which point we'd pay for
> > > > a single STATE_BASE_ADDRESS...but that seems really unlikely.
> > > > 
> > > 
> > > Sorry for not stating it up front, but the special case you've mentioned
> > > is exactly what I'd like not to hurt unnecessarily.
> > > 
> 
> Why?  We really think there are going to be batches with only
> 3DSTATE_WM_HZ_OP and no normal rendering or BLORP?  It sounds
> really hypothetical to me.
> 

I've commented on the performance implications of that snippet because
it is the only functional change I can see from emitting SBA. That
unfortunately seems to have distracted us from the more important
question found above. Sorry about that.

> > Correct me if I'm wrong, but after thinking about it some more, it seems
> > that performance wouldn't suffer by emitting the SBA since the pipeline
> > was already flushed at the end of the preceding batch. It may also
> > improve since the pipelined HiZ op will likely be followed by other
> > pipelined commands. I'm not totally confident in my understanding on
> > pipeline flushes by the way. Is this why you'd like to emit the SBA here?
> > I think it's fine to leave it if we expound on the rationale.
> 
> Performance is not a motivation for this patch.  Having the GPU do
> work without a pipeline selected or state base addresses in place seems
> potentially dangerous.  I was hoping it would help with GPU hangs.  I'm
> not certain that it does, and it might be safe to skip this, but it
> seems like a lot of mental gymnastics to prove that it's safe for very
> little upside.
> 

I was only referring to the portion of the patch that emits SBA.

> I think you're right, though - doing the non-pipelined commands at the
> top may actually be better than kicking off work, stalling, and kicking
> off more work.  *shrug*
> 
> > -Nanley
> > 
> > > > > > Cc: "17.0" 
> > > > > > Signed-off-by: Kenneth Graunke 
> > > > > > ---
> > > > > >  src/mesa/drivers/dri/i965/gen8_depth_state.c | 3 +++
> > > > > >  1 file changed, 3 insertions(+)
> > > > > > 
> > > > > > diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c 
> > > > > > b/src/mesa/drivers/dri/i965/gen8_depth_state.c
> > > > > > index a7e61354fd5..620b32df8bb 100644
> > > > > > --- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
> > > > > > +++ 

[Mesa-dev] [AppVeyor] mesa master #3808 completed

2017-03-20 Thread AppVeyor


Build mesa 3808 completed



Commit 020b3f0c46 by Timothy Arceri on 3/21/2017 12:49 AM:

util/disk_cache: fix build on platforms where shader cache is disabled


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] 10bit HEVC decoding for RadeonSI v2

2017-03-20 Thread Peter Frühberger
Hi Christian,

we use it the following way:
Depending on the surface (NV12 vs. P010) we use:
https://github.com/FernetMenta/kodi-agile/blob/master/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.cpp#L1416


R8 and GR88

or alternatively:
https://github.com/FernetMenta/kodi-agile/blob/master/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.cpp#L1493

R16 and GR32

There is also a possibility to use BGRA, but this involves internal copy of
the yuv surfaces in vaapi and is therefore not suited well (more memory and
more load).

For both images, Y and UV, we use the eglCreateImageKHR extension followed
by glEGLImageTargetTexture2DOES.

See:
https://github.com/FernetMenta/kodi-agile/blob/master/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.cpp#L1262

On the VAAPI side:
VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME with either VA_RT_FORMAT_YUV420
or VA_FOURCC_P010 are used.

I think that method is quite generalizable and nothing is intel specific.

What do you think?

Best regards
Peter



2017-03-19 14:49 GMT+01:00 Christian König :

> Hi Peter,
>
> Adding Michel and Marek for the Mesa interop side and Harry for the
> display side.
>
> How do you want us to display the decoded surfaces?
>
> Well to make a long story short: I don't have the slightest idea. Ideally
> we would have the same handling as Intel so that you guys don't have anything
> vendor dependent in your code.
>
> The first step would be to get the VA-API DRM extension to work with EGL.
> So that Kodi is able to export the YUV surfaces and import parts of them as
> separate R8/R16 or R8G8/R16G16 surfaces, right?
>
> What EGL/GL extension do you guys use to import the surfaces? Marek is
> that stuff fully supported, e.g. do we also handle the offsets correctly?
> I've added the backend code for this while doing VDPAU interop, but the
> EGL/GL frontend code needs to handle it gracefully as well.
>
> The second step is then to teach our DC how to handle RGB surfaces with
> 10bit. I doubt the old code has support for that and we probably don't want
> to add it. So Harry can you comment on how far along we got with that in DC?
>
> Regards,
> Christian.
>
>
> Am 19.03.2017 um 13:26 schrieb Peter Frühberger:
>
> Hi Christian,
>
> thank you for your message. We are still wondering about the render part.
> How do you want us to display the decoded surfaces? Looking at mpv it seems
> it will only work via vaPutSurface and is therefore tied to X11. That
> means it's dependent on the visuals 8 bit only.
>
> We are working on a drm-only kodi and now ask ourselves: Is there a
> possibility to interop with a drm extension and eglCreateImage on AMD hw,
> too? With the intel only R32, R8 linux buf methods we are also running
> successfully on MIR now, wayland would work the very same.
>
> Best regards
> Peter
>
>
>
> 2017-03-10 17:25 GMT+01:00 rainer.hochec...@onlinehome.de <
> rainer.hochec...@onlinehome.de>:
>
>> Hi Christian,
>>
>> I already removed the check for Intel in my dev branch. On startup
>> Kodi does a functional test if vaapi works. If the test passes, it is
>> available
>> regardless of the underlying type of hardware/driver.
>>
>> Regards,
>> Rainer
>>
>> *Gesendet:* Mittwoch, 08. März 2017 um 13:29 Uhr
>> *Von:* "Christian König" 
>> *An:* mesa-dev@lists.freedesktop.org
>> *Cc:* rainer.hochec...@onlinehome.de, peter.fruehber...@gmail.com
>> *Betreff:* 10bit HEVC decoding for RadeonSI v2
>> Hi guys,
>>
>> I finally found time testing this and hammering out (hopefully) all the
>> remaining bugs. Playing a 10bit HEVC file through VAAPI with mpv/ffmpeg
>> git
>> master from about two days ago now works flawlessly and has only about
>> 15% CPU
>> load on one core on a Kaveri system.
>>
>> The VDPAU path should work as well, but NVidias implementation of this is
>> still
>> completely broken and so nobody enables it and we don't have a way to
>> test it.
>>
>> Rainer/Peter maybe you guys want to take a look and enable it in Kodi.
>>
>> The next logical step is to get our display code paths to be 10bit ready.
>>
>> Please review and comment,
>> Christian.
>>
>>
>
>
>
> --
>Key-ID: 0x1A995A9B
>keyserver: pgp.mit.edu
> ==
> Fingerprint: 4606 DA19 EC2E 9A0B 0157  C81B DA07 CF63 1A99 5A9B
>
>
>


-- 
   Key-ID: 0x1A995A9B
   keyserver: pgp.mit.edu
==
Fingerprint: 4606 DA19 EC2E 9A0B 0157  C81B DA07 CF63 1A99 5A9B
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] 10bit HEVC decoding for RadeonSI v2

2017-03-20 Thread Peter Frühberger
Hi Christian,

thank you for your message. We are still wondering about the render part.
How do you want us to display the decoded surfaces? Looking at mpv it seems
it will only work via vaPutSurface and is therefore tied to X11. That
means it's dependent on the visuals 8 bit only.

We are working on a drm-only kodi and now ask ourselves: Is there a
possibility to interop with a drm extension and eglCreateImage on AMD hw,
too? With the intel only R32, R8 linux buf methods we are also running
successfully on MIR now, wayland would work the very same.

Best regards
Peter



2017-03-10 17:25 GMT+01:00 rainer.hochec...@onlinehome.de <
rainer.hochec...@onlinehome.de>:

> Hi Christian,
>
> I already removed the check for Intel in my dev branch. On startup
> Kodi does a functional test if vaapi works. If the test passes, it is
> available
> regardless of the underlying type of hardware/driver.
>
> Regards,
> Rainer
>
> *Gesendet:* Mittwoch, 08. März 2017 um 13:29 Uhr
> *Von:* "Christian König" 
> *An:* mesa-dev@lists.freedesktop.org
> *Cc:* rainer.hochec...@onlinehome.de, peter.fruehber...@gmail.com
> *Betreff:* 10bit HEVC decoding for RadeonSI v2
> Hi guys,
>
> I finally found time testing this and hammering out (hopefully) all the
> remaining bugs. Playing a 10bit HEVC file through VAAPI with mpv/ffmpeg git
> master from about two days ago now works flawlessly and has only about 15%
> CPU
> load on one core on a Kaveri system.
>
> The VDPAU path should work as well, but NVidias implementation of this is
> still
> completely broken and so nobody enables it and we don't have a way to test
> it.
>
> Rainer/Peter maybe you guys want to take a look and enable it in Kodi.
>
> The next logical step is to get our display code paths to be 10bit ready.
>
> Please review and comment,
> Christian.
>
>



-- 
   Key-ID: 0x1A995A9B
   keyserver: pgp.mit.edu
==
Fingerprint: 4606 DA19 EC2E 9A0B 0157  C81B DA07 CF63 1A99 5A9B
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa glthread: allow asynchronous pixel transfer operation when a buffer is bound

2017-03-20 Thread Markus Wick

Am 2017-03-20 14:21, schrieb Nicolai Hähnle:

On 17.03.2017 18:59, gregory hainaut wrote:
If the application is badly/strangely coded, glthread will make it 
worse.

The solution ought to be either fix the app or don't use glthread.


It would be nice if glthread could handle this properly, but I don't
currently see how.


The dispatcher thread needs a map of all valid buffer objects. So we 
need to update such a map on all glGenBuffers/glDeleteBuffers calls. So 
the overhead will be the map lookup on all affected glBindBuffer calls.


Regards,
degasus
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glxglvnddispatch: Add missing dispatch for GetDriverConfig

2017-03-20 Thread Hans de Goede
Together with some fixes to xdriinfo this fixes xdriinfo not working
with glvnd.

Since apps (xdriinfo) expect GetDriverConfig to work without needing to
go through the dance to set up a glxcontext (which is a reasonable
expectation IMHO), the dispatch for this ends up significantly different
than any other dispatch function.

This patch gets the job done, but I'm not really happy with how this
patch turned out, suggestions for a better fix are welcome.

Cc: Kyle Brenneman 
Signed-off-by: Hans de Goede 
---
 src/glx/g_glxglvnddispatchfuncs.c   | 18 ++
 src/glx/g_glxglvnddispatchindices.h |  1 +
 2 files changed, 19 insertions(+)

diff --git a/src/glx/g_glxglvnddispatchfuncs.c 
b/src/glx/g_glxglvnddispatchfuncs.c
index b5e3398..040cdf8 100644
--- a/src/glx/g_glxglvnddispatchfuncs.c
+++ b/src/glx/g_glxglvnddispatchfuncs.c
@@ -4,6 +4,7 @@
  */
 #include <stdlib.h>
 
+#include "glxclient.h"
 #include "glxglvnd.h"
 #include "glxglvnddispatchfuncs.h"
 #include "g_glxglvnddispatchindices.h"
@@ -50,6 +51,7 @@ const char * const __glXDispatchTableStrings[DI_LAST_INDEX] = 
{
 __ATTRIB(GetCurrentDisplayEXT),
 // glXGetCurrentDrawable implemented by libglvnd
 // glXGetCurrentReadDrawable implemented by libglvnd
+__ATTRIB(GetDriverConfig),
 // glXGetFBConfigAttrib implemented by libglvnd
 __ATTRIB(GetFBConfigAttribSGIX),
 __ATTRIB(GetFBConfigFromVisualSGIX),
@@ -334,6 +336,21 @@ static Display *dispatch_GetCurrentDisplayEXT(void)
 
 
 
+static const char *dispatch_GetDriverConfig(const char *driverName)
+{
+/*
+ * The options are constant for a given driverName, so we do not need
+ * a context (and apps expect to be able to call this without one).
+ */
+#if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL)
+return glXGetDriverConfig(driverName);
+#else
+return NULL;
+#endif
+}
+
+
+
 static int dispatch_GetFBConfigAttribSGIX(Display *dpy, GLXFBConfigSGIX config,
   int attribute, int *value_return)
 {
@@ -939,6 +956,7 @@ const void * const __glXDispatchFunctions[DI_LAST_INDEX + 
1] = {
 __ATTRIB(DestroyGLXPbufferSGIX),
 __ATTRIB(GetContextIDEXT),
 __ATTRIB(GetCurrentDisplayEXT),
+__ATTRIB(GetDriverConfig),
 __ATTRIB(GetFBConfigAttribSGIX),
 __ATTRIB(GetFBConfigFromVisualSGIX),
 __ATTRIB(GetMscRateOML),
diff --git a/src/glx/g_glxglvnddispatchindices.h 
b/src/glx/g_glxglvnddispatchindices.h
index 05a2c8c..3ba50a7 100644
--- a/src/glx/g_glxglvnddispatchindices.h
+++ b/src/glx/g_glxglvnddispatchindices.h
@@ -39,6 +39,7 @@ typedef enum __GLXdispatchIndex {
 DI_GetCurrentDisplayEXT,
 // GetCurrentDrawable implemented by libglvnd
 // GetCurrentReadDrawable implemented by libglvnd
+DI_GetDriverConfig,
 // GetFBConfigAttrib implemented by libglvnd
 DI_GetFBConfigAttribSGIX,
 DI_GetFBConfigFromVisualSGIX,
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] 10bit HEVC decoding for RadeonSI v2

2017-03-20 Thread rainer.hochec...@onlinehome.de
> for example how does synchronization happen between the two APIs?

 

right, vaapi seems not as matured as vdpau in this regard. But Kodi's multithreading design does cope with this. We call

vaSyncSurface before feeding vpp and before mapping va buffers to GL.


 

I suggest to keep it simple from a driver perspective and require applications to use vaSyncSurface

 


Gesendet: Sonntag, 19. März 2017 um 15:28 Uhr
Von: "Christian König" 
An: "Peter Frühberger" 
Cc: "rainer.hochec...@onlinehome.de" , mesa-dev@lists.freedesktop.org, lru...@libreelec.tv, "Michel Dänzer" , "Marek Olšák" , "Wentland, Harry" 
Betreff: Re: 10bit HEVC decoding for RadeonSI v2





What do you think?

In general that it might work, but basic problem is the API design once more.

With VDPAU, the application asks OpenGL to interop with VDPAU, and the two APIs can do all the handshaking internally.

With VA-API we have Application exporting buffers from VA-API and then importing the same buffer as two surfaces into OpenGL.

That leaves a whole bunch of open questions, for example how does synchronization happen between the two APIs? E.g. the application (Kodi) probably doesn't want to wait for the decoding result before it uses the surface with OpenGL. We don't have a way to sync between the two APIs here except for the handle.

The next problem is how do we communicate the layout of data in the buffer? E.g. we have the format and the offset, but that assumes that you don't have any nasty kind of tiling modes applied here.

I think we can make that work for now (we aren't using tiling modes with UVD much anyway), but this is going to bite us again sooner or later. Going to put the whole thing on my todo list once more.

Regards,
Christian.

Am 19.03.2017 um 15:06 schrieb Peter Frühberger:


Hi Christian,
 

we use it the following way:

Depending on the surface, NV12 vs. P010, we use:

https://github.com/FernetMenta/kodi-agile/blob/master/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.cpp#L1416 

 

R8 and GR88

 

or alternatively:

https://github.com/FernetMenta/kodi-agile/blob/master/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.cpp#L1493

 

R16 and GR32

 

There is also a possibility to use BGRA, but this involves internal copy of the yuv surfaces in vaapi and is therefore not suited well (more memory and more load).

 

For both images Y and UV we use: eglCreateImageKHR extension followed by glEGLImageTargetTexture2DOES.

 

See: https://github.com/FernetMenta/kodi-agile/blob/master/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.cpp#L1262

 

On the VAAPI side:
VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME with either VA_RT_FORMAT_YUV420 or VA_FOURCC_P010 are used.

 

I think that method is quite generalizable and nothing is intel specific.

 

What do you think?

 

Best regards

Peter

 

 


 
2017-03-19 14:49 GMT+01:00 Christian König :



Hi Peter,

Adding Michel and Marek for the Mesa interop side and Harry for the display side.
 
How do you want us to display the decoded surfaces?
  Well to make a long story short: I don't have the slightest idea. Ideally we would have the same handling as Intel so that you guys don't have anything vendor dependent in your code.

The first step would be to get the VA-API DRM extension to work with EGL. So that Kodi is able to export the YUV surfaces and import parts of them as separate R8/R16 or R8G8/R16G16 surfaces, right?

What EGL/GL extension do you guys use to import the surfaces? Marek is that stuff fully supported, e.g. do we also handle the offsets correctly? I've added the backend code for this while doing VDPAU interop, but the EGL/GL frontend code needs to handle it gracefully as well.

The second step is then to teach our DC how to handle RGB surfaces with 10bit. I doubt the old code has support for that and we probably don't want to add it. So Harry can you comment on how far along we got with that in DC?

Regards,
Christian.



Am 19.03.2017 um 13:26 schrieb Peter Frühberger:






Hi Christian,
 

thank you for your message. We are still wondering about the render part. How do you want us to display the decoded surfaces? Looking at mpv it seems it will only work via vaPutSurface and is therefore tied to X11. That means it's dependent on the visuals, i.e. 8 bit only.

 

We are working on a drm-only kodi and now ask ourselves: Is there a possibility to interop with a drm extension and eglCreateImage on AMD hw, too? With the intel only R32, R8 linux buf methods we are also running successfully on MIR now, wayland would work the very same.

 

Best regards

Peter

 

 


 
2017-03-10 17:25 GMT+01:00 rainer.hochec...@onlinehome.de :





Hi Christian,

 

I already removed the check for Intel in my dev branch. On startup

Kodi does a functional test if vaapi works. If the 

[Mesa-dev] [AppVeyor] mesa master #3807 failed

2017-03-20 Thread AppVeyor



Build mesa 3807 failed


Commit af73acca2b by Grazvydas Ignotas on 3/15/2017 11:09 PM:

tests/cache_test: use the blob key's actual first byte\n\nThere is no need to hardcode it, we can just use blob_key[0].\nThis is needed because the next patches are going to change how cache\nkeys are computed.\n\nSigned-off-by: Grazvydas Ignotas \nReviewed-by: Timothy Arceri 


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/5] i965: Turn on support for image modifiers

2017-03-20 Thread Jason Ekstrand
On Mon, Mar 20, 2017 at 4:30 PM, Ben Widawsky  wrote:

> On 17-03-20 16:21:53, Jason Ekstrand wrote:
>
>> On Mon, Mar 20, 2017 at 4:15 PM, Ben Widawsky  wrote:
>>
>> On 17-03-20 12:03:24, Jason Ekstrand wrote:
>>>
>>> My only question here is whether or not we want to add a "supported
 modifiers" query to DRIimage before we bump the version number.  It's
 probably not super-important because we can always bump the version
 number
 again.  Meh.


 Remind me what the reasoning is for support modifiers? I thought we
>>> agreed
>>> to
>>> just use GET_PLANE2? Supported modifiers is difficult because mesa has no
>>> idea
>>> of per plane constraints.
>>>
>>
>>
>> They're for completely different cases.  If you want to scan out, use
>> GET_PLANE2, end of story.  If, however, you're not trying to scan out and
>> your window is always going to get composited (think not full-screen) then
>> you may have a much larger set of modifiers available to you and that set
>> of modifiers is not determined by GET_PLANE2 but by the GL or whatever is
>> being used for compositing.
>>
>>
>>
> Oh, yeah. Is there a consumer today of this? If we don't really have one
> readily
> available, I'd say don't worry about it until we need it.


It'll be consumed by the EGL extension that's being cooked up.  If you want
to put it off, that's fine with me.


>
>
>>> On Sat, Mar 18, 2017 at 1:00 PM, Ben Widawsky  wrote:
>>>

 All the plumbing is in place so the extension just needs to be

> advertised.
>
> Signed-off-by: Ben Widawsky 
> ---
>  src/mesa/drivers/dri/i965/intel_screen.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c
> b/src/mesa/drivers/dri/i965/intel_screen.c
> index ca725d9202..1ff18b30e4 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -948,7 +948,7 @@ intel_from_planar(__DRIimage *parent, int plane,
> void
> *loaderPrivate)
>  }
>
>  static const __DRIimageExtension intelImageExtension = {
> -.base = { __DRI_IMAGE, 13 },
> +.base = { __DRI_IMAGE, 14 },
>
>  .createImageFromName=
> intel_create_image_from_name,
>  .createImageFromRenderbuffer= intel_create_image_from_
> renderbuffer,
> --
> 2.12.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
> --
>>> Ben Widawsky, Intel Open Source Technology Center
>>>
>>>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/5] i965: Turn on support for image modifiers

2017-03-20 Thread Ben Widawsky

On 17-03-20 16:21:53, Jason Ekstrand wrote:

On Mon, Mar 20, 2017 at 4:15 PM, Ben Widawsky  wrote:


On 17-03-20 12:03:24, Jason Ekstrand wrote:


My only question here is whether or not we want to add a "supported
modifiers" query to DRIimage before we bump the version number.  It's
probably not super-important because we can always bump the version number
again.  Meh.



Remind me what the reasoning is for support modifiers? I thought we agreed
to
just use GET_PLANE2? Supported modifiers is difficult because mesa has no
idea
of per plane constraints.



They're for completely different cases.  If you want to scan out, use
GET_PLANE2, end of story.  If, however, you're not trying to scan out and
your window is always going to get composited (think not full-screen) then
you may have a much larger set of modifiers available to you and that set
of modifiers is not determined by GET_PLANE2 but by the GL or whatever is
being used for compositing.




Oh, yeah. Is there a consumer today of this? If we don't really have one readily
available, I'd say don't worry about it until we need it.



On Sat, Mar 18, 2017 at 1:00 PM, Ben Widawsky  wrote:


All the plumbing is in place so the extension just needs to be

advertised.

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_screen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c
b/src/mesa/drivers/dri/i965/intel_screen.c
index ca725d9202..1ff18b30e4 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -948,7 +948,7 @@ intel_from_planar(__DRIimage *parent, int plane, void
*loaderPrivate)
 }

 static const __DRIimageExtension intelImageExtension = {
-.base = { __DRI_IMAGE, 13 },
+.base = { __DRI_IMAGE, 14 },

 .createImageFromName= intel_create_image_from_name,
 .createImageFromRenderbuffer= intel_create_image_from_
renderbuffer,
--
2.12.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



--
Ben Widawsky, Intel Open Source Technology Center


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/5] i965: Turn on support for image modifiers

2017-03-20 Thread Jason Ekstrand
On Mon, Mar 20, 2017 at 4:15 PM, Ben Widawsky  wrote:

> On 17-03-20 12:03:24, Jason Ekstrand wrote:
>
>> My only question here is whether or not we want to add a "supported
>> modifiers" query to DRIimage before we bump the version number.  It's
>> probably not super-important because we can always bump the version number
>> again.  Meh.
>>
>>
> Remind me what the reasoning is for support modifiers? I thought we agreed
> to
> just use GET_PLANE2? Supported modifiers is difficult because mesa has no
> idea
> of per plane constraints.


They're for completely different cases.  If you want to scan out, use
GET_PLANE2, end of story.  If, however, you're not trying to scan out and
your window is always going to get composited (think not full-screen) then
you may have a much larger set of modifiers available to you and that set
of modifiers is not determined by GET_PLANE2 but by the GL or whatever is
being used for compositing.


>
> On Sat, Mar 18, 2017 at 1:00 PM, Ben Widawsky  wrote:
>>
>> All the plumbing is in place so the extension just needs to be
>>> advertised.
>>>
>>> Signed-off-by: Ben Widawsky 
>>> ---
>>>  src/mesa/drivers/dri/i965/intel_screen.c | 2 +-
>>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c
>>> b/src/mesa/drivers/dri/i965/intel_screen.c
>>> index ca725d9202..1ff18b30e4 100644
>>> --- a/src/mesa/drivers/dri/i965/intel_screen.c
>>> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
>>> @@ -948,7 +948,7 @@ intel_from_planar(__DRIimage *parent, int plane, void
>>> *loaderPrivate)
>>>  }
>>>
>>>  static const __DRIimageExtension intelImageExtension = {
>>> -.base = { __DRI_IMAGE, 13 },
>>> +.base = { __DRI_IMAGE, 14 },
>>>
>>>  .createImageFromName= intel_create_image_from_name,
>>>  .createImageFromRenderbuffer= intel_create_image_from_
>>> renderbuffer,
>>> --
>>> 2.12.0
>>>
>>> ___
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>>
>>>
> --
> Ben Widawsky, Intel Open Source Technology Center
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/5] i965: Turn on support for image modifiers

2017-03-20 Thread Ben Widawsky

On 17-03-20 12:03:24, Jason Ekstrand wrote:

My only question here is whether or not we want to add a "supported
modifiers" query to DRIimage before we bump the version number.  It's
probably not super-important because we can always bump the version number
again.  Meh.



Remind me what the reasoning is for support modifiers? I thought we agreed to
just use GET_PLANE2? Supported modifiers is difficult because mesa has no idea
of per plane constraints.


On Sat, Mar 18, 2017 at 1:00 PM, Ben Widawsky  wrote:


All the plumbing is in place so the extension just needs to be
advertised.

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_screen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c
b/src/mesa/drivers/dri/i965/intel_screen.c
index ca725d9202..1ff18b30e4 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -948,7 +948,7 @@ intel_from_planar(__DRIimage *parent, int plane, void
*loaderPrivate)
 }

 static const __DRIimageExtension intelImageExtension = {
-.base = { __DRI_IMAGE, 13 },
+.base = { __DRI_IMAGE, 14 },

 .createImageFromName= intel_create_image_from_name,
 .createImageFromRenderbuffer= intel_create_image_from_
renderbuffer,
--
2.12.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



--
Ben Widawsky, Intel Open Source Technology Center
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Shut up major()/minor() warnings.

2017-03-20 Thread Matt Turner
Yep, lots of patches like this in Gentoo.

Acked-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Shut up major()/minor() warnings.

2017-03-20 Thread Kenneth Graunke
Recent glibc generates this warning:

brw_performance_query.c:1648:13: warning: In the GNU C Library, "minor" is 
defined
 by <sys/sysmacros.h>. For historical compatibility, it is
 currently defined by <sys/types.h> as well, but we plan to
 remove this soon. To use "minor", include <sys/sysmacros.h>
 directly. If you did not intend to use a system-defined macro
 "minor", you should undefine it after including <sys/types.h>.

min = minor(sb.st_rdev);

So, include sys/sysmacros.h to shut up the warning.
---
 src/mesa/drivers/dri/i965/brw_performance_query.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 2e04e091d29..17e0d6f0cdf 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -42,6 +42,7 @@
 #include 
 #include 
 
+#include <sys/sysmacros.h>
 #include 
 #include 
 #include 
-- 
2.12.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 000/140] RadeonSI: Initial Vega10 support

2017-03-20 Thread Marek Olšák
It's also here:

https://cgit.freedesktop.org/~mareko/mesa/log/?h=vega10

Marek

On Mon, Mar 20, 2017 at 11:42 PM, Marek Olšák  wrote:
> Hi,
>
> This is initial Vega10 support for radeonsi. It supports everything
> except geometry shaders and tessellation, so it's limited to GL 3.1.
> The missing features are under way.
>
> There is also UVD and VCE support.
>
> The first 57 patches only update addrlib. Vega10 support is the rest.
>
> Please review.
>
> Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 139/140] radeon/vce: update vce support to firmware 53.19.4

2017-03-20 Thread Marek Olšák
From: Leo Liu 

Signed-off-by: Leo Liu 
---
 src/gallium/drivers/radeon/radeon_vce.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_vce.c 
b/src/gallium/drivers/radeon/radeon_vce.c
index b5ace7b..70c1e60 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -52,7 +52,7 @@
 #define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8))
 #define FW_52_4_3 ((52 << 24) | (4 << 16) | (3 << 8))
 #define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8))
-#define FW_53_17_4 ((53 << 24) | (17 << 16) | (4 << 8))
+#define FW_53_19_4 ((53 << 24) | (19 << 16) | (4 << 8))
 
 /**
  * flush commands to the hardware
@@ -510,7 +510,7 @@ struct pipe_video_codec *rvce_create_encoder(struct 
pipe_context *context,
radeon_vce_52_init(enc);
get_pic_param = radeon_vce_52_get_param;
break;
-   case FW_53_17_4:
+   case FW_53_19_4:
radeon_vce_52_init(enc);
get_pic_param = radeon_vce_52_get_param;
break;
@@ -546,7 +546,7 @@ bool rvce_is_fw_version_supported(struct r600_common_screen 
*rscreen)
case FW_52_0_3:
case FW_52_4_3:
case FW_52_8_3:
-   case FW_53_17_4:
+   case FW_53_19_4:
return true;
default:
return false;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Hash Collision Risk Maths with Salt values.

2017-03-20 Thread Peter Dolding
> Peter,
>
> While there may be things of value in your replies I would kindly ask
> you [again] to try and keep your replies brief.
> If one is to choose between working on a feature/bug and reading a
> 900+ (word) email I'd imagine they'll choose the former.
>
> I'm saying this for your own good - hopefully you'll consider it as
> such and adjust accordingly.
>
> Thanks
> Emil

That is reduced down from 1 pages of cryptography PHD document.

If you are scared off by an email the size I posted you most likely
should stay away from any bug with a cryptography side because you will
not be willing to read enough to understand what you have done.

At least the following section needs to be read:

> With SHA1 you start with 160 bits of unique values for items hashed.

> Lets say we add a build_id into the mix.   This changes per version.
>  So you have released 2 different build ids how many unique hashed
> values do you have if you place them all in one directory.  The answer
> is now 159 of course you have just lost 1 bit of uniqueness.  Or
> instead of 6.6 million years only 3.3 million years.

> It is luckily linear  so take numbers number of salts factor by 2 take
> that number of bits off your hash then calculate your probability of
> collision.   So using a 512 bit hash really makes this hard using a
> shorter hash getting down into thousand of years might not take that
> long at all.

> Not putting different salted hashes in the same location also reduces
> odds of collision due to avoiding salt effect.   So we give the
> build_id and gpu_id  their own directory this means the build_id and
> gpu_id salts have not effected the outcome as much. The result is you
> still have have all the hash bits of uniqueness.  So build_id and
> gpu_id has not resulted in subtracting bits off the hash by giving
> them directories.
This is slightly wrongly worded.  It should not be "avoiding salt effect"
but "reducing salt effects to collisions caused by minor variations in
data".

> So build_id and gpu_id values would be salts.

> The fastest way to really undermine a hash you are depending being
> unique is to be careless with salt effects.   Add enough salt effects
> when you run the maths your hash might have no bits left so making a
> collision likely.   It is possible to add enough salts to consume up a
> 512 bit hash example being.  32 salts with 256(16^2) unique values
> each you have just made collisions likely with a 512bit hash.A
> hash like SHA1 failure point due to salting  10 salts with 256(16^2)
> unique values each.   So each of the individual salts damage to the
> hash does not have to be that much but accumulative can be quite a
> large problem.

The bits after that cover how the git example is wrong for the case at
hand, operational considerations, and what kind of attack and
what classification the current workarounds are.   This first bit
covers your basic working maths, like the Newtonian model of gravity, for
the risk of hash collisions from adding salts.  Like that model, it's not
100 percent right, but it takes less than 1000 pages of mathematics to know
if the hash is at possible risk or not from the salts you have added in the
worst possible case.

In a lot of ways I have over compacted this already and lost a lot of
the finer details but in this use case I don't think the finer details
are important.  I have already compacted to the point I have lost 1 or
2 finer details that might be important.   So I was not expecting to
be told this needs to be shorter to be completely correct it need to
be longer.

Peter Dolding
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 131/140] radeon/uvd: add uvd soc15 register

2017-03-20 Thread Marek Olšák
From: Leo Liu 

Signed-off-by: Leo Liu 
Acked-by: Alex Deucher 
---
 src/gallium/drivers/radeon/radeon_uvd.c | 26 ++
 src/gallium/drivers/radeon/radeon_uvd.h |  5 +
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_uvd.c 
b/src/gallium/drivers/radeon/radeon_uvd.c
index c1746f8..7c6ea93 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -91,6 +91,12 @@ struct ruvd_decoder {
booluse_legacy;
struct rvid_buffer  ctx;
struct rvid_buffer  sessionctx;
+   struct {
+   unsigneddata0;
+   unsigneddata1;
+   unsignedcmd;
+   unsignedcntl;
+   } reg;
 };
 
 /* flush IB to the hardware */
@@ -120,14 +126,14 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned 
cmd,
uint64_t addr;
addr = dec->ws->buffer_get_virtual_address(buf);
addr = addr + off;
-   set_reg(dec, RUVD_GPCOM_VCPU_DATA0, addr);
-   set_reg(dec, RUVD_GPCOM_VCPU_DATA1, addr >> 32);
+   set_reg(dec, dec->reg.data0, addr);
+   set_reg(dec, dec->reg.data1, addr >> 32);
} else {
off += dec->ws->buffer_get_reloc_offset(buf);
set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
}
-   set_reg(dec, RUVD_GPCOM_VCPU_CMD, cmd << 1);
+   set_reg(dec, dec->reg.cmd, cmd << 1);
 }
 
 /* do the codec needs an IT buffer ?*/
@@ -1150,7 +1156,7 @@ static void ruvd_end_frame(struct pipe_video_codec 
*decoder,
if (have_it(dec))
send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, 
msg_fb_it_buf->res->buf,
 FB_BUFFER_OFFSET + dec->fb_size, RADEON_USAGE_READ, 
RADEON_DOMAIN_GTT);
-   set_reg(dec, RUVD_ENGINE_CNTL, 1);
+   set_reg(dec, dec->reg.cntl, 1);
 
flush(dec, RADEON_FLUSH_ASYNC);
next_buffer(dec);
@@ -1284,6 +1290,18 @@ struct pipe_video_codec *ruvd_create_decoder(struct 
pipe_context *context,
rvid_clear_buffer(context, >sessionctx);
}
 
+   if (info.family >= CHIP_VEGA10) {
+   dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15;
+   dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15;
+   dec->reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15;
+   dec->reg.cntl = RUVD_ENGINE_CNTL_SOC15;
+   } else {
+   dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0;
+   dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1;
+   dec->reg.cmd = RUVD_GPCOM_VCPU_CMD;
+   dec->reg.cntl = RUVD_ENGINE_CNTL;
+   }
+
map_msg_fb_it_buf(dec);
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_CREATE;
diff --git a/src/gallium/drivers/radeon/radeon_uvd.h 
b/src/gallium/drivers/radeon/radeon_uvd.h
index e3f8504..a5af9ea 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.h
+++ b/src/gallium/drivers/radeon/radeon_uvd.h
@@ -56,6 +56,11 @@
 #define RUVD_GPCOM_VCPU_DATA1  0xEF14
 #define RUVD_ENGINE_CNTL   0xEF18
 
+#define RUVD_GPCOM_VCPU_CMD_SOC15  0x2070c
+#define RUVD_GPCOM_VCPU_DATA0_SOC150x20710
+#define RUVD_GPCOM_VCPU_DATA1_SOC150x20714
+#define RUVD_ENGINE_CNTL_SOC15 0x20718
+
 /* UVD commands to VCPU */
 #define RUVD_CMD_MSG_BUFFER0x
 #define RUVD_CMD_DPB_BUFFER0x0001
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 134/140] winsys/surface: add height pitch for gfx9

2017-03-20 Thread Marek Olšák
From: Leo Liu 

Signed-off-by: Leo Liu 
Acked-by: Alex Deucher 
---
 src/gallium/drivers/radeon/radeon_winsys.h | 1 +
 src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
b/src/gallium/drivers/radeon/radeon_winsys.h
index 7839170..43f74f6 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -363,6 +363,7 @@ struct gfx9_surf_layout {
 /* The size of the 2D plane containing all mipmap levels. */
 uint64_tsurf_slice_size;
 uint16_tsurf_pitch; /* in blocks */
+uint16_tsurf_height;
 /* Y mipmap level offset in blocks. Only valid for LINEAR. */
 uint16_tsurf_ymip_offset[RADEON_SURF_MAX_LEVELS];
 
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index 48df27b..f46af3d 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -658,6 +658,7 @@ static int gfx9_compute_miptree(struct amdgpu_winsys *ws,
out.mipChainPitch - 1;
surf->u.gfx9.surf_slice_size = out.sliceSize;
surf->u.gfx9.surf_pitch = out.pitch;
+   surf->u.gfx9.surf_height = out.height;
surf->surf_size = out.surfSize;
surf->surf_alignment = out.baseAlign;
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 121/140] radeonsi/gfx9: don't write mipmap level offsets to BO metadata

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

GFX9 doesn't have (usable) mipmap offsets.
---
 src/gallium/drivers/radeonsi/si_state.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 1f1b4db..82f2793 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -4052,12 +4052,15 @@ static void si_query_opaque_metadata(struct 
r600_common_screen *rscreen,
 
/* Dwords [2:9] contain the image descriptor. */
memcpy(>metadata[2], desc, sizeof(desc));
+   md->size_metadata = 10 * 4;
 
/* Dwords [10:..] contain the mipmap level offsets. */
-   for (i = 0; i <= res->last_level; i++)
-   md->metadata[10+i] = rtex->surface.u.legacy.level[i].offset >> 
8;
+   if (rscreen->chip_class <= VI) {
+   for (i = 0; i <= res->last_level; i++)
+   md->metadata[10+i] = 
rtex->surface.u.legacy.level[i].offset >> 8;
 
-   md->size_metadata = (11 + res->last_level) * 4;
+   md->size_metadata += (1 + res->last_level) * 4;
+   }
 }
 
 static void si_apply_opaque_metadata(struct r600_common_screen *rscreen,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] i965: Handle Y-tiled modifier

2017-03-20 Thread Ben Widawsky

On 17-03-20 15:36:37, Jason Ekstrand wrote:

On Mon, Mar 20, 2017 at 3:25 PM, Ben Widawsky  wrote:


On 17-03-20 12:00:44, Jason Ekstrand wrote:


On Fri, Mar 17, 2017 at 5:34 PM, Ben Widawsky  wrote:

This patch begins introducing how we'll actually handle the potentially

many modifiers coming in from the API, how we'll store them, and the
structure in the code to support it.

Prior to this patch, the Y-tiled modifier would be entirely ignored. It
shouldn't actually be used until this point because we've not bumped the
DRIimage extension version (which is a requirement to use modifiers).

With X-tiling:
Writes:  6,583.58 MiB
Reads:   6,540.93 MiB

With Y-tiling:
Writes:  5,361.78 MiB
Reads:   6,052.45 MiB

Savings per frame
Writes:  2 MiB
Reads:  .8 MiB

Similar functionality was introduced and then reverted here:

commit 6a0d036483caf87d43ebe2edd1905873446c9589
Author: Ben Widawsky 
Date:   Thu Apr 21 20:14:58 2016 -0700

i965: Always use Y-tiled buffers on SKL+

v2: Use last set bit instead of first set bit in modifiers to address
bug found by Daniel Stone.

Signed-off-by: Ben Widawsky 
Reviewed-by: Eric Engestrom 
Acked-by: Daniel Stone 
---
 src/mesa/drivers/dri/i965/intel_screen.c | 55

 1 file changed, 49 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c
b/src/mesa/drivers/dri/i965/intel_screen.c
index 22ab3a30b6..1954757d1e 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -23,6 +23,7 @@
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */

+#include 
 #include 
 #include 
 #include 
@@ -520,16 +521,35 @@ select_best_modifier(struct gen_device_info
*devinfo,
  const uint64_t *modifiers,
  const unsigned count)
 {
-   uint64_t modifier = DRM_FORMAT_MOD_INVALID;
+#define YTILE  (1 << 1)
+#define LINEAR (1 << 0)
+
+   const uint64_t prio_modifiers[] = { I915_FORMAT_MOD_Y_TILED,
DRM_FORMAT_MOD_LINEAR };
+   uint32_t modifier_bitmask = 0; /* API only allows 32 */



The bitfield thing is still confusing to me.  Here's an idea on how we
could maybe make it better.

enum modifier_priority {
  MODIFIER_PRIORITY_LINEAR,
  MODIFIER_PRIORITY_X,
  MODIFIER_PRIORITY_Y,
  MODIFIER_PRIORITY_Y_CCS,
};

uint32_t priority_to_modifier[] = {
  [MODIFIER_PRIORITY_LINEAR] = DRM_FORMAT_MOD_LINEAR,
  [MODIFIER_PRIORITY_X] = I915_FORMAT_MOD_X_TILED,
  [MODIFIER_PRIORITY_Y] = I915_FORMAT_MOD_Y_TILED,
  [MODIFIER_PRIORITY_Y_CCS] = I915_FORMAT_MOD_Y_TILED_CCS,
}

enum modier_priority prio = 0;
for (int i = 0; i < count; i++) {
  switch (modifiers[i]) {
  case DRM_FORMAT_MOD_LINEAR:
 prio = MAX2(prio, MODIFIER_PRIORITY_LINEAR);
 break;

  case DRM_FORMAT_MOD_X_TILED:
 prio = MAX2(prio, MODIFIER_PRIORITY_X);
 break;

  case DRM_FORMAT_MOD_Y_TILED:
 prio = MAX2(prio, MODIFIER_PRIORITY_Y);
 break;

  case DRM_FORMAT_MOD_Y_TILIED_CCS:
 prio = MAX2(prio, MODIFIER_PRIORITY_Y_CCS);
 break;
}

return priority_to_modifier[prio];

How does this strike your fancy?  I'm ok with the bit set approach if you
really prefer it but I find it hard to reason about.



I don't really prefer. This looks pretty good. Seems no less complex to
me, but
I wrote the first one, so perhaps I'm partial.

Originally, I had some code in the equivalent function (before select_best
was
separate) which would try fallbacks, ie. if Y tiled allocation failed,
it'd go
down to the next modifier just walking down the bits, but logic is now
gone, so
there isn't really a point in the bitmask.

Will respin with this and the fixes meant below.





for (int i = 0; i < count; i++) {
   switch (modifiers[i]) {
   case DRM_FORMAT_MOD_LINEAR:
- return modifiers[i];
+ modifier_bitmask |= LINEAR;
+ break;
+  case I915_FORMAT_MOD_Y_TILED:
+ if (devinfo->gen < 9) {
+_mesa_warning(NULL, "Invalid Y-tiling parameter\n");
+continue;



This isn't invalid.  It's just invalid for scanout.  If you wanted to
create an image to share between two GL implementations, Y-tiling works
fine on everything.




As a general function to support modifiers, you are correct, however since
this
is only called for image creation, I believe the existing warning is
correct.



But what if I want to create an image to share between two userspace
processes with no scanout involved?  While the GBM portion of the API is
mostly intended for scanout, the EGL extension will be something that can
and will be used for GL <-> GL.  I guess we can always flip it on when we
add support for the EGL extension but I see no reason why it shouldn't work
through GBM.

Part of the problem may be that I really don't understand why GBM exists.
It's like a linux-specific half-of-EGL 

[Mesa-dev] [PATCH 138/140] radeon/uvd: set correct vega10 db pitch alignment

2017-03-20 Thread Marek Olšák
From: Boyuan Zhang 

Create new function to get correct alignment based on ASICs, and change
the corresponding decode message buffer and dpb buffer size calculations

Signed-off-by: Boyuan Zhang 
Reviewed-by: Christian König 
---
 src/gallium/drivers/radeon/radeon_uvd.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_uvd.c 
b/src/gallium/drivers/radeon/radeon_uvd.c
index 1d1e40a..d5352d9 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -330,6 +330,14 @@ static unsigned calc_ctx_size_h265_main10(struct 
ruvd_decoder *dec, struct pipe_
return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
 }
 
+static unsigned get_db_pitch_alignment(struct ruvd_decoder *dec)
+{
+   if (((struct r600_common_screen*)dec->screen)->family < CHIP_VEGA10)
+   return 16;
+   else
+   return 32;
+}
+
 /* calculate size of reference picture buffer */
 static unsigned calc_dpb_size(struct ruvd_decoder *dec)
 {
@@ -343,7 +351,7 @@ static unsigned calc_dpb_size(struct ruvd_decoder *dec)
unsigned max_references = dec->base.max_references + 1;
 
// aligned size of a single frame
-   image_size = width * height;
+   image_size = align(width, get_db_pitch_alignment(dec)) * height;
image_size += image_size / 2;
image_size = align(image_size, 1024);
 
@@ -418,9 +426,9 @@ static unsigned calc_dpb_size(struct ruvd_decoder *dec)
width = align (width, 16);
height = align (height, 16);
if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
-   dpb_size = align((width * height * 9) / 4, 256) * 
max_references;
+   dpb_size = align((align(width, 
get_db_pitch_alignment(dec)) * height * 9) / 4, 256) * max_references;
else
-   dpb_size = align((width * height * 3) / 2, 256) * 
max_references;
+   dpb_size = align((align(width, 
get_db_pitch_alignment(dec)) * height * 3) / 2, 256) * max_references;
break;
 
case PIPE_VIDEO_FORMAT_VC1:
@@ -1085,7 +1093,7 @@ static void ruvd_end_frame(struct pipe_video_codec 
*decoder,
 
dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
dec->msg->body.decode.bsd_size = bs_size;
-   dec->msg->body.decode.db_pitch = align(dec->base.width, 16);
+   dec->msg->body.decode.db_pitch = align(dec->base.width, 
get_db_pitch_alignment(dec));
 
if (dec->stream_type == RUVD_CODEC_H264_PERF &&
((struct r600_common_screen*)dec->screen)->family >= CHIP_POLARIS10)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 116/140] radeonsi/gfx9: emit BREAK_BATCH in emit_framebuffer_state

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index d0c9072..134a520 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2804,6 +2804,11 @@ static void si_emit_framebuffer_state(struct si_context 
*sctx, struct r600_atom
radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
   S_028208_BR_X(state->width) | 
S_028208_BR_Y(state->height));
 
+   if (sctx->b.chip_class >= GFX9) {
+   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+   radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | 
EVENT_INDEX(0));
+   }
+
sctx->framebuffer.dirty_cbufs = 0;
sctx->framebuffer.dirty_zsbuf = false;
 }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 132/140] radeon/uvd: adapt gfx9 surface to uvd

2017-03-20 Thread Marek Olšák
From: Leo Liu 

Signed-off-by: Leo Liu 
Acked-by: Alex Deucher 
---
 src/gallium/drivers/r600/r600_uvd.c   |   4 +-
 src/gallium/drivers/radeon/radeon_uvd.c   | 103 +++---
 src/gallium/drivers/radeon/radeon_uvd.h   |   7 +-
 src/gallium/drivers/radeon/radeon_video.c |  38 +++
 src/gallium/drivers/radeon/radeon_video.h |   2 +-
 src/gallium/drivers/radeonsi/si_uvd.c |   8 ++-
 6 files changed, 106 insertions(+), 56 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_uvd.c 
b/src/gallium/drivers/r600/r600_uvd.c
index f1b1ca8..cb732385 100644
--- a/src/gallium/drivers/r600/r600_uvd.c
+++ b/src/gallium/drivers/r600/r600_uvd.c
@@ -115,7 +115,7 @@ struct pipe_video_buffer *r600_video_buffer_create(struct 
pipe_context *pipe,
surfaces[i] = [i]->surface;
}
 
-   rvid_join_surfaces(ctx->b.ws, pbs, surfaces);
+   rvid_join_surfaces(>b, pbs, surfaces);
 
for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
if (!resources[i])
@@ -162,7 +162,7 @@ static struct pb_buffer* r600_uvd_set_dtb(struct ruvd_msg 
*msg, struct vl_video_
msg->body.decode.dt_field_mode = buf->base.interlaced;
msg->body.decode.dt_surf_tile_config |= 
RUVD_NUM_BANKS(eg_num_banks(rscreen->b.info.r600_num_banks));
 
-   ruvd_set_dt_surfaces(msg, >surface, >surface);
+   ruvd_set_dt_surfaces(msg, >surface, >surface, 
RUVD_SURFACE_TYPE_LEGACY);
 
return luma->resource.buf;
 }
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c 
b/src/gallium/drivers/radeon/radeon_uvd.c
index 7c6ea93..ea614c1 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -1337,10 +1337,20 @@ error:
 }
 
 /* calculate top/bottom offset */
-static unsigned texture_offset(struct radeon_surf *surface, unsigned layer)
+static unsigned texture_offset(struct radeon_surf *surface, unsigned layer,
+   enum ruvd_surface_type type)
 {
-   return surface->u.legacy.level[0].offset +
-   layer * surface->u.legacy.level[0].slice_size;
+   switch (type) {
+   default:
+   case RUVD_SURFACE_TYPE_LEGACY:
+   return surface->u.legacy.level[0].offset +
+   layer * surface->u.legacy.level[0].slice_size;
+   break;
+   case RUVD_SURFACE_TYPE_GFX9:
+   return surface->u.gfx9.surf_offset +
+   layer * surface->u.gfx9.surf_slice_size;
+   break;
+   }
 }
 
 /* hw encode the aspect of macro tiles */
@@ -1373,42 +1383,63 @@ static unsigned bank_wh(unsigned bankwh)
  * fill decoding target field from the luma and chroma surfaces
  */
 void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
- struct radeon_surf *chroma)
+   struct radeon_surf *chroma, enum ruvd_surface_type type)
 {
-   msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x;
-   switch (luma->u.legacy.level[0].mode) {
-   case RADEON_SURF_MODE_LINEAR_ALIGNED:
-   msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
-   msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
-   break;
-   case RADEON_SURF_MODE_1D:
-   msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
-   msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
-   break;
-   case RADEON_SURF_MODE_2D:
-   msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
-   msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
-   break;
+   switch (type) {
default:
-   assert(0);
-   break;
-   }
+   case RUVD_SURFACE_TYPE_LEGACY:
+   msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x;
+   switch (luma->u.legacy.level[0].mode) {
+   case RADEON_SURF_MODE_LINEAR_ALIGNED:
+   msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
+   msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
+   break;
+   case RADEON_SURF_MODE_1D:
+   msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
+   msg->body.decode.dt_array_mode = 
RUVD_ARRAY_MODE_1D_THIN;
+   break;
+   case RADEON_SURF_MODE_2D:
+   msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
+   msg->body.decode.dt_array_mode = 
RUVD_ARRAY_MODE_2D_THIN;
+   break;
+   default:
+   assert(0);
+   break;
+   }
 
-   msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0);
-   msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0);
-   if (msg->body.decode.dt_field_mode) {
-   

[Mesa-dev] [PATCH 104/140] radeonsi/gfx9: pass correct parameters to buffer_get_handle

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_texture.c | 20 ++--
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index ebd2c5c..129b3f4 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -500,6 +500,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* 
screen,
struct r600_texture *rtex = (struct r600_texture*)resource;
struct radeon_bo_metadata metadata;
bool update_metadata = false;
+   unsigned stride, offset, slice_size;
 
/* This is not supported now, but it might be required for OpenCL
 * interop in the future.
@@ -553,12 +554,19 @@ static boolean r600_texture_get_handle(struct 
pipe_screen* screen,
res->external_usage = usage;
}
 
-   return rscreen->ws->buffer_get_handle(res->buf,
- 
rtex->surface.u.legacy.level[0].nblk_x *
- rtex->surface.bpe,
- 
rtex->surface.u.legacy.level[0].offset,
- 
rtex->surface.u.legacy.level[0].slice_size,
- whandle);
+   if (rscreen->chip_class >= GFX9) {
+   offset = 0;
+   stride = rtex->surface.u.gfx9.surf_pitch *
+rtex->surface.bpe;
+   slice_size = rtex->surface.u.gfx9.surf_slice_size;
+   } else {
+   offset = rtex->surface.u.legacy.level[0].offset;
+   stride = rtex->surface.u.legacy.level[0].nblk_x *
+rtex->surface.bpe;
+   slice_size = rtex->surface.u.legacy.level[0].slice_size;
+   }
+   return rscreen->ws->buffer_get_handle(res->buf, stride, offset,
+ slice_size, whandle);
 }
 
 static void r600_texture_destroy(struct pipe_screen *screen,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 119/140] radeonsi/gfx9: use ACQUIRE_MEM

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state_draw.c | 23 +--
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index f01ac01..a80f021 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -754,12 +754,23 @@ static void si_emit_surface_sync(struct 
r600_common_context *rctx,
 {
struct radeon_winsys_cs *cs = rctx->gfx.cs;
 
-   /* ACQUIRE_MEM is only required on a compute ring. */
-   radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
-   radeon_emit(cs, cp_coher_cntl);   /* CP_COHER_CNTL */
-   radeon_emit(cs, 0x);  /* CP_COHER_SIZE */
-   radeon_emit(cs, 0);   /* CP_COHER_BASE */
-   radeon_emit(cs, 0x000A);  /* POLL_INTERVAL */
+   if (rctx->chip_class >= GFX9) {
+   /* Flush caches and wait for the caches to assert idle. */
+   radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
+   radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
+   radeon_emit(cs, 0x);/* CP_COHER_SIZE */
+   radeon_emit(cs, 0xff);  /* CP_COHER_SIZE_HI */
+   radeon_emit(cs, 0); /* CP_COHER_BASE */
+   radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
+   radeon_emit(cs, 0x000A);/* POLL_INTERVAL */
+   } else {
+   /* ACQUIRE_MEM is only required on a compute ring. */
+   radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
+   radeon_emit(cs, cp_coher_cntl);   /* CP_COHER_CNTL */
+   radeon_emit(cs, 0x);  /* CP_COHER_SIZE */
+   radeon_emit(cs, 0);   /* CP_COHER_BASE */
+   radeon_emit(cs, 0x000A);  /* POLL_INTERVAL */
+   }
 }
 
 void si_emit_cache_flush(struct si_context *sctx)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 112/140] radeonsi/gfx9: enable clamping for Z UNORM formats promoted to Z32F

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

so that shaders don't have to do it.
---
 src/gallium/drivers/radeonsi/si_state.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index e7c25bc..d44cb44 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2978,13 +2978,14 @@ si_make_texture_descriptor(struct si_screen *screen,
   uint32_t *fmask_state)
 {
struct pipe_resource *res = >resource.b.b;
-   const struct util_format_description *desc;
+   const struct util_format_description *base_desc, *desc;
unsigned char swizzle[4];
int first_non_void;
unsigned num_format, data_format, type;
uint64_t va;
 
desc = util_format_description(pipe_format);
+   base_desc = util_format_description(res->format);
 
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
const unsigned char swizzle_[4] = {0, 0, 0, 0};
@@ -3085,6 +3086,15 @@ si_make_texture_descriptor(struct si_screen *screen,
data_format = 0;
}
 
+   /* Enable clamping for UNORM depth formats promoted to Z32F. */
+   if (screen->b.chip_class >= GFX9 &&
+   util_format_has_depth(desc) &&
+   num_format == V_008F14_IMG_NUM_FORMAT_FLOAT &&
+   util_get_depth_format_type(base_desc) != UTIL_FORMAT_TYPE_FLOAT) {
+   /* NUM_FORMAT=FLOAT and DATA_FORMAT=24_8 means "clamp to 
[0,1]". */
+   data_format = V_008F14_IMG_DATA_FORMAT_24_8;
+   }
+
if (!sampler &&
(res->target == PIPE_TEXTURE_CUBE ||
 res->target == PIPE_TEXTURE_CUBE_ARRAY ||
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 137/140] radeon/vce: update for firmware 53.17

2017-03-20 Thread Marek Olšák
From: Leo Liu 

Signed-off-by: Leo Liu 
---
 src/gallium/drivers/radeon/radeon_vce.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_vce.c 
b/src/gallium/drivers/radeon/radeon_vce.c
index be2..b5ace7b 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -52,7 +52,7 @@
 #define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8))
 #define FW_52_4_3 ((52 << 24) | (4 << 16) | (3 << 8))
 #define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8))
-#define FW_53_14_4 ((53 << 24) | (14 << 16) | (4 << 8))
+#define FW_53_17_4 ((53 << 24) | (17 << 16) | (4 << 8))
 
 /**
  * flush commands to the hardware
@@ -510,7 +510,7 @@ struct pipe_video_codec *rvce_create_encoder(struct 
pipe_context *context,
radeon_vce_52_init(enc);
get_pic_param = radeon_vce_52_get_param;
break;
-   case FW_53_14_4:
+   case FW_53_17_4:
radeon_vce_52_init(enc);
get_pic_param = radeon_vce_52_get_param;
break;
@@ -546,7 +546,7 @@ bool rvce_is_fw_version_supported(struct r600_common_screen 
*rscreen)
case FW_52_0_3:
case FW_52_4_3:
case FW_52_8_3:
-   case FW_53_14_4:
+   case FW_53_17_4:
return true;
default:
return false;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 130/140] radeonsi/gfx9: disable features that don't work

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_texture.c   | 8 +++-
 src/gallium/drivers/radeonsi/si_shader.c| 5 +++--
 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c | 4 +++-
 src/gallium/drivers/radeonsi/si_state.c | 3 ++-
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index c6d2381..dbcfd58 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1625,7 +1625,9 @@ static void *r600_texture_transfer_map(struct 
pipe_context *ctx,
 * Use the staging texture for uploads if the underlying BO
 * is busy.
 */
-   if (!rtex->surface.is_linear)
+   /* TODO: Linear CPU mipmap addressing is broken on GFX9: */
+   if (!rtex->surface.is_linear ||
+   (rctx->chip_class == GFX9 && level))
use_staging_texture = true;
else if (usage & PIPE_TRANSFER_READ)
use_staging_texture =
@@ -2632,6 +2634,10 @@ void evergreen_do_fast_color_clear(struct 
r600_common_context *rctx,
if (rctx->render_cond)
return;
 
+   /* TODO: fix CMASK and DCC fast clear */
+   if (rctx->chip_class >= GFX9)
+   return;
+
for (i = 0; i < fb->nr_cbufs; i++) {
struct r600_texture *tex;
unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 74fef36..93b6a6e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4590,8 +4590,9 @@ static void tex_fetch_args(
 * The sample index should be adjusted as follows:
 *   sample_index = (fmask >> (sample_index * 4)) & 0xF;
 */
-   if (target == TGSI_TEXTURE_2D_MSAA ||
-   target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
+   if (ctx->screen->b.chip_class <= VI && /* TODO: fix FMASK on GFX9 */
+   (target == TGSI_TEXTURE_2D_MSAA ||
+target == TGSI_TEXTURE_2D_ARRAY_MSAA)) {
struct lp_build_context *uint_bld = _base->uint_bld;
struct lp_build_emit_data txf_emit_data = *emit_data;
LLVMValueRef txf_address[4];
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index 7965b46..efc9452 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -834,7 +834,9 @@ static void emit_declaration(struct lp_build_tgsi_context 
*bld_base,
 *
 * LLVM 3.8 crashes with this.
 */
-   if (HAVE_LLVM >= 0x0309 && array_size > 16) {
+   if ((HAVE_LLVM >= 0x0309 && array_size > 16) ||
+   /* TODO: VGPR indexing is buggy on GFX9. */
+   ctx->screen->b.chip_class == GFX9) {
array_alloca = LLVMBuildAlloca(builder,
LLVMArrayType(bld_base->base.vec_type,
  array_size), "array");
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index a46bc4b..ad706f0 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2198,7 +2198,8 @@ static void si_initialize_color_surface(struct si_context 
*sctx,
S_028C74_NUM_FRAGMENTS(log_samples);
 
if (rtex->fmask.size) {
-   color_info |= S_028C70_COMPRESSION(1);
+/* TODO: fix FMASK on GFX9: */
+   color_info |= S_028C70_COMPRESSION(sctx->b.chip_class 
<= VI);
unsigned fmask_bankh = 
util_logbase2(rtex->fmask.bank_height);
 
if (sctx->b.chip_class == SI) {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 133/140] radeon/uvd: clear message buffer when reused

2017-03-20 Thread Marek Olšák
From: Leo Liu 

As required by firmware

Signed-off-by: Leo Liu 
Acked-by: Alex Deucher 
---
 src/gallium/drivers/radeon/radeon_uvd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_uvd.c 
b/src/gallium/drivers/radeon/radeon_uvd.c
index ea614c1..1d1e40a 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -157,6 +157,8 @@ static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
 
/* calc buffer offsets */
dec->msg = (struct ruvd_msg *)ptr;
+   memset(dec->msg, 0, sizeof(*dec->msg));
+
dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
if (have_it(dec))
dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + dec->fb_size);
@@ -941,7 +943,6 @@ static void ruvd_destroy(struct pipe_video_codec *decoder)
assert(decoder);
 
map_msg_fb_it_buf(dec);
-   memset(dec->msg, 0, sizeof(*dec->msg));
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_DESTROY;
dec->msg->stream_handle = dec->stream_handle;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 127/140] radeonsi/gfx9: don't generate LS and ES states

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

these shaders don't exist on GFX9
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 70 -
 1 file changed, 46 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 0087eeb..47de1cd 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -435,12 +435,14 @@ static struct si_pm4_state 
*si_get_shader_pm4_state(struct si_shader *shader)
return shader->pm4;
 }
 
-static void si_shader_ls(struct si_shader *shader)
+static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
 {
struct si_pm4_state *pm4;
unsigned vgpr_comp_cnt;
uint64_t va;
 
+   assert(sscreen->b.chip_class <= VI);
+
pm4 = si_get_shader_pm4_state(shader);
if (!pm4)
return;
@@ -497,6 +499,8 @@ static void si_shader_es(struct si_screen *sscreen, struct 
si_shader *shader)
uint64_t va;
unsigned oc_lds_en;
 
+   assert(sscreen->b.chip_class <= VI);
+
pm4 = si_get_shader_pm4_state(shader);
if (!pm4)
return;
@@ -924,7 +928,7 @@ static void si_shader_init_pm4_state(struct si_screen 
*sscreen,
switch (shader->selector->type) {
case PIPE_SHADER_VERTEX:
if (shader->key.as_ls)
-   si_shader_ls(shader);
+   si_shader_ls(sscreen, shader);
else if (shader->key.as_es)
si_shader_es(sscreen, shader);
else
@@ -1892,21 +1896,26 @@ static void si_delete_shader(struct si_context *sctx, 
struct si_shader *shader)
if (shader->pm4) {
switch (shader->selector->type) {
case PIPE_SHADER_VERTEX:
-   if (shader->key.as_ls)
+   if (shader->key.as_ls) {
+   assert(sctx->b.chip_class <= VI);
si_pm4_delete_state(sctx, ls, shader->pm4);
-   else if (shader->key.as_es)
+   } else if (shader->key.as_es) {
+   assert(sctx->b.chip_class <= VI);
si_pm4_delete_state(sctx, es, shader->pm4);
-   else
+   } else {
si_pm4_delete_state(sctx, vs, shader->pm4);
+   }
break;
case PIPE_SHADER_TESS_CTRL:
si_pm4_delete_state(sctx, hs, shader->pm4);
break;
case PIPE_SHADER_TESS_EVAL:
-   if (shader->key.as_es)
+   if (shader->key.as_es) {
+   assert(sctx->b.chip_class <= VI);
si_pm4_delete_state(sctx, es, shader->pm4);
-   else
+   } else {
si_pm4_delete_state(sctx, vs, shader->pm4);
+   }
break;
case PIPE_SHADER_GEOMETRY:
if (shader->is_gs_copy_shader)
@@ -2567,10 +2576,13 @@ bool si_update_shaders(struct si_context *sctx)
}
 
/* VS as LS */
-   r = si_shader_select(ctx, >vs_shader, _state);
-   if (r)
-   return false;
-   si_pm4_bind_state(sctx, ls, sctx->vs_shader.current->pm4);
+   if (sctx->b.chip_class <= VI) {
+   r = si_shader_select(ctx, >vs_shader,
+_state);
+   if (r)
+   return false;
+   si_pm4_bind_state(sctx, ls, 
sctx->vs_shader.current->pm4);
+   }
 
if (sctx->tcs_shader.cso) {
r = si_shader_select(ctx, >tcs_shader,
@@ -2593,27 +2605,36 @@ bool si_update_shaders(struct si_context *sctx)
  
sctx->fixed_func_tcs_shader.current->pm4);
}
 
-   r = si_shader_select(ctx, >tes_shader, _state);
-   if (r)
-   return false;
-
if (sctx->gs_shader.cso) {
/* TES as ES */
-   si_pm4_bind_state(sctx, es, 
sctx->tes_shader.current->pm4);
+   if (sctx->b.chip_class <= VI) {
+   r = si_shader_select(ctx, >tes_shader,
+_state);
+   if (r)
+   return false;
+   si_pm4_bind_state(sctx, es, 
sctx->tes_shader.current->pm4);
+   }
} else {
/* TES as VS */
+ 

[Mesa-dev] [PATCH 140/140] radeonsi: add Vega10 PCI IDs

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 include/pci_ids/radeonsi_pci_ids.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/include/pci_ids/radeonsi_pci_ids.h 
b/include/pci_ids/radeonsi_pci_ids.h
index b14291d..f4139ea 100644
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -213,3 +213,11 @@ CHIPSET(0x6986, POLARIS12_, POLARIS12)
 CHIPSET(0x6987, POLARIS12_, POLARIS12)
 CHIPSET(0x6995, POLARIS12_, POLARIS12)
 CHIPSET(0x699F, POLARIS12_, POLARIS12)
+
+CHIPSET(0x6860, VEGA10_, VEGA10)
+CHIPSET(0x6861, VEGA10_, VEGA10)
+CHIPSET(0x6862, VEGA10_, VEGA10)
+CHIPSET(0x6863, VEGA10_, VEGA10)
+CHIPSET(0x6867, VEGA10_, VEGA10)
+CHIPSET(0x687F, VEGA10_, VEGA10)
+CHIPSET(0x686C, VEGA10_, VEGA10)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 128/140] radeonsi/gfx9: add linear address computations for texture transfers

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_texture.c | 73 ++-
 1 file changed, 53 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 0231fe2..c6d2381 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -177,14 +177,42 @@ static void r600_copy_from_staging_texture(struct 
pipe_context *ctx, struct r600
   src, 0, );
 }
 
-static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned 
level,
-   const struct pipe_box *box)
+static unsigned r600_texture_get_offset(struct r600_common_screen *rscreen,
+   struct r600_texture *rtex, unsigned 
level,
+   const struct pipe_box *box,
+   unsigned *stride,
+   unsigned *layer_stride)
 {
-   return rtex->surface.u.legacy.level[level].offset +
-  box->z * rtex->surface.u.legacy.level[level].slice_size +
-  (box->y / rtex->surface.blk_h *
-   rtex->surface.u.legacy.level[level].nblk_x +
-   box->x / rtex->surface.blk_w) * rtex->surface.bpe;
+   if (rscreen->chip_class >= GFX9) {
+   *stride = rtex->surface.u.gfx9.surf_pitch * rtex->surface.bpe;
+   *layer_stride = rtex->surface.u.gfx9.surf_slice_size;
+
+   if (!box)
+   return 0;
+
+   /* Each texture is an array of slices. Each slice is an array
+* of mipmap levels. */
+   return box->z * rtex->surface.u.gfx9.surf_slice_size +
+  ((rtex->surface.u.gfx9.surf_ymip_offset[level] +
+box->y / rtex->surface.blk_h) *
+   rtex->surface.u.gfx9.surf_pitch +
+   box->x / rtex->surface.blk_w) * rtex->surface.bpe;
+   } else {
+   *stride = rtex->surface.u.legacy.level[level].nblk_x *
+ rtex->surface.bpe;
+   *layer_stride = rtex->surface.u.legacy.level[level].slice_size;
+
+   if (!box)
+   return rtex->surface.u.legacy.level[level].offset;
+
+   /* Each texture is an array of mipmap levels. Each level is
+* an array of slices. */
+   return rtex->surface.u.legacy.level[level].offset +
+  box->z * rtex->surface.u.legacy.level[level].slice_size +
+  (box->y / rtex->surface.blk_h *
+   rtex->surface.u.legacy.level[level].nblk_x +
+   box->x / rtex->surface.blk_w) * rtex->surface.bpe;
+   }
 }
 
 static int r600_init_surface(struct r600_common_screen *rscreen,
@@ -1662,8 +1690,12 @@ static void *r600_texture_transfer_map(struct 
pipe_context *ctx,
0, 0, 0, 
box->depth, 0, 0);
pipe_resource_reference(, NULL);
}
-   }
-   else {
+
+   /* Just get the strides. */
+   r600_texture_get_offset(rctx->screen, staging_depth, 
level, NULL,
+   >transfer.stride,
+   >transfer.layer_stride);
+   } else {
/* XXX: only readback the rectangle which is being 
mapped? */
/* XXX: when discard is true, no need to read back from 
depth texture */
if (!r600_init_flushed_depth_texture(ctx, texture, 
_depth)) {
@@ -1677,12 +1709,12 @@ static void *r600_texture_transfer_map(struct 
pipe_context *ctx,
box->z, box->z + box->depth 
- 1,
0, 0);
 
-   offset = r600_texture_get_offset(staging_depth, level, 
box);
+   offset = r600_texture_get_offset(rctx->screen, 
staging_depth,
+level, box,
+
>transfer.stride,
+
>transfer.layer_stride);
}
 
-   trans->transfer.stride = 
staging_depth->surface.u.legacy.level[level].nblk_x *
-staging_depth->surface.bpe;
-   trans->transfer.layer_stride = 
staging_depth->surface.u.legacy.level[level].slice_size;
trans->staging = (struct r600_resource*)staging_depth;
buf = trans->staging;
} else if (use_staging_texture) {
@@ -1702,9 +1734,11 @@ static void *r600_texture_transfer_map(struct 
pipe_context *ctx,
   

[Mesa-dev] [PATCH 126/140] radeonsi/gfx9: SPI_SHADER_USER_DATA changes

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_descriptors.c | 45 ---
 1 file changed, 34 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index fb82f8f..5450dbe 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1835,15 +1835,21 @@ static void si_set_user_data_base(struct si_context 
*sctx,
 void si_shader_change_notify(struct si_context *sctx)
 {
/* VS can be bound as VS, ES, or LS. */
-   if (sctx->tes_shader.cso)
-   si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
- R_00B530_SPI_SHADER_USER_DATA_LS_0);
-   else if (sctx->gs_shader.cso)
+   if (sctx->tes_shader.cso) {
+   if (sctx->b.chip_class >= GFX9) {
+   si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
+ 
R_00B430_SPI_SHADER_USER_DATA_LS_0);
+   } else {
+   si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
+ 
R_00B530_SPI_SHADER_USER_DATA_LS_0);
+   }
+   } else if (sctx->gs_shader.cso) {
si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
  R_00B330_SPI_SHADER_USER_DATA_ES_0);
-   else
+   } else {
si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
  R_00B130_SPI_SHADER_USER_DATA_VS_0);
+   }
 
/* TES can be bound as ES, VS, or not bound. */
if (sctx->tes_shader.cso) {
@@ -1891,11 +1897,18 @@ void si_emit_graphics_shader_userdata(struct si_context 
*sctx,
si_emit_shader_pointer(sctx, descs,
   R_00B130_SPI_SHADER_USER_DATA_VS_0);
si_emit_shader_pointer(sctx, descs,
-  R_00B230_SPI_SHADER_USER_DATA_GS_0);
-   si_emit_shader_pointer(sctx, descs,
   R_00B330_SPI_SHADER_USER_DATA_ES_0);
-   si_emit_shader_pointer(sctx, descs,
-  R_00B430_SPI_SHADER_USER_DATA_HS_0);
+
+   /* GFX9 merged LS-HS and ES-GS. Only set RW_BUFFERS for ES and 
LS. */
+   if (sctx->b.chip_class >= GFX9) {
+   si_emit_shader_pointer(sctx, descs,
+  
R_00B430_SPI_SHADER_USER_DATA_LS_0);
+   } else {
+   si_emit_shader_pointer(sctx, descs,
+  
R_00B230_SPI_SHADER_USER_DATA_GS_0);
+   si_emit_shader_pointer(sctx, descs,
+  
R_00B430_SPI_SHADER_USER_DATA_HS_0);
+   }
}
 
mask = sctx->shader_pointers_dirty &
@@ -2007,8 +2020,18 @@ void si_init_all_descriptors(struct si_context *sctx)
 
/* Set default and immutable mappings. */
si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, 
R_00B130_SPI_SHADER_USER_DATA_VS_0);
-   si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL, 
R_00B430_SPI_SHADER_USER_DATA_HS_0);
-   si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY, 
R_00B230_SPI_SHADER_USER_DATA_GS_0);
+
+   if (sctx->b.chip_class >= GFX9) {
+   si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
+ R_00B430_SPI_SHADER_USER_DATA_LS_0);
+   si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
+ R_00B330_SPI_SHADER_USER_DATA_ES_0);
+   } else {
+   si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
+ R_00B430_SPI_SHADER_USER_DATA_HS_0);
+   si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
+ R_00B230_SPI_SHADER_USER_DATA_GS_0);
+   }
si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, 
R_00B030_SPI_SHADER_USER_DATA_PS_0);
 }
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 125/140] winsys/amdgpu: set/get BO tiling flags for GFX9

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 15e5bce..c805da8 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -600,7 +600,7 @@ static void amdgpu_buffer_get_metadata(struct pb_buffer 
*_buf,
 {
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
struct amdgpu_bo_info info = {0};
-   uint32_t tiling_flags;
+   uint64_t tiling_flags;
int r;
 
assert(bo->bo && "must not be called for slab entries");
@@ -612,6 +612,7 @@ static void amdgpu_buffer_get_metadata(struct pb_buffer 
*_buf,
tiling_flags = info.metadata.tiling_info;
 
if (bo->ws->info.chip_class >= GFX9) {
+  md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
} else {
   md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
   md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
@@ -639,11 +640,12 @@ static void amdgpu_buffer_set_metadata(struct pb_buffer 
*_buf,
 {
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
struct amdgpu_bo_metadata metadata = {0};
-   uint32_t tiling_flags = 0;
+   uint64_t tiling_flags = 0;
 
assert(bo->bo && "must not be called for slab entries");
 
if (bo->ws->info.chip_class >= GFX9) {
+  tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
} else {
   if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
  tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 129/140] radeonsi/gfx9: only allow GL 3.1

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_pipe.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 5d3cbc5..dde8b1b 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -452,6 +452,8 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
return HAVE_LLVM >= 0x0309 ? 4 : 0;
 
case PIPE_CAP_GLSL_FEATURE_LEVEL:
+   if (sscreen->b.chip_class >= GFX9)
+   return 140;
if (si_have_tgsi_compute(sscreen))
return 450;
return HAVE_LLVM >= 0x0309 ? 420 : 410;
@@ -576,9 +578,12 @@ static int si_get_shader_param(struct pipe_screen* pscreen,
{
case PIPE_SHADER_FRAGMENT:
case PIPE_SHADER_VERTEX:
+   break;
case PIPE_SHADER_GEOMETRY:
case PIPE_SHADER_TESS_CTRL:
case PIPE_SHADER_TESS_EVAL:
+   if (sscreen->b.chip_class >= GFX9)
+   return 0;
break;
case PIPE_SHADER_COMPUTE:
switch (param) {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 117/140] radeonsi/gfx9: emit FLUSH_DFSM where required

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_pipe.h  |  1 +
 src/gallium/drivers/radeonsi/si_state.c | 17 +
 2 files changed, 18 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 174baaa..e1df3b6 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -259,6 +259,7 @@ struct si_context {
struct r600_atom msaa_config;
struct si_sample_mask   sample_mask;
struct r600_atom cb_render_state;
+   unsigned last_cb_target_mask;
struct si_blend_color   blend_color;
struct r600_atom clip_regs;
struct si_clip_state clip_state;
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 134a520..1f1b4db 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -117,6 +117,17 @@ static void si_emit_cb_render_state(struct si_context 
*sctx, struct r600_atom *a
 
radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask);
 
+   /* GFX9: Flush DFSM when CB_TARGET_MASK changes.
+* I think we don't have to do anything between IBs.
+*/
+   if (sctx->b.chip_class >= GFX9 &&
+   sctx->last_cb_target_mask != cb_target_mask) {
+   sctx->last_cb_target_mask = cb_target_mask;
+
+   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+   radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | 
EVENT_INDEX(0));
+   }
+
/* RB+ register settings. */
if (sctx->screen->b.rbplus_allowed) {
unsigned spi_shader_col_format =
@@ -2877,6 +2888,12 @@ static void si_emit_msaa_config(struct si_context *sctx, 
struct r600_atom *atom)
sctx->ps_iter_samples,
sctx->smoothing_enabled ? 
SI_NUM_SMOOTH_AA_SAMPLES : 0,
sc_mode_cntl_1);
+
+   /* GFX9: Flush DFSM when the AA mode changes. */
+   if (sctx->b.chip_class >= GFX9) {
+   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+   radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | 
EVENT_INDEX(0));
+   }
 }
 
 static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 109/140] radeonsi/gfx9: image descriptor changes in immutable fields

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

The border color swizzle logic was copied from Vulkan. It doesn't make any
sense to me, but it passes all piglits except the stencil ones.
---
 src/amd/common/gfx9d.h  |  6 
 src/gallium/drivers/radeonsi/si_pipe.c  |  3 +-
 src/gallium/drivers/radeonsi/si_state.c | 56 ++---
 3 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/src/amd/common/gfx9d.h b/src/amd/common/gfx9d.h
index 797bdcc..e295a1d 100644
--- a/src/amd/common/gfx9d.h
+++ b/src/amd/common/gfx9d.h
@@ -1466,6 +1466,12 @@
 #define   S_008F20_BC_SWIZZLE(x)  
(((unsigned)(x) & 0x07) << 29)
 #define   G_008F20_BC_SWIZZLE(x)  (((x) >> 
29) & 0x07)
 #define   C_008F20_BC_SWIZZLE 
0x1FFFFFFF
+#define V_008F20_BC_SWIZZLE_XYZW   0
+#define V_008F20_BC_SWIZZLE_XWYZ   1
+#define V_008F20_BC_SWIZZLE_WZYX   2
+#define V_008F20_BC_SWIZZLE_WXYZ   3
+#define V_008F20_BC_SWIZZLE_ZYXW   4
+#define V_008F20_BC_SWIZZLE_YXWZ   5
 #define R_008F24_SQ_IMG_RSRC_WORD5  
0x008F24
 #define   S_008F24_BASE_ARRAY(x)  
(((unsigned)(x) & 0x1FFF) << 0)
 #define   G_008F24_BASE_ARRAY(x)  (((x) >> 
0) & 0x1FFF)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 8904b9d..5d3cbc5 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -497,7 +497,8 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
return 30;
 
case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
-   return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600;
+   return sscreen->b.chip_class <= VI ?
+   PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600 : 0;
 
/* Stream output. */
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index f880c31..90f5c77 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2932,6 +2932,35 @@ si_make_buffer_descriptor(struct si_screen *screen, 
struct r600_resource *buf,
   S_008F0C_DATA_FORMAT(data_format);
 }
 
+static unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4])
+{
+   unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
+
+   if (swizzle[3] == PIPE_SWIZZLE_X) {
+   /* For the pre-defined border color values (white, opaque
+* black, transparent black), the only thing that matters is
+* that the alpha channel winds up in the correct place
+* (because the RGB channels are all the same) so either of
+* these enumerations will work.
+*/
+   if (swizzle[2] == PIPE_SWIZZLE_Y)
+   bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
+   else
+   bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
+   } else if (swizzle[0] == PIPE_SWIZZLE_X) {
+   if (swizzle[1] == PIPE_SWIZZLE_Y)
+   bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
+   else
+   bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
+   } else if (swizzle[1] == PIPE_SWIZZLE_X) {
+   bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
+   } else if (swizzle[2] == PIPE_SWIZZLE_X) {
+   bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
+   }
+
+   return bc_swizzle;
+}
+
 /**
  * Build the sampler view descriptor for a texture.
  */
@@ -3097,14 +3126,33 @@ si_make_texture_descriptor(struct si_screen *screen,
S_008F1C_LAST_LEVEL(res->nr_samples > 1 ?
util_logbase2(res->nr_samples) :
last_level) |
-   S_008F1C_POW2_PAD(res->last_level > 0) |
S_008F1C_TYPE(type));
-   state[4] = S_008F20_DEPTH(depth - 1);
-   state[5] = (S_008F24_BASE_ARRAY(first_layer) |
-   S_008F24_LAST_ARRAY(last_layer));
+   state[4] = 0;
+   state[5] = S_008F24_BASE_ARRAY(first_layer);
state[6] = 0;
state[7] = 0;
 
+   if (screen->b.chip_class >= GFX9) {
+   unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle);
+
+   /* Depth is the last accessible layer on Gfx9.
+* The hw doesn't need to know the total number of layers.
+*/
+   if (type == V_008F1C_SQ_RSRC_IMG_3D)
+   state[4] |= 

[Mesa-dev] [PATCH 102/140] radeonsi/gfx9: don't check array_mode for allowing TC-compatible HTILE

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

GFX9 supports this with all modes except linear.
---
 src/gallium/drivers/radeon/r600_texture.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 10ee46a..f372341 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -222,7 +222,8 @@ static int r600_init_surface(struct r600_common_screen 
*rscreen,
flags |= RADEON_SURF_ZBUFFER;
 
if (tc_compatible_htile &&
-   array_mode == RADEON_SURF_MODE_2D) {
+   (rscreen->chip_class >= GFX9 ||
+array_mode == RADEON_SURF_MODE_2D)) {
/* TC-compatible HTILE only supports Z32_FLOAT.
 * GFX9 also supports Z16_UNORM.
 * On VI, promote Z16 to Z32. DB->CB copies will convert
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 110/140] radeonsi/gfx9: FMASK image descriptor changes

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state.c | 69 +++--
 1 file changed, 48 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 90f5c77..1e936b9 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3171,43 +3171,70 @@ si_make_texture_descriptor(struct si_screen *screen,
 
/* Initialize the sampler view for FMASK. */
if (tex->fmask.size) {
-   uint32_t fmask_format;
+   uint32_t data_format, num_format;
 
va = tex->resource.gpu_address + tex->fmask.offset;
 
-   switch (res->nr_samples) {
-   case 2:
-   fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
-   break;
-   case 4:
-   fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
-   break;
-   case 8:
-   fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
-   break;
-   default:
-   assert(0);
-   fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
+   if (screen->b.chip_class >= GFX9) {
+   data_format = V_008F14_IMG_DATA_FORMAT_FMASK;
+   switch (res->nr_samples) {
+   case 2:
+   num_format = V_008F14_IMG_FMASK_8_2_2;
+   break;
+   case 4:
+   num_format = V_008F14_IMG_FMASK_8_4_4;
+   break;
+   case 8:
+   num_format = V_008F14_IMG_FMASK_32_8_8;
+   break;
+   default:
+   assert(0);
+   }
+   } else {
+   switch (res->nr_samples) {
+   case 2:
+   data_format = 
V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
+   break;
+   case 4:
+   data_format = 
V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
+   break;
+   case 8:
+   data_format = 
V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
+   break;
+   default:
+   assert(0);
+   }
+   num_format = V_008F14_IMG_NUM_FORMAT_UINT;
}
 
fmask_state[0] = va >> 8;
fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
-S_008F14_DATA_FORMAT_GFX6(fmask_format) |
-
S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_UINT);
+S_008F14_DATA_FORMAT_GFX6(data_format) |
+S_008F14_NUM_FORMAT_GFX6(num_format);
fmask_state[2] = S_008F18_WIDTH(width - 1) |
 S_008F18_HEIGHT(height - 1);
fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
-
S_008F1C_TILING_INDEX(tex->fmask.tile_mode_index) |
 S_008F1C_TYPE(si_tex_dim(res->target, target, 
0));
-   fmask_state[4] = S_008F20_DEPTH(depth - 1) |
-S_008F20_PITCH_GFX6(tex->fmask.pitch_in_pixels 
- 1);
-   fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) |
-S_008F24_LAST_ARRAY(last_layer);
+   fmask_state[4] = 0;
+   fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
fmask_state[6] = 0;
fmask_state[7] = 0;
+
+   if (screen->b.chip_class >= GFX9) {
+   fmask_state[3] |= 
S_008F1C_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode);
+   fmask_state[4] |= S_008F20_DEPTH(last_layer) |
+ 
S_008F20_PITCH_GFX9(tex->surface.u.gfx9.fmask.epitch);
+   fmask_state[5] |= 
S_008F24_META_PIPE_ALIGNED(tex->surface.u.gfx9.cmask.pipe_aligned) |
+ 
S_008F24_META_RB_ALIGNED(tex->surface.u.gfx9.cmask.rb_aligned);
+   } else {
+   fmask_state[3] |= 
S_008F1C_TILING_INDEX(tex->fmask.tile_mode_index);
+   fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
+ 

[Mesa-dev] [PATCH 115/140] radeonsi/gfx9: fix MIP0_WIDTH & MIP0_HEIGHT for compressed texture blits

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/r600/r600_blit.c  | 5 -
 src/gallium/drivers/r600/r600_pipe.h  | 4 
 src/gallium/drivers/radeon/r600_pipe_common.h | 5 +
 src/gallium/drivers/radeon/r600_texture.c | 8 +++-
 src/gallium/drivers/radeonsi/si_blit.c| 8 +++-
 src/gallium/drivers/radeonsi/si_state.c   | 6 +++---
 6 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_blit.c 
b/src/gallium/drivers/r600/r600_blit.c
index 650d1a4..c52492e 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -726,7 +726,10 @@ void r600_resource_copy_region(struct pipe_context *ctx,
}
}
 
-   dst_view = r600_create_surface_custom(ctx, dst, &dst_templ, dst_width, 
dst_height);
+   dst_view = r600_create_surface_custom(ctx, dst, &dst_templ,
+ /* we don't care about these two 
for r600g */
+ dst->width0, dst->height0,
+ dst_width, dst_height);
 
if (rctx->b.chip_class >= EVERGREEN) {
src_view = evergreen_create_sampler_view_custom(ctx, src, 
&src_templ,
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index 1d65cb3..a7b7276 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -737,10 +737,6 @@ unsigned r600_tex_wrap(unsigned wrap);
 unsigned r600_tex_mipfilter(unsigned filter);
 unsigned r600_tex_compare(unsigned compare);
 bool sampler_state_needs_border_color(const struct pipe_sampler_state *state);
-struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
-   struct pipe_resource *texture,
-   const struct pipe_surface 
*templ,
-   unsigned width, unsigned 
height);
 unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
   const unsigned char *swizzle_view,
   boolean vtx);
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index a96f091..6eaa87a 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -275,6 +275,10 @@ struct r600_texture {
 struct r600_surface {
struct pipe_surface base;
 
+   /* These can vary with block-compressed textures. */
+   unsigned width0;
+   unsigned height0;
+
bool color_initialized;
bool depth_initialized;
 
@@ -804,6 +808,7 @@ void vi_dcc_disable_if_incompatible_format(struct 
r600_common_context *rctx,
 struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
struct pipe_resource *texture,
const struct pipe_surface 
*templ,
+   unsigned width0, unsigned 
height0,
unsigned width, unsigned 
height);
 unsigned r600_translate_colorswap(enum pipe_format format, bool 
do_endian_swap);
 void vi_separate_dcc_start_query(struct pipe_context *ctx,
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 5b1f941..f1b2cd9 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1862,6 +1862,7 @@ void vi_dcc_disable_if_incompatible_format(struct 
r600_common_context *rctx,
 struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
struct pipe_resource *texture,
const struct pipe_surface 
*templ,
+   unsigned width0, unsigned 
height0,
unsigned width, unsigned height)
 {
struct r600_common_context *rctx = (struct r600_common_context*)pipe;
@@ -1881,6 +1882,9 @@ struct pipe_surface *r600_create_surface_custom(struct 
pipe_context *pipe,
surface->base.height = height;
surface->base.u = templ->u;
 
+   surface->width0 = width0;
+   surface->height0 = height0;
+
if (texture->target != PIPE_BUFFER)
vi_dcc_disable_if_incompatible_format(rctx, texture,
  templ->u.tex.level,
@@ -1917,7 +1921,9 @@ static struct pipe_surface *r600_create_surface(struct 
pipe_context *pipe,
}
}
 
-   return r600_create_surface_custom(pipe, tex, templ, width, height);
+   return r600_create_surface_custom(pipe, tex, templ,
+ tex->width0, tex->height0,
+  

[Mesa-dev] [PATCH 111/140] radeonsi/gfx9: image descriptor changes in mutable fields

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_descriptors.c | 89 +--
 src/gallium/drivers/radeonsi/si_state.c   |  4 +-
 src/gallium/drivers/radeonsi/si_state.h   |  3 +-
 3 files changed, 73 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 9c7eb08..9c1603b 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -58,6 +58,7 @@
 #include "radeon/r600_cs.h"
 #include "si_pipe.h"
 #include "sid.h"
+#include "gfx9d.h"
 
 #include "util/u_format.h"
 #include "util/u_memory.h"
@@ -376,41 +377,88 @@ static void si_set_buf_desc_address(struct r600_resource 
*buf,
  * \param is_stencil   select between separate Z & Stencil
  * \param state descriptor to update
  */
-void si_set_mutable_tex_desc_fields(struct r600_texture *tex,
+void si_set_mutable_tex_desc_fields(struct si_screen *sscreen,
+   struct r600_texture *tex,
const struct legacy_surf_level 
*base_level_info,
unsigned base_level, unsigned first_level,
unsigned block_width, bool is_stencil,
uint32_t *state)
 {
-   uint64_t va;
-   unsigned pitch = base_level_info->nblk_x * block_width;
+   uint64_t va, meta_va = 0;
 
if (tex->is_depth && !r600_can_sample_zs(tex, is_stencil)) {
tex = tex->flushed_depth_texture;
is_stencil = false;
}
 
-   va = tex->resource.gpu_address + base_level_info->offset;
+   va = tex->resource.gpu_address;
 
-   state[1] &= C_008F14_BASE_ADDRESS_HI;
-   state[3] &= C_008F1C_TILING_INDEX;
-   state[4] &= C_008F20_PITCH_GFX6;
-   state[6] &= C_008F28_COMPRESSION_EN;
+   if (sscreen->b.chip_class >= GFX9) {
+   /* Only stencil_offset needs to be added here. */
+   if (is_stencil)
+   va += tex->surface.u.gfx9.stencil_offset;
+   } else {
+   va += base_level_info->offset;
+   }
+
+   if (tex->dcc_offset && first_level < tex->surface.num_dcc_levels) {
+   meta_va = (!tex->dcc_separate_buffer ? 
tex->resource.gpu_address : 0) +
+ tex->dcc_offset;
+
+   if (sscreen->b.chip_class <= VI)
+   meta_va += base_level_info->dcc_offset;
+   } else if (tex->tc_compatible_htile && !is_stencil) {
+   meta_va = tex->htile_buffer->gpu_address;
+   }
 
state[0] = va >> 8;
+   state[1] &= C_008F14_BASE_ADDRESS_HI;
state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
-   state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(tex, base_level,
-is_stencil));
-   state[4] |= S_008F20_PITCH_GFX6(pitch - 1);
 
-   if (tex->dcc_offset && first_level < tex->surface.num_dcc_levels) {
-   state[6] |= S_008F28_COMPRESSION_EN(1);
-   state[7] = ((!tex->dcc_separate_buffer ? 
tex->resource.gpu_address : 0) +
-   tex->dcc_offset +
-   base_level_info->dcc_offset) >> 8;
-   } else if (tex->tc_compatible_htile) {
+   state[6] &= C_008F28_COMPRESSION_EN;
+   state[7] = 0;
+
+   if (meta_va) {
state[6] |= S_008F28_COMPRESSION_EN(1);
-   state[7] = tex->htile_buffer->gpu_address >> 8;
+   state[7] = meta_va >> 8;
+   }
+
+   if (sscreen->b.chip_class >= GFX9) {
+   state[3] &= C_008F1C_SW_MODE;
+   state[4] &= C_008F20_PITCH_GFX9;
+
+   if (is_stencil) {
+   state[3] |= 
S_008F1C_SW_MODE(tex->surface.u.gfx9.stencil.swizzle_mode);
+   state[4] |= 
S_008F20_PITCH_GFX9(tex->surface.u.gfx9.stencil.epitch);
+   } else {
+   state[3] |= 
S_008F1C_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode);
+   state[4] |= 
S_008F20_PITCH_GFX9(tex->surface.u.gfx9.surf.epitch);
+   }
+
+   state[5] &= C_008F24_META_DATA_ADDRESS &
+   C_008F24_META_PIPE_ALIGNED &
+   C_008F24_META_RB_ALIGNED;
+   if (meta_va) {
+   struct gfx9_surf_meta_flags meta;
+
+   if (tex->dcc_offset)
+   meta = tex->surface.u.gfx9.dcc;
+   else
+   meta = tex->surface.u.gfx9.htile;
+
+   state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
+   
S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
+   S_008F24_META_RB_ALIGNED(meta.rb_aligned);

[Mesa-dev] [PATCH 135/140] radeon/vce: adapt gfx9 surface to vce

2017-03-20 Thread Marek Olšák
From: Leo Liu 

Signed-off-by: Leo Liu 
Acked-by: Alex Deucher 
---
 src/gallium/drivers/radeon/radeon_vce.c| 24 +
 src/gallium/drivers/radeon/radeon_vce_52.c | 42 +++---
 2 files changed, 51 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_vce.c 
b/src/gallium/drivers/radeon/radeon_vce.c
index 0ff66f4..8d25dd7 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -224,9 +224,17 @@ struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
 void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
   signed *luma_offset, signed *chroma_offset)
 {
-   unsigned pitch = align(enc->luma->u.legacy.level[0].nblk_x * 
enc->luma->bpe, 128);
-   unsigned vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16);
-   unsigned fsize = pitch * (vpitch + vpitch / 2);
+   struct r600_common_screen *rscreen = (struct r600_common_screen 
*)enc->screen;
+   unsigned pitch, vpitch, fsize;
+
+   if (rscreen->chip_class < GFX9) {
+   pitch = align(enc->luma->u.legacy.level[0].nblk_x * 
enc->luma->bpe, 128);
+   vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16);
+   } else {
+   pitch = align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 
256);
+   vpitch = align(enc->luma->u.gfx9.surf_height, 16);
+   }
+   fsize = pitch * (vpitch + vpitch / 2);
 
*luma_offset = slot->index * fsize;
*chroma_offset = *luma_offset + pitch * vpitch;
@@ -456,8 +464,14 @@ struct pipe_video_codec *rvce_create_encoder(struct 
pipe_context *context,
goto error;
 
get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, 
&tmp_surf);
-   cpb_size = align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 
128);
-   cpb_size = cpb_size * align(tmp_surf->u.legacy.level[0].nblk_y, 32);
+
+   cpb_size = (rscreen->chip_class < GFX9) ?
+   align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) *
+   align(tmp_surf->u.legacy.level[0].nblk_y, 32) :
+
+   align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) *
+   align(tmp_surf->u.gfx9.surf_height, 32);
+
cpb_size = cpb_size * 3 / 2;
cpb_size = cpb_size * enc->cpb_num;
if (enc->dual_pipe)
diff --git a/src/gallium/drivers/radeon/radeon_vce_52.c 
b/src/gallium/drivers/radeon/radeon_vce_52.c
index 6f9194c..36cf480 100644
--- a/src/gallium/drivers/radeon/radeon_vce_52.c
+++ b/src/gallium/drivers/radeon/radeon_vce_52.c
@@ -167,6 +167,7 @@ void radeon_vce_52_get_param(struct rvce_encoder *enc, 
struct pipe_h264_enc_pict
 
 static void create(struct rvce_encoder *enc)
 {
+   struct r600_common_screen *rscreen = (struct r600_common_screen 
*)enc->screen;
enc->task_info(enc, 0x00000000, 0, 0, 0);
 
RVCE_BEGIN(0x01000001); // create cmd
@@ -177,9 +178,17 @@ static void create(struct rvce_encoder *enc)
RVCE_CS(enc->enc_pic.ec.enc_pic_struct_restriction);
RVCE_CS(enc->base.width); // encImageWidth
RVCE_CS(enc->base.height); // encImageHeight
-   RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // 
encRefPicLumaPitch
-   RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // 
encRefPicChromaPitch
-   RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16) / 8); // 
encRefYHeightInQw
+
+   if (rscreen->chip_class < GFX9) {
+   RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); 
// encRefPicLumaPitch
+   RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * 
enc->chroma->bpe); // encRefPicChromaPitch
+   RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16) / 8); // 
encRefYHeightInQw
+   } else {
+   RVCE_CS(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe); // 
encRefPicLumaPitch
+   RVCE_CS(enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe); // 
encRefPicChromaPitch
+   RVCE_CS(align(enc->luma->u.gfx9.surf_height, 16) / 8); // 
encRefYHeightInQw
+   }
+
RVCE_CS(enc->enc_pic.addrmode_arraymode_disrdo_distwoinstants);
 
RVCE_CS(enc->enc_pic.ec.enc_pre_encode_context_buffer_offset);
@@ -191,6 +200,7 @@ static void create(struct rvce_encoder *enc)
 
 static void encode(struct rvce_encoder *enc)
 {
+   struct r600_common_screen *rscreen = (struct r600_common_screen 
*)enc->screen;
signed luma_offset, chroma_offset, bs_offset;
unsigned dep, bs_idx = enc->bs_idx++;
int i;
@@ -239,13 +249,25 @@ static void encode(struct rvce_encoder *enc)
RVCE_CS(enc->enc_pic.eo.insert_aud);
RVCE_CS(enc->enc_pic.eo.end_of_sequence);
RVCE_CS(enc->enc_pic.eo.end_of_stream);
-   RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
-   

[Mesa-dev] [PATCH 113/140] radeonsi/gfx9: add a workaround for 1D depth textures

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

The same workaround is used by Vulkan.
---
 src/gallium/drivers/radeon/radeon_winsys.h |  8 
 src/gallium/drivers/radeonsi/si_shader.c   | 59 --
 src/gallium/drivers/radeonsi/si_state.c| 37 
 src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 12 +++---
 4 files changed, 86 insertions(+), 30 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
b/src/gallium/drivers/radeon/radeon_winsys.h
index e25f60c..5b032bf 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -327,6 +327,13 @@ struct legacy_surf_layout {
 uint8_t stencil_tiling_index[RADEON_SURF_MAX_LEVELS];
 };
 
+/* Same as addrlib - AddrResourceType. */
+enum gfx9_resource_type {
+RADEON_RESOURCE_1D = 0,
+RADEON_RESOURCE_2D,
+RADEON_RESOURCE_3D,
+};
+
 struct gfx9_surf_flags {
     uint16_t swizzle_mode; /* tile mode */
     uint16_t epitch; /* (pitch - 1) or (height - 1) */
@@ -346,6 +353,7 @@ struct gfx9_surf_layout {
 struct gfx9_surf_meta_flags htile; /* metadata of depth and stencil */
 struct gfx9_surf_meta_flags cmask; /* metadata of fmask */
 
+enum gfx9_resource_type resource_type; /* 1D, 2D or 3D */
 /* The size of the 2D plane containing all mipmap levels. */
     uint64_t surf_slice_size;
     uint16_t surf_pitch; /* in blocks */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 226924b..0d2a106 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3297,6 +3297,7 @@ static LLVMValueRef image_fetch_coords(
const struct tgsi_full_instruction *inst,
unsigned src)
 {
+   struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
unsigned target = inst->Memory.Texture;
@@ -3311,6 +3312,17 @@ static LLVMValueRef image_fetch_coords(
coords[chan] = tmp;
}
 
+   /* 1D textures are allocated and used as 2D on GFX9. */
+   if (ctx->screen->b.chip_class >= GFX9) {
+   if (target == TGSI_TEXTURE_1D) {
+   coords[1] = bld_base->uint_bld.zero;
+   num_coords++;
+   } else if (target == TGSI_TEXTURE_1D_ARRAY) {
+   coords[2] = coords[1];
+   coords[1] = bld_base->uint_bld.zero;
+   }
+   }
+
if (num_coords == 1)
return coords[0];
 
@@ -4440,11 +4452,12 @@ static void tex_fetch_args(
 
/* Pack user derivatives */
if (opcode == TGSI_OPCODE_TXD) {
-   int param, num_src_deriv_channels;
+   int param, num_src_deriv_channels, num_dst_deriv_channels;
 
switch (target) {
case TGSI_TEXTURE_3D:
num_src_deriv_channels = 3;
+   num_dst_deriv_channels = 3;
num_deriv_channels = 3;
break;
case TGSI_TEXTURE_2D:
@@ -4454,6 +4467,7 @@ static void tex_fetch_args(
case TGSI_TEXTURE_2D_ARRAY:
case TGSI_TEXTURE_SHADOW2D_ARRAY:
num_src_deriv_channels = 2;
+   num_dst_deriv_channels = 2;
num_deriv_channels = 2;
break;
case TGSI_TEXTURE_CUBE:
@@ -4462,6 +4476,7 @@ static void tex_fetch_args(
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
/* Cube derivatives will be converted to 2D. */
num_src_deriv_channels = 3;
+   num_dst_deriv_channels = 3;
num_deriv_channels = 2;
break;
case TGSI_TEXTURE_1D:
@@ -4469,16 +4484,31 @@ static void tex_fetch_args(
case TGSI_TEXTURE_1D_ARRAY:
case TGSI_TEXTURE_SHADOW1D_ARRAY:
num_src_deriv_channels = 1;
-   num_deriv_channels = 1;
+
+   /* 1D textures are allocated and used as 2D on GFX9. */
+   if (ctx->screen->b.chip_class >= GFX9) {
+   num_dst_deriv_channels = 2;
+   num_deriv_channels = 2;
+   } else {
+   num_dst_deriv_channels = 1;
+   num_deriv_channels = 1;
+   }
break;
default:
unreachable("invalid target");
}
 
-   for (param = 0; param < 2; param++)
+   for (param = 0; param < 2; param++) {

[Mesa-dev] [PATCH 136/140] radeon/vce: add support for firmware 53.14.4

2017-03-20 Thread Marek Olšák
From: Leo Liu 

Signed-off-by: Leo Liu 
Acked-by: Alex Deucher 
---
 src/gallium/drivers/radeon/radeon_vce.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/drivers/radeon/radeon_vce.c 
b/src/gallium/drivers/radeon/radeon_vce.c
index 8d25dd7..be2 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -52,6 +52,7 @@
 #define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8))
 #define FW_52_4_3 ((52 << 24) | (4 << 16) | (3 << 8))
 #define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8))
+#define FW_53_14_4 ((53 << 24) | (14 << 16) | (4 << 8))
 
 /**
  * flush commands to the hardware
@@ -509,6 +510,10 @@ struct pipe_video_codec *rvce_create_encoder(struct 
pipe_context *context,
radeon_vce_52_init(enc);
get_pic_param = radeon_vce_52_get_param;
break;
+   case FW_53_14_4:
+   radeon_vce_52_init(enc);
+   get_pic_param = radeon_vce_52_get_param;
+   break;
 
default:
goto error;
@@ -541,6 +546,7 @@ bool rvce_is_fw_version_supported(struct r600_common_screen 
*rscreen)
case FW_52_0_3:
case FW_52_4_3:
case FW_52_8_3:
+   case FW_53_14_4:
return true;
default:
return false;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 106/140] radeonsi/gfx9: do DCC clears on non-mipmapped textures only

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_texture.c | 16 
 src/gallium/drivers/radeonsi/si_blit.c|  5 +
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 1838de4..5b1f941 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -2417,7 +2417,7 @@ void vi_dcc_clear_level(struct r600_common_context *rctx,
unsigned level, unsigned clear_value)
 {
struct pipe_resource *dcc_buffer;
-   uint64_t dcc_offset;
+   uint64_t dcc_offset, clear_size;
 
assert(rtex->dcc_offset && level < rtex->surface.num_dcc_levels);
 
@@ -2429,10 +2429,18 @@ void vi_dcc_clear_level(struct r600_common_context 
*rctx,
dcc_offset = rtex->dcc_offset;
}
 
-   dcc_offset += rtex->surface.u.legacy.level[level].dcc_offset;
+   if (rctx->chip_class >= GFX9) {
+   /* Mipmap level clears aren't implemented. */
+   assert(rtex->resource.b.b.last_level == 0);
+   /* MSAA needs a different clear size. */
+   assert(rtex->resource.b.b.nr_samples <= 1);
+   clear_size = rtex->surface.dcc_size;
+   } else {
+   dcc_offset += rtex->surface.u.legacy.level[level].dcc_offset;
+   clear_size = 
rtex->surface.u.legacy.level[level].dcc_fast_clear_size;
+   }
 
-   rctx->clear_buffer(&rctx->b, dcc_buffer, dcc_offset,
-  
rtex->surface.u.legacy.level[level].dcc_fast_clear_size,
+   rctx->clear_buffer(&rctx->b, dcc_buffer, dcc_offset, clear_size,
   clear_value, R600_COHERENCY_CB_META);
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index da6c0cd..24c73d0 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -1035,6 +1035,11 @@ static bool do_hardware_msaa_resolve(struct pipe_context 
*ctx,
 */
if (dst->dcc_offset &&
info->dst.level < dst->surface.num_dcc_levels) {
+   /* TODO: Implement per-level DCC clears for GFX9. */
+   if (sctx->b.chip_class >= GFX9 &&
+   info->dst.resource->last_level != 0)
+   goto resolve_to_temp;
+
vi_dcc_clear_level(&sctx->b, dst, info->dst.level,
   0xFFFFFFFF);
dst->dirty_level_mask &= ~(1 << info->dst.level);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 114/140] radeonsi/gfx9: fix textureSize/imageSize for 1D textures

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader.c | 57 ++--
 1 file changed, 32 insertions(+), 25 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 0d2a106..74fef36 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4082,6 +4082,35 @@ static void set_tex_fetch_args(struct si_shader_context 
*ctx,
memcpy(emit_data->args, &args, sizeof(args));
 }
 
+static LLVMValueRef fix_resinfo(struct si_shader_context *ctx,
+   unsigned target, LLVMValueRef out)
+{
+   LLVMBuilderRef builder = ctx->gallivm.builder;
+
+   /* 1D textures are allocated and used as 2D on GFX9. */
+if (ctx->screen->b.chip_class >= GFX9 &&
+   (target == TGSI_TEXTURE_1D_ARRAY ||
+target == TGSI_TEXTURE_SHADOW1D_ARRAY)) {
+   LLVMValueRef layers =
+   LLVMBuildExtractElement(builder, out,
+   LLVMConstInt(ctx->i32, 2, 0), 
"");
+   out = LLVMBuildInsertElement(builder, out, layers,
+LLVMConstInt(ctx->i32, 1, 0), "");
+   }
+
+   /* Divide the number of layers by 6 to get the number of cubes. */
+   if (target == TGSI_TEXTURE_CUBE_ARRAY ||
+   target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
+   LLVMValueRef imm2 = LLVMConstInt(ctx->i32, 2, 0);
+
+   LLVMValueRef z = LLVMBuildExtractElement(builder, out, imm2, 
"");
+   z = LLVMBuildSDiv(builder, z, LLVMConstInt(ctx->i32, 6, 0), "");
+
+   out = LLVMBuildInsertElement(builder, out, z, imm2, "");
+   }
+   return out;
+}
+
 static void resq_fetch_args(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
@@ -4139,15 +4168,7 @@ static void resq_emit(
args.opcode = ac_image_get_resinfo;
out = ac_build_image_opcode(&ctx->ac, &args);
 
-   /* Divide the number of layers by 6 to get the number of cubes. 
*/
-   if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
-   LLVMValueRef imm2 = lp_build_const_int32(gallivm, 2);
-   LLVMValueRef imm6 = lp_build_const_int32(gallivm, 6);
-
-   LLVMValueRef z = LLVMBuildExtractElement(builder, out, 
imm2, "");
-   z = LLVMBuildSDiv(builder, z, imm6, "");
-   out = LLVMBuildInsertElement(builder, out, z, imm2, "");
-   }
+   out = fix_resinfo(ctx, inst->Memory.Texture, out);
}
 
emit_data->output[emit_data->chan] = out;
@@ -4327,23 +4348,9 @@ static void txq_emit(const struct lp_build_tgsi_action 
*action,
memcpy(&args, emit_data->args, sizeof(args)); /* ugly */
 
args.opcode = ac_image_get_resinfo;
-   emit_data->output[emit_data->chan] =
-   ac_build_image_opcode(&ctx->ac, &args);
-
-   /* Divide the number of layers by 6 to get the number of cubes. */
-   if (target == TGSI_TEXTURE_CUBE_ARRAY ||
-   target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
-   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
-   LLVMValueRef two = lp_build_const_int32(bld_base->base.gallivm, 
2);
-   LLVMValueRef six = lp_build_const_int32(bld_base->base.gallivm, 
6);
+   LLVMValueRef result = ac_build_image_opcode(&ctx->ac, &args);
 
-   LLVMValueRef v4 = emit_data->output[emit_data->chan];
-   LLVMValueRef z = LLVMBuildExtractElement(builder, v4, two, "");
-   z = LLVMBuildSDiv(builder, z, six, "");
-
-   emit_data->output[emit_data->chan] =
-   LLVMBuildInsertElement(builder, v4, z, two, "");
-   }
+   emit_data->output[emit_data->chan] = fix_resinfo(ctx, target, result);
 }
 
 static void tex_fetch_args(
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 123/140] radeonsi/gfx9: set/validate GFX9 BO metadata

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_texture.c  | 20 +++-
 src/gallium/drivers/radeon/radeon_winsys.h |  5 +
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 2e66dd0..df260b6 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -291,6 +291,7 @@ static void r600_texture_init_metadata(struct 
r600_common_screen *rscreen,
memset(metadata, 0, sizeof(*metadata));
 
if (rscreen->chip_class >= GFX9) {
+   metadata->u.gfx9.swizzle_mode = 
surface->u.gfx9.surf.swizzle_mode;
} else {
metadata->u.legacy.microtile = surface->u.legacy.level[0].mode 
>= RADEON_SURF_MODE_1D ?
   RADEON_LAYOUT_TILED : 
RADEON_LAYOUT_LINEAR;
@@ -1345,6 +1346,7 @@ static struct pipe_resource 
*r600_texture_from_handle(struct pipe_screen *screen
int r;
struct radeon_bo_metadata metadata = {};
struct r600_texture *rtex;
+   bool is_scanout;
 
/* Support only 2D textures without mipmaps */
if ((templ->target != PIPE_TEXTURE_2D && templ->target != 
PIPE_TEXTURE_RECT) ||
@@ -1358,6 +1360,13 @@ static struct pipe_resource 
*r600_texture_from_handle(struct pipe_screen *screen
rscreen->ws->buffer_get_metadata(buf, &metadata);
 
if (rscreen->chip_class >= GFX9) {
+   if (metadata.u.gfx9.swizzle_mode > 0)
+   array_mode = RADEON_SURF_MODE_2D;
+   else
+   array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+   is_scanout = metadata.u.gfx9.swizzle_mode == 0 ||
+metadata.u.gfx9.swizzle_mode % 4 == 2;
} else {
surface.u.legacy.pipe_config = metadata.u.legacy.pipe_config;
surface.u.legacy.bankw = metadata.u.legacy.bankw;
@@ -1372,10 +1381,12 @@ static struct pipe_resource 
*r600_texture_from_handle(struct pipe_screen *screen
array_mode = RADEON_SURF_MODE_1D;
else
array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+   is_scanout = metadata.u.legacy.scanout;
}
 
r = r600_init_surface(rscreen, &surface, templ, array_mode, stride,
- offset, true, metadata.u.legacy.scanout, false, 
false);
+ offset, true, is_scanout, false, false);
if (r) {
return NULL;
}
@@ -1390,6 +1401,13 @@ static struct pipe_resource 
*r600_texture_from_handle(struct pipe_screen *screen
if (rscreen->apply_opaque_metadata)
rscreen->apply_opaque_metadata(rscreen, rtex, &metadata);
 
+   /* Validate that addrlib arrived at the same surface parameters. */
+   if (rscreen->chip_class >= GFX9) {
+   struct gfx9_surf_layout *gfx9 = &surface.u.gfx9;
+
+   assert(metadata.u.gfx9.swizzle_mode == gfx9->surf.swizzle_mode);
+   }
+
return &rtex->resource.b.b;
 }
 
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
b/src/gallium/drivers/radeon/radeon_winsys.h
index b3c7608..7839170 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -250,6 +250,11 @@ struct radeon_bo_metadata {
 unsignedstride;
 boolscanout;
 } legacy;
+
+struct {
+/* surface flags */
+unsigned swizzle_mode:5;
+} gfx9;
 } u;
 
 /* Additional metadata associated with the buffer, in bytes.
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 124/140] radeonsi/gfx9: handle pitch and offset overrides for texture_from_handle

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_texture.c | 29 ++---
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index df260b6..0231fe2 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -266,18 +266,25 @@ static int r600_init_surface(struct r600_common_screen 
*rscreen,
return r;
}
 
-   if (pitch_in_bytes_override &&
-   pitch_in_bytes_override != surface->u.legacy.level[0].nblk_x * bpe) 
{
-   /* old ddx on evergreen over estimate alignment for 1d, only 1 
level
-* for those
-*/
-   surface->u.legacy.level[0].nblk_x = pitch_in_bytes_override / 
bpe;
-   surface->u.legacy.level[0].slice_size = pitch_in_bytes_override 
* surface->u.legacy.level[0].nblk_y;
-   }
+   if (rscreen->chip_class >= GFX9) {
+   assert(!pitch_in_bytes_override ||
+  pitch_in_bytes_override == surface->u.gfx9.surf_pitch * 
bpe);
+   surface->u.gfx9.surf_offset = offset;
+   } else {
+   if (pitch_in_bytes_override &&
+   pitch_in_bytes_override != 
surface->u.legacy.level[0].nblk_x * bpe) {
+   /* old ddx on evergreen over estimate alignment for 1d, 
only 1 level
+* for those
+*/
+   surface->u.legacy.level[0].nblk_x = 
pitch_in_bytes_override / bpe;
+   surface->u.legacy.level[0].slice_size = 
pitch_in_bytes_override *
+   
surface->u.legacy.level[0].nblk_y;
+   }
 
-   if (offset) {
-   for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
-   surface->u.legacy.level[i].offset += offset;
+   if (offset) {
+   for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); 
++i)
+   surface->u.legacy.level[i].offset += offset;
+   }
}
return 0;
 }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 122/140] radeonsi/gfx9: add radeon_surf.gfx9.surf_offset

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_texture.c  | 2 +-
 src/gallium/drivers/radeon/radeon_winsys.h | 1 +
 src/gallium/drivers/radeonsi/si_descriptors.c  | 2 ++
 src/gallium/drivers/radeonsi/si_state.c| 2 ++
 src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 1 +
 5 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index f1b2cd9..2e66dd0 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -555,7 +555,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* 
screen,
}
 
if (rscreen->chip_class >= GFX9) {
-   offset = 0;
+   offset = rtex->surface.u.gfx9.surf_offset;
stride = rtex->surface.u.gfx9.surf_pitch *
 rtex->surface.bpe;
slice_size = rtex->surface.u.gfx9.surf_slice_size;
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
b/src/gallium/drivers/radeon/radeon_winsys.h
index 5b032bf..b3c7608 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -354,6 +354,7 @@ struct gfx9_surf_layout {
 struct gfx9_surf_meta_flags cmask; /* metadata of fmask */
 
 enum gfx9_resource_type resource_type; /* 1D, 2D or 3D */
+uint64_tsurf_offset; /* 0 unless imported with an 
offset */
 /* The size of the 2D plane containing all mipmap levels. */
 uint64_tsurf_slice_size;
 uint16_tsurf_pitch; /* in blocks */
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 58d35da..fb82f8f 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -398,6 +398,8 @@ void si_set_mutable_tex_desc_fields(struct si_screen 
*sscreen,
/* Only stencil_offset needs to be added here. */
if (is_stencil)
va += tex->surface.u.gfx9.stencil_offset;
+   else
+   va += tex->surface.u.gfx9.surf_offset;
} else {
va += base_level_info->offset;
}
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 82f2793..a46bc4b 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2271,6 +2271,7 @@ static void si_init_depth_surface(struct si_context *sctx,
surf->db_htile_surface = 0;
 
if (sctx->b.chip_class >= GFX9) {
+   assert(rtex->surface.u.gfx9.surf_offset == 0);
surf->db_depth_base = rtex->resource.gpu_address >> 8;
surf->db_stencil_base = (rtex->resource.gpu_address +
 rtex->surface.u.gfx9.stencil_offset) 
>> 8;
@@ -2658,6 +2659,7 @@ static void si_emit_framebuffer_state(struct si_context 
*sctx, struct r600_atom
meta = tex->surface.u.gfx9.cmask;
 
/* Set mutable surface parameters. */
+   cb_color_base += tex->surface.u.gfx9.surf_offset >> 8;
cb_color_attrib |= 
S_028C74_COLOR_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode) |
   
S_028C74_FMASK_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode) |
   S_028C74_RB_ALIGNED(meta.rb_aligned) 
|
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index 7f930ed..48df27b 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -903,6 +903,7 @@ static int gfx9_surface_init(struct radeon_winsys *rws,
surf->surf_size = 0;
surf->dcc_size = 0;
surf->htile_size = 0;
+   surf->u.gfx9.surf_offset = 0;
surf->u.gfx9.stencil_offset = 0;
surf->u.gfx9.fmask_size = 0;
surf->u.gfx9.cmask_size = 0;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 101/140] radeonsi/gfx9: update HTILE/CMASK/FMASK allocators

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_texture.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 8cc9f2a..10ee46a 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -593,6 +593,12 @@ void r600_texture_get_fmask_info(struct r600_common_screen 
*rscreen,
 
memset(out, 0, sizeof(*out));
 
+   if (rscreen->chip_class >= GFX9) {
+   out->alignment = rtex->surface.u.gfx9.fmask_alignment;
+   out->size = rtex->surface.u.gfx9.fmask_size;
+   return;
+   }
+
templ.nr_samples = 1;
flags = rtex->surface.flags | RADEON_SURF_FMASK;
 
@@ -698,6 +704,12 @@ static void si_texture_get_cmask_info(struct 
r600_common_screen *rscreen,
unsigned num_pipes = rscreen->info.num_tile_pipes;
unsigned cl_width, cl_height;
 
+   if (rscreen->chip_class >= GFX9) {
+   out->alignment = rtex->surface.u.gfx9.cmask_alignment;
+   out->size = rtex->surface.u.gfx9.cmask_size;
+   return;
+   }
+
switch (num_pipes) {
case 2:
cl_width = 32;
@@ -799,6 +811,8 @@ static void r600_texture_get_htile_size(struct 
r600_common_screen *rscreen,
unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
unsigned num_pipes = rscreen->info.num_tile_pipes;
 
+   assert(rscreen->chip_class <= VI);
+
rtex->surface.htile_size = 0;
 
if (rscreen->chip_class <= EVERGREEN &&
@@ -873,7 +887,7 @@ static void r600_texture_allocate_htile(struct 
r600_common_screen *rscreen,
 {
uint32_t clear_value;
 
-   if (rtex->tc_compatible_htile) {
+   if (rscreen->chip_class >= GFX9 || rtex->tc_compatible_htile) {
clear_value = 0x030F;
} else {
r600_texture_get_htile_size(rscreen, rtex);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 108/140] radeonsi/gfx9: DB changes

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_pipe_common.h |   8 +-
 src/gallium/drivers/radeonsi/si_state.c   | 262 +-
 2 files changed, 176 insertions(+), 94 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index e00f74b..a96f091 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -307,17 +307,19 @@ struct r600_surface {
struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. 
R600 only */
 
/* DB registers. */
+   uint64_t db_depth_base; /* DB_Z_READ/WRITE_BASE (EG and later) 
or DB_DEPTH_BASE (r600) */
+   uint64_t db_stencil_base;   /* EG and later */
+   uint64_t db_htile_data_base;
unsigned db_depth_info; /* R600 only, then SI and later */
unsigned db_z_info; /* EG and later */
-   unsigned db_depth_base; /* DB_Z_READ/WRITE_BASE (EG and later) 
or DB_DEPTH_BASE (r600) */
+   unsigned db_z_info2;/* GFX9+ */
unsigned db_depth_view;
unsigned db_depth_size;
unsigned db_depth_slice;/* EG and later */
-   unsigned db_stencil_base;   /* EG and later */
unsigned db_stencil_info;   /* EG and later */
+   unsigned db_stencil_info2;  /* GFX9+ */
unsigned db_prefetch_limit; /* R600 only */
unsigned db_htile_surface;
-   unsigned db_htile_data_base;
unsigned db_preload_control;/* EG and later */
 };
 
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index a7fd2d7..f880c31 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2244,7 +2244,6 @@ static void si_init_depth_surface(struct si_context *sctx,
 {
struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
unsigned level = surf->base.u.tex.level;
-   struct legacy_surf_level *levelinfo = 
&rtex->surface.u.legacy.level[level];
unsigned format, stencil_format;
uint32_t z_info, s_info;
 
@@ -2261,87 +2260,140 @@ static void si_init_depth_surface(struct si_context 
*sctx,
surf->db_htile_data_base = 0;
surf->db_htile_surface = 0;
 
-   assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);
-
-   surf->db_depth_base = (rtex->resource.gpu_address +
-  rtex->surface.u.legacy.level[level].offset) >> 8;
-   surf->db_stencil_base = (rtex->resource.gpu_address +
-
rtex->surface.u.legacy.stencil_level[level].offset) >> 8;
+   if (sctx->b.chip_class >= GFX9) {
+   surf->db_depth_base = rtex->resource.gpu_address >> 8;
+   surf->db_stencil_base = (rtex->resource.gpu_address +
+rtex->surface.u.gfx9.stencil_offset) 
>> 8;
+   z_info = S_028038_FORMAT(format) |
+
S_028038_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples)) |
+
S_028038_SW_MODE(rtex->surface.u.gfx9.surf.swizzle_mode) |
+S_028038_MAXMIP(rtex->resource.b.b.last_level);
+   s_info = S_02803C_FORMAT(stencil_format) |
+
S_02803C_SW_MODE(rtex->surface.u.gfx9.stencil.swizzle_mode);
+   surf->db_z_info2 = 
S_028068_EPITCH(rtex->surface.u.gfx9.surf.epitch);
+   surf->db_stencil_info2 = 
S_02806C_EPITCH(rtex->surface.u.gfx9.stencil.epitch);
+   surf->db_depth_view |= S_028008_MIPID(level);
+   surf->db_depth_size = S_02801C_X_MAX(rtex->resource.b.b.width0 
- 1) |
+ S_02801C_Y_MAX(rtex->resource.b.b.height0 
- 1);
+
+   /* Only use HTILE for the first level. */
+   if (rtex->htile_buffer && !level) {
+   z_info |= S_028038_TILE_SURFACE_ENABLE(1) |
+ S_028038_ALLOW_EXPCLEAR(1);
+
+   if (rtex->tc_compatible_htile) {
+   unsigned max_zplanes = 4;
+
+   if (rtex->db_render_format == 
PIPE_FORMAT_Z16_UNORM &&
+   rtex->resource.b.b.nr_samples > 1)
+   max_zplanes = 2;
+
+   z_info |= 
S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) |
+ S_028038_ITERATE_FLUSH(1);
+   s_info |= S_02803C_ITERATE_FLUSH(1);
+   }
 
-   z_info = S_028040_FORMAT(format) |
-
S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
-   s_info = S_028044_FORMAT(stencil_format);
-   surf->db_depth_info = 
S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile);
+ 

[Mesa-dev] [PATCH 120/140] radeonsi/gfx9: flush CB & DB caches with an EOP TS event

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state_draw.c | 107 +--
 1 file changed, 84 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index a80f021..2e1656e 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -778,6 +778,8 @@ void si_emit_cache_flush(struct si_context *sctx)
struct r600_common_context *rctx = &sctx->b;
struct radeon_winsys_cs *cs = rctx->gfx.cs;
uint32_t cp_coher_cntl = 0;
+   uint32_t flush_cb_db = rctx->flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
+ SI_CONTEXT_FLUSH_AND_INV_DB);
 
if (rctx->flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
   SI_CONTEXT_FLUSH_AND_INV_DB))
@@ -796,30 +798,34 @@ void si_emit_cache_flush(struct si_context *sctx)
if (rctx->flags & SI_CONTEXT_INV_SMEM_L1)
cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
 
-   if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
-   cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
-S_0085F0_CB0_DEST_BASE_ENA(1) |
-S_0085F0_CB1_DEST_BASE_ENA(1) |
-S_0085F0_CB2_DEST_BASE_ENA(1) |
-S_0085F0_CB3_DEST_BASE_ENA(1) |
-S_0085F0_CB4_DEST_BASE_ENA(1) |
-S_0085F0_CB5_DEST_BASE_ENA(1) |
-S_0085F0_CB6_DEST_BASE_ENA(1) |
-S_0085F0_CB7_DEST_BASE_ENA(1);
-
-   /* Necessary for DCC */
-   if (rctx->chip_class == VI)
-   r600_gfx_write_event_eop(rctx, 
V_028A90_FLUSH_AND_INV_CB_DATA_TS,
-0, 0, NULL, 0, 0, 0);
+   if (rctx->chip_class <= VI) {
+   if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
+   cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
+S_0085F0_CB0_DEST_BASE_ENA(1) |
+S_0085F0_CB1_DEST_BASE_ENA(1) |
+S_0085F0_CB2_DEST_BASE_ENA(1) |
+S_0085F0_CB3_DEST_BASE_ENA(1) |
+S_0085F0_CB4_DEST_BASE_ENA(1) |
+S_0085F0_CB5_DEST_BASE_ENA(1) |
+S_0085F0_CB6_DEST_BASE_ENA(1) |
+S_0085F0_CB7_DEST_BASE_ENA(1);
+
+   /* Necessary for DCC */
+   if (rctx->chip_class == VI)
+   r600_gfx_write_event_eop(rctx, 
V_028A90_FLUSH_AND_INV_CB_DATA_TS,
+0, 0, NULL, 0, 0, 0);
+   }
+   if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB)
+   cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
+S_0085F0_DB_DEST_BASE_ENA(1);
+   }
 
+   if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
/* Flush CMASK/FMASK/DCC. SURFACE_SYNC will wait for idle. */
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | 
EVENT_INDEX(0));
}
if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
-   cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
-S_0085F0_DB_DEST_BASE_ENA(1);
-
/* Flush HTILE. SURFACE_SYNC will wait for idle. */
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | 
EVENT_INDEX(0));
@@ -829,8 +835,7 @@ void si_emit_cache_flush(struct si_context *sctx)
 * VS and PS waits are unnecessary if SURFACE_SYNC is going to wait
 * for everything including CB/DB cache flushes.
 */
-   if (!(rctx->flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
-SI_CONTEXT_FLUSH_AND_INV_DB))) {
+   if (!flush_cb_db) {
if (rctx->flags & SI_CONTEXT_PS_PARTIAL_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | 
EVENT_INDEX(4));
@@ -864,6 +869,62 @@ void si_emit_cache_flush(struct si_context *sctx)
radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | 
EVENT_INDEX(0));
}
 
+   /* GFX9: Wait for idle if we're flushing CB or DB. ACQUIRE_MEM doesn't
+* wait for idle on GFX9. We have to use a TS event.
+*/
+   if (sctx->b.chip_class >= GFX9 && flush_cb_db) {
+   struct r600_resource *rbuf = NULL;
+   

[Mesa-dev] [PATCH 118/140] radeonsi/gfx9: only use CE RAM for most-used descriptors

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

because the CE RAM size decreased to 4 KB.
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 26 +-
 src/gallium/drivers/radeonsi/si_state.h   |  2 ++
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 9c1603b..58d35da 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -113,6 +113,7 @@ static void si_init_descriptors(struct si_descriptors *desc,
desc->shader_userdata_offset = shader_userdata_index * 4;
 
if (ce_offset) {
+   desc->uses_ce = true;
desc->ce_offset = *ce_offset;
 
/* make sure that ce_offset stays 32 byte aligned */
@@ -210,7 +211,7 @@ static bool si_upload_descriptors(struct si_context *sctx,
if (!desc->dirty_mask)
return true;
 
-   if (sctx->ce_ib) {
+   if (sctx->ce_ib && desc->uses_ce) {
uint32_t const* list = (uint32_t const*)desc->list;
 
if (desc->ce_ram_dirty)
@@ -1941,6 +1942,16 @@ void si_init_all_descriptors(struct si_context *sctx)
unsigned ce_offset = 0;
 
for (i = 0; i < SI_NUM_SHADERS; i++) {
+   /* GFX9 has only 4KB of CE, while previous chips had 32KB.
+* Rarely used descriptors don't use CE RAM.
+*/
+   bool big_ce = sctx->b.chip_class <= VI;
+   bool images_use_ce = big_ce;
+   bool shaderbufs_use_ce = big_ce ||
+i == PIPE_SHADER_COMPUTE;
+   bool samplers_use_ce = big_ce ||
+  i == PIPE_SHADER_FRAGMENT;
+
si_init_buffer_resources(&sctx->const_buffers[i],
 si_const_buffer_descriptors(sctx, i),
 SI_NUM_CONST_BUFFERS, 
SI_SGPR_CONST_BUFFERS,
@@ -1950,15 +1961,17 @@ void si_init_all_descriptors(struct si_context *sctx)
 si_shader_buffer_descriptors(sctx, i),
 SI_NUM_SHADER_BUFFERS, 
SI_SGPR_SHADER_BUFFERS,
 RADEON_USAGE_READWRITE, 
RADEON_PRIO_SHADER_RW_BUFFER,
-&ce_offset);
+shaderbufs_use_ce ? &ce_offset : NULL);
 
si_init_descriptors(si_sampler_descriptors(sctx, i),
SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS,
-   null_texture_descriptor, &ce_offset);
+   null_texture_descriptor,
+   samplers_use_ce ? &ce_offset : NULL);
 
si_init_descriptors(si_image_descriptors(sctx, i),
SI_SGPR_IMAGES, 8, SI_NUM_IMAGES,
-   null_image_descriptor, &ce_offset);
+   null_image_descriptor,
+   images_use_ce ? &ce_offset : NULL);
}
 
si_init_buffer_resources(&sctx->rw_buffers,
@@ -1971,7 +1984,10 @@ void si_init_all_descriptors(struct si_context *sctx)
 
sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
 
-   assert(ce_offset <= 32768);
+   if (sctx->b.chip_class >= GFX9)
+   assert(ce_offset <= 4096);
+   else
+   assert(ce_offset <= 32768);
 
/* Set pipe_context functions. */
sctx->b.b.bind_sampler_states = si_bind_sampler_states;
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index e06b4d1..aad1c83 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -232,6 +232,8 @@ struct si_descriptors {
/* elements of the list that are changed and need to be uploaded */
unsigned dirty_mask;
 
+   /* Whether CE is used to upload this descriptor array. */
+   bool uses_ce;
/* Whether the CE ram is dirty and needs to be reinitialized entirely
 * before we can do partial updates. */
bool ce_ram_dirty;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 105/140] radeonsi/gfx9: update can_sample_z/s flags

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_texture.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 129b3f4..1838de4 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1131,8 +1131,13 @@ r600_texture_create_object(struct pipe_screen *screen,
if (base->flags & (R600_RESOURCE_FLAG_TRANSFER |
   R600_RESOURCE_FLAG_FLUSHED_DEPTH) ||
rscreen->chip_class >= EVERGREEN) {
-   rtex->can_sample_z = 
!rtex->surface.u.legacy.depth_adjusted;
-   rtex->can_sample_s = 
!rtex->surface.u.legacy.stencil_adjusted;
+   if (rscreen->chip_class >= GFX9) {
+   rtex->can_sample_z = true;
+   rtex->can_sample_s = true;
+   } else {
+   rtex->can_sample_z = 
!rtex->surface.u.legacy.depth_adjusted;
+   rtex->can_sample_s = 
!rtex->surface.u.legacy.stencil_adjusted;
+   }
} else {
if (rtex->resource.b.b.nr_samples <= 1 &&
(rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM 
||
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 103/140] radeonsi/gfx9: update si_set_optimal_micro_tile_mode

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_texture.c | 44 ++-
 1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index f372341..ebd2c5c 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -2435,14 +2435,46 @@ static void si_set_optimal_micro_tile_mode(struct 
r600_common_screen *rscreen,
rtex->surface.micro_tile_mode == 
rtex->last_msaa_resolve_target_micro_mode)
return;
 
-   assert(rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
+   assert(rscreen->chip_class >= GFX9 ||
+  rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
assert(rtex->resource.b.b.last_level == 0);
 
-   /* These magic numbers were copied from addrlib. It doesn't use any
-* definitions for them either. They are all 2D_TILED_THIN1 modes with
-* different bpp and micro tile mode.
-*/
-   if (rscreen->chip_class >= CIK) {
+   if (rscreen->chip_class >= GFX9) {
+   /* 4K or larger tiles only. 0 is linear. 1-3 are 256B tiles. */
+   assert(rtex->surface.u.gfx9.surf.swizzle_mode >= 4);
+
+   /* If you do swizzle_mode % 4, you'll get:
+*   0 = Depth
+*   1 = Standard,
+*   2 = Displayable
+*   3 = Rotated
+*
+* Depth-sample order isn't allowed:
+*/
+   assert(rtex->surface.u.gfx9.surf.swizzle_mode % 4 != 0);
+
+   switch (rtex->last_msaa_resolve_target_micro_mode) {
+   case RADEON_MICRO_MODE_DISPLAY:
+   rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
+   rtex->surface.u.gfx9.surf.swizzle_mode += 2; /* D */
+   break;
+   case RADEON_MICRO_MODE_THIN:
+   rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
+   rtex->surface.u.gfx9.surf.swizzle_mode += 1; /* S */
+   break;
+   case RADEON_MICRO_MODE_ROTATED:
+   rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
+   rtex->surface.u.gfx9.surf.swizzle_mode += 3; /* R */
+   break;
+   default: /* depth */
+   assert(!"unexpected micro mode");
+   return;
+   }
+   } else if (rscreen->chip_class >= CIK) {
+   /* These magic numbers were copied from addrlib. It doesn't use
+* any definitions for them either. They are all 2D_TILED_THIN1
+* modes with different bpp and micro tile mode.
+*/
switch (rtex->last_msaa_resolve_target_micro_mode) {
case RADEON_MICRO_MODE_DISPLAY:
rtex->surface.u.legacy.tiling_index[0] = 10;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 107/140] radeonsi/gfx9: CB changes

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_pipe_common.h |   3 +-
 src/gallium/drivers/radeonsi/si_state.c   | 174 ++
 2 files changed, 125 insertions(+), 52 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index e5748db..e00f74b 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -201,7 +201,7 @@ struct r600_cmask_info {
uint64_t size;
unsigned alignment;
unsigned slice_tile_max;
-   unsigned base_address_reg;
+   uint64_t base_address_reg;
 };
 
 struct r600_texture {
@@ -293,6 +293,7 @@ struct r600_surface {
unsigned cb_color_pitch;/* EG and later */
unsigned cb_color_slice;/* EG and later */
unsigned cb_color_attrib;   /* EG and later */
+   unsigned cb_color_attrib2;  /* GFX9 and later */
unsigned cb_dcc_control;/* VI and later */
unsigned cb_color_fmask;/* CB_COLORn_FMASK (EG and later) or 
CB_COLORn_FRAG (r600) */
unsigned cb_color_fmask_slice;  /* EG and later */
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 25dae01..a7fd2d7 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2208,6 +2208,31 @@ static void si_initialize_color_surface(struct 
si_context *sctx,
surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
}
 
+   if (sctx->b.chip_class >= GFX9) {
+   unsigned mip0_depth = util_max_layer(&rtex->resource.b.b, 0);
+   unsigned type;
+
+   switch (rtex->resource.b.b.target) {
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+   type = V_028C74_1D;
+   break;
+   default:
+   type = V_028C74_2D;
+   break;
+   case PIPE_TEXTURE_3D:
+   type = V_028C74_3D;
+   break;
+   }
+
+   surf->cb_color_view |= 
S_028C6C_MIP_LEVEL(surf->base.u.tex.level);
+   surf->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
+S_028C74_RESOURCE_TYPE(type);
+   surf->cb_color_attrib2 = 
S_028C68_MIP0_WIDTH(rtex->resource.b.b.width0 - 1) |
+
S_028C68_MIP0_HEIGHT(rtex->resource.b.b.height0 - 1) |
+
S_028C68_MAX_MIP(rtex->resource.b.b.last_level);
+   }
+
/* Determine pixel shader export format */
si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);
 
@@ -2506,10 +2531,8 @@ static void si_emit_framebuffer_state(struct si_context 
*sctx, struct r600_atom
 
/* Colorbuffers. */
for (i = 0; i < nr_cbufs; i++) {
-   const struct legacy_surf_level *level_info;
-   unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
-   unsigned cb_color_base, cb_color_fmask, cb_color_attrib;
-   unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice;
+   uint64_t cb_color_base, cb_color_fmask, cb_dcc_base;
+   unsigned cb_color_attrib;
 
if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
continue;
@@ -2522,7 +2545,6 @@ static void si_emit_framebuffer_state(struct si_context 
*sctx, struct r600_atom
}
 
tex = (struct r600_texture *)cb->base.texture;
-   level_info =  
&tex->surface.u.legacy.level[cb->base.u.tex.level];
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
  &tex->resource, RADEON_USAGE_READWRITE,
  tex->resource.b.b.nr_samples > 1 ?
@@ -2542,34 +2564,16 @@ static void si_emit_framebuffer_state(struct si_context 
*sctx, struct r600_atom
  RADEON_PRIO_DCC);
 
/* Compute mutable surface parameters. */
-   pitch_tile_max = level_info->nblk_x / 8 - 1;
-   slice_tile_max = level_info->nblk_x *
-level_info->nblk_y / 64 - 1;
-   tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, 
false);
-
-   cb_color_base = (tex->resource.gpu_address + 
level_info->offset) >> 8;
-   cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
-   cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
-   cb_color_attrib = cb->cb_color_attrib |
- S_028C74_TILE_MODE_INDEX(tile_mode_index);
-
-   if (tex->fmask.size) {
-   if (sctx->b.chip_class >= CIK)
-   cb_color_pitch |= 

[Mesa-dev] [PATCH 095/140] winsys/amdgpu: update amdgpu_addr_create for GFX9

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 47 --
 1 file changed, 30 insertions(+), 17 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index ede8ba6..c64865f 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -36,6 +36,9 @@
 #define CIASICIDGFXENGINE_SOUTHERNISLAND 0x000A
 #endif
 
+#ifndef CIASICIDGFXENGINE_ARCTICISLAND
+#define CIASICIDGFXENGINE_ARCTICISLAND 0x000D
+#endif
 
 static int amdgpu_surface_sanity(const struct pipe_resource *tex)
 {
@@ -107,32 +110,42 @@ ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws)
addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);
addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);
 
-   regValue.noOfBanks = ws->amdinfo.mc_arb_ramcfg & 0x3;
regValue.gbAddrConfig = ws->amdinfo.gb_addr_cfg;
-   regValue.noOfRanks = (ws->amdinfo.mc_arb_ramcfg & 0x4) >> 2;
-
-   regValue.backendDisables = ws->amdinfo.enabled_rb_pipes_mask;
-   regValue.pTileConfig = ws->amdinfo.gb_tile_mode;
-   regValue.noOfEntries = ARRAY_SIZE(ws->amdinfo.gb_tile_mode);
-   if (ws->info.chip_class == SI) {
-  regValue.pMacroTileConfig = NULL;
-  regValue.noOfMacroEntries = 0;
+   createFlags.value = 0;
+
+   if (ws->info.chip_class >= GFX9) {
+  addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND;
+  regValue.blockVarSizeLog2 = 0;
} else {
-  regValue.pMacroTileConfig = ws->amdinfo.gb_macro_tile_mode;
-  regValue.noOfMacroEntries = ARRAY_SIZE(ws->amdinfo.gb_macro_tile_mode);
-   }
+  regValue.backendDisables = ws->amdinfo.backend_disable[0];
+  regValue.noOfBanks = ws->amdinfo.mc_arb_ramcfg & 0x3;
+  regValue.noOfRanks = (ws->amdinfo.mc_arb_ramcfg & 0x4) >> 2;
 
-   createFlags.value = 0;
-   createFlags.useTileIndex = 1;
-   createFlags.useHtileSliceAlign = 1;
+  regValue.backendDisables = ws->amdinfo.enabled_rb_pipes_mask;
+  regValue.pTileConfig = ws->amdinfo.gb_tile_mode;
+  regValue.noOfEntries = ARRAY_SIZE(ws->amdinfo.gb_tile_mode);
+  if (ws->info.chip_class == SI) {
+ regValue.pMacroTileConfig = NULL;
+ regValue.noOfMacroEntries = 0;
+  } else {
+ regValue.pMacroTileConfig = ws->amdinfo.gb_macro_tile_mode;
+ regValue.noOfMacroEntries = 
ARRAY_SIZE(ws->amdinfo.gb_macro_tile_mode);
+  }
+
+  createFlags.useTileIndex = 1;
+  createFlags.useHtileSliceAlign = 1;
+
+  addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
+  addrCreateInput.chipFamily = ws->family;
+  addrCreateInput.chipRevision = ws->rev_id;
+   }
 
-   addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
addrCreateInput.chipFamily = ws->family;
addrCreateInput.chipRevision = ws->rev_id;
-   addrCreateInput.createFlags = createFlags;
addrCreateInput.callbacks.allocSysMem = allocSysMem;
addrCreateInput.callbacks.freeSysMem = freeSysMem;
addrCreateInput.callbacks.debugPrint = 0;
+   addrCreateInput.createFlags = createFlags;
addrCreateInput.regValue = regValue;
 
addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 097/140] winsys/amdgpu: set num_tile_pipes, pipe_interleave_bytes for GFX9

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 4becd2a..d1d6327 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -40,6 +40,8 @@
 #include 
 #include 
 #include "amd/common/amdgpu_id.h"
+#include "amd/common/sid.h"
+#include "amd/common/gfx9d.h"
 
 #define CIK_TILE_MODE_COLOR_2D 14
 
@@ -352,8 +354,15 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, int 
fd)
ws->info.num_render_backends = ws->amdinfo.rb_pipes;
ws->info.clock_crystal_freq = ws->amdinfo.gpu_counter_freq;
ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */
-   ws->info.num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo);
-   ws->info.pipe_interleave_bytes = 256 << ((ws->amdinfo.gb_addr_cfg >> 4) & 
0x7);
+   if (ws->info.chip_class == GFX9) {
+  ws->info.num_tile_pipes = 1 << 
G_0098F8_NUM_PIPES(ws->amdinfo.gb_addr_cfg);
+  ws->info.pipe_interleave_bytes =
+ 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(ws->amdinfo.gb_addr_cfg);
+   } else {
+  ws->info.num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo);
+  ws->info.pipe_interleave_bytes =
+ 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX6(ws->amdinfo.gb_addr_cfg);
+   }
ws->info.has_virtual_memory = true;
ws->info.has_sdma = dma.available_rings != 0;
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 100/140] radeonsi/gfx9: stub testdma - array_mode_to_string

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_test_dma.c | 30 ++
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_test_dma.c 
b/src/gallium/drivers/radeon/r600_test_dma.c
index 1e870a7..264745b 100644
--- a/src/gallium/drivers/radeon/r600_test_dma.c
+++ b/src/gallium/drivers/radeon/r600_test_dma.c
@@ -149,18 +149,24 @@ static enum pipe_format get_format_from_bpp(int bpp)
}
 }
 
-static const char *array_mode_to_string(unsigned mode)
+static const char *array_mode_to_string(struct r600_common_screen *rscreen,
+   struct radeon_surf *surf)
 {
-   switch (mode) {
-   case RADEON_SURF_MODE_LINEAR_ALIGNED:
-   return "LINEAR_ALIGNED";
-   case RADEON_SURF_MODE_1D:
-   return "1D_TILED_THIN1";
-   case RADEON_SURF_MODE_2D:
-   return "2D_TILED_THIN1";
-   default:
-   assert(0);
+   if (rscreen->chip_class >= GFX9) {
+   /* TODO */
return "   UNKNOWN";
+   } else {
+   switch (surf->u.legacy.level[0].mode) {
+   case RADEON_SURF_MODE_LINEAR_ALIGNED:
+   return "LINEAR_ALIGNED";
+   case RADEON_SURF_MODE_1D:
+   return "1D_TILED_THIN1";
+   case RADEON_SURF_MODE_2D:
+   return "2D_TILED_THIN1";
+   default:
+   assert(0);
+   return "   UNKNOWN";
+   }
}
 }
 
@@ -292,9 +298,9 @@ void r600_test_dma(struct r600_common_screen *rscreen)
printf("%4u: dst = (%5u x %5u x %u, %s), "
   " src = (%5u x %5u x %u, %s), bpp = %2u, ",
   i, tdst.width0, tdst.height0, tdst.array_size,
-  
array_mode_to_string(rdst->surface.u.legacy.level[0].mode),
+  array_mode_to_string(rscreen, &rdst->surface),
   tsrc.width0, tsrc.height0, tsrc.array_size,
-  
array_mode_to_string(rsrc->surface.u.legacy.level[0].mode), bpp);
+  array_mode_to_string(rscreen, &rsrc->surface), bpp);
fflush(stdout);
 
/* set src pixels */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 099/140] radeonsi/gfx9: update r600_print_texture_info

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_pipe_common.h |  3 +-
 src/gallium/drivers/radeon/r600_texture.c | 63 ++-
 src/gallium/drivers/radeonsi/si_debug.c   |  4 +-
 3 files changed, 65 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 883d5ed..e5748db 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -788,7 +788,8 @@ void r600_texture_get_cmask_info(struct r600_common_screen 
*rscreen,
 bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
 struct pipe_resource *texture,
 struct r600_texture **staging);
-void r600_print_texture_info(struct r600_texture *rtex, FILE *f);
+void r600_print_texture_info(struct r600_common_screen *rscreen,
+struct r600_texture *rtex, FILE *f);
 struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
const struct pipe_resource *templ);
 bool vi_dcc_formats_compatible(enum pipe_format format1,
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 353e942..8cc9f2a 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -900,10 +900,12 @@ static void r600_texture_allocate_htile(struct 
r600_common_screen *rscreen,
}
 }
 
-void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
+void r600_print_texture_info(struct r600_common_screen *rscreen,
+struct r600_texture *rtex, FILE *f)
 {
int i;
 
+   /* Common parameters. */
fprintf(f, "  Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
"blk_h=%u, array_size=%u, last_level=%u, "
"bpe=%u, nsamples=%u, flags=0x%x, %s\n",
@@ -914,6 +916,63 @@ void r600_print_texture_info(struct r600_texture *rtex, 
FILE *f)
rtex->surface.bpe, rtex->resource.b.b.nr_samples,
rtex->surface.flags, 
util_format_short_name(rtex->resource.b.b.format));
 
+   if (rscreen->chip_class >= GFX9) {
+   fprintf(f, "  Surf: size=%"PRIu64", slice_size=%"PRIu64", "
+   "alignment=%u, swmode=%u, epitch=%u, pitch=%u\n",
+   rtex->surface.surf_size,
+   rtex->surface.u.gfx9.surf_slice_size,
+   rtex->surface.surf_alignment,
+   rtex->surface.u.gfx9.surf.swizzle_mode,
+   rtex->surface.u.gfx9.surf.epitch,
+   rtex->surface.u.gfx9.surf_pitch);
+
+   if (rtex->fmask.size) {
+   fprintf(f, "  FMASK: offset=%"PRIu64", size=%"PRIu64", "
+   "alignment=%u, swmode=%u, epitch=%u\n",
+   rtex->fmask.offset,
+   rtex->surface.u.gfx9.fmask_size,
+   rtex->surface.u.gfx9.fmask_alignment,
+   rtex->surface.u.gfx9.fmask.swizzle_mode,
+   rtex->surface.u.gfx9.fmask.epitch);
+   }
+
+   if (rtex->cmask.size) {
+   fprintf(f, "  CMask: offset=%"PRIu64", size=%"PRIu64", "
+   "alignment=%u, rb_aligned=%u, 
pipe_aligned=%u\n",
+   rtex->cmask.offset,
+   rtex->surface.u.gfx9.cmask_size,
+   rtex->surface.u.gfx9.cmask_alignment,
+   rtex->surface.u.gfx9.cmask.rb_aligned,
+   rtex->surface.u.gfx9.cmask.pipe_aligned);
+   }
+
+   if (rtex->htile_buffer) {
+   fprintf(f, "  HTile: size=%u, alignment=%u, "
+   "rb_aligned=%u, pipe_aligned=%u\n",
+   rtex->htile_buffer->b.b.width0,
+   rtex->htile_buffer->buf->alignment,
+   rtex->surface.u.gfx9.htile.rb_aligned,
+   rtex->surface.u.gfx9.htile.pipe_aligned);
+   }
+
+   if (rtex->dcc_offset) {
+   fprintf(f, "  DCC: offset=%"PRIu64", size=%"PRIu64", "
+   "alignment=%u, pitch_max=%u, 
num_dcc_levels=%u\n",
+   rtex->dcc_offset, rtex->surface.dcc_size,
+   rtex->surface.dcc_alignment,
+   rtex->surface.u.gfx9.dcc_pitch_max,
+   rtex->surface.num_dcc_levels);
+   }
+
+   if (rtex->surface.u.gfx9.stencil_offset) {
+   fprintf(f, "  Stencil: offset=%"PRIu64", swmode=%u, 

[Mesa-dev] [PATCH 092/140] gallium/radeon: move pre-GFX9 radeon_surf.* members to radeon_surf.u.legacy.*

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/r600/evergreen_state.c | 104 +++---
 src/gallium/drivers/r600/r600_blit.c   |   4 +-
 src/gallium/drivers/r600/r600_state.c  |  64 -
 src/gallium/drivers/radeon/r600_test_dma.c |   4 +-
 src/gallium/drivers/radeon/r600_texture.c  | 156 ++---
 src/gallium/drivers/radeon/radeon_uvd.c|  20 +--
 src/gallium/drivers/radeon/radeon_vce.c|   8 +-
 src/gallium/drivers/radeon/radeon_vce_40_2_2.c |  16 +--
 src/gallium/drivers/radeon/radeon_vce_50.c |  10 +-
 src/gallium/drivers/radeon/radeon_vce_52.c |  16 +--
 src/gallium/drivers/radeon/radeon_video.c  |  14 +-
 src/gallium/drivers/radeon/radeon_winsys.h |  59 
 src/gallium/drivers/radeonsi/cik_sdma.c|  40 +++---
 src/gallium/drivers/radeonsi/si_descriptors.c  |   4 +-
 src/gallium/drivers/radeonsi/si_dma.c  |  42 +++---
 src/gallium/drivers/radeonsi/si_pipe.h |   2 +-
 src/gallium/drivers/radeonsi/si_state.c|  30 ++--
 src/gallium/drivers/radeonsi/si_state.h|   6 +-
 src/gallium/winsys/amdgpu/drm/amdgpu_surface.c |  57 
 src/gallium/winsys/radeon/drm/radeon_drm_surface.c |  46 +++---
 20 files changed, 357 insertions(+), 345 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 900e073..371e7ce 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -700,14 +700,14 @@ static int evergreen_fill_tex_resource_words(struct 
r600_context *rctx,
unsigned char array_mode = 0, non_disp_tiling = 0;
unsigned height, depth, width;
unsigned macro_aspect, tile_split, bankh, bankw, nbanks, fmask_bankh;
-   struct radeon_surf_level *surflevel;
+   struct legacy_surf_level *surflevel;
unsigned base_level, first_level, last_level;
unsigned dim, last_layer;
uint64_t va;
bool do_endian_swap = FALSE;
 
-   tile_split = tmp->surface.tile_split;
-   surflevel = tmp->surface.level;
+   tile_split = tmp->surface.u.legacy.tile_split;
+   surflevel = tmp->surface.u.legacy.level;
 
/* Texturing with separate depth and stencil. */
if (tmp->db_compatible) {
@@ -726,8 +726,8 @@ static int evergreen_fill_tex_resource_words(struct 
r600_context *rctx,
case PIPE_FORMAT_S8X24_UINT:
case PIPE_FORMAT_X32_S8X24_UINT:
params->pipe_format = PIPE_FORMAT_S8_UINT;
-   tile_split = tmp->surface.stencil_tile_split;
-   surflevel = tmp->surface.stencil_level;
+   tile_split = tmp->surface.u.legacy.stencil_tile_split;
+   surflevel = tmp->surface.u.legacy.stencil_level;
break;
default:;
}
@@ -777,9 +777,9 @@ static int evergreen_fill_tex_resource_words(struct 
r600_context *rctx,
array_mode = V_028C70_ARRAY_1D_TILED_THIN1;
break;
}
-   macro_aspect = tmp->surface.mtilea;
-   bankw = tmp->surface.bankw;
-   bankh = tmp->surface.bankh;
+   macro_aspect = tmp->surface.u.legacy.mtilea;
+   bankw = tmp->surface.u.legacy.bankw;
+   bankh = tmp->surface.u.legacy.bankh;
tile_split = eg_tile_split(tile_split);
macro_aspect = eg_macro_tile_aspect(macro_aspect);
bankw = eg_bank_wh(bankw);
@@ -1092,7 +1092,7 @@ static void evergreen_set_color_surface_common(struct 
r600_context *rctx,
bool blend_clamp = 0, blend_bypass = 0, do_endian_swap = FALSE;
int i;
 
-   color->offset = rtex->surface.level[level].offset;
+   color->offset = rtex->surface.u.legacy.level[level].offset;
color->view = S_028C6C_SLICE_START(first_layer) |
S_028C6C_SLICE_MAX(last_layer);
 
@@ -1100,14 +1100,14 @@ static void evergreen_set_color_surface_common(struct 
r600_context *rctx,
color->offset >>= 8;
 
color->dim = 0;
-   pitch = (rtex->surface.level[level].nblk_x) / 8 - 1;
-   slice = (rtex->surface.level[level].nblk_x * 
rtex->surface.level[level].nblk_y) / 64;
+   pitch = (rtex->surface.u.legacy.level[level].nblk_x) / 8 - 1;
+   slice = (rtex->surface.u.legacy.level[level].nblk_x * 
rtex->surface.u.legacy.level[level].nblk_y) / 64;
if (slice) {
slice = slice - 1;
}
 
color->info = 0;
-   switch (rtex->surface.level[level].mode) {
+   switch (rtex->surface.u.legacy.level[level].mode) {
default:
case RADEON_SURF_MODE_LINEAR_ALIGNED:
color->info = 
S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_ALIGNED);
@@ -1122,14 +1122,14 @@ static void evergreen_set_color_surface_common(struct 
r600_context *rctx,

[Mesa-dev] [PATCH 098/140] gallium/radeon: move pre-GFX9 radeon_bo_metadata.* to u.legacy.*

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/r300/r300_texture.c   | 14 ++---
 src/gallium/drivers/radeon/r600_texture.c | 64 +--
 src/gallium/drivers/radeon/radeon_winsys.h| 24 +
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 74 +++
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 44 
 5 files changed, 119 insertions(+), 101 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_texture.c 
b/src/gallium/drivers/r300/r300_texture.c
index 32cbdcd..c202fbe 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -1132,9 +1132,9 @@ r300_texture_create_object(struct r300_screen *rscreen,
 util_format_is_depth_or_stencil(base->format) ? "depth" : 
"color");
 }
 
-tiling.microtile = tex->tex.microtile;
-tiling.macrotile = tex->tex.macrotile[0];
-tiling.stride = tex->tex.stride_in_bytes[0];
+tiling.u.legacy.microtile = tex->tex.microtile;
+tiling.u.legacy.macrotile = tex->tex.macrotile[0];
+tiling.u.legacy.stride = tex->tex.stride_in_bytes[0];
 rws->buffer_set_metadata(tex->buf, &tiling);
 
 return tex;
@@ -1195,20 +1195,20 @@ struct pipe_resource *r300_texture_from_handle(struct 
pipe_screen *screen,
 
 /* Enforce a microtiled zbuffer. */
 if (util_format_is_depth_or_stencil(base->format) &&
-tiling.microtile == RADEON_LAYOUT_LINEAR) {
+tiling.u.legacy.microtile == RADEON_LAYOUT_LINEAR) {
 switch (util_format_get_blocksize(base->format)) {
 case 4:
-tiling.microtile = RADEON_LAYOUT_TILED;
+tiling.u.legacy.microtile = RADEON_LAYOUT_TILED;
 break;
 
 case 2:
-tiling.microtile = RADEON_LAYOUT_SQUARETILED;
+tiling.u.legacy.microtile = RADEON_LAYOUT_SQUARETILED;
 break;
 }
 }
 
 return (struct pipe_resource*)
-   r300_texture_create_object(rscreen, base, tiling.microtile, 
tiling.macrotile,
+   r300_texture_create_object(rscreen, base, 
tiling.u.legacy.microtile, tiling.u.legacy.macrotile,
   stride, buffer);
 }
 
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index ece1e80..353e942 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -281,24 +281,29 @@ static int r600_init_surface(struct r600_common_screen 
*rscreen,
return 0;
 }
 
-static void r600_texture_init_metadata(struct r600_texture *rtex,
+static void r600_texture_init_metadata(struct r600_common_screen *rscreen,
+  struct r600_texture *rtex,
   struct radeon_bo_metadata *metadata)
 {
struct radeon_surf *surface = &rtex->surface;
 
memset(metadata, 0, sizeof(*metadata));
-   metadata->microtile = surface->u.legacy.level[0].mode >= 
RADEON_SURF_MODE_1D ?
-  RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
-   metadata->macrotile = surface->u.legacy.level[0].mode >= 
RADEON_SURF_MODE_2D ?
-  RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
-   metadata->pipe_config = surface->u.legacy.pipe_config;
-   metadata->bankw = surface->u.legacy.bankw;
-   metadata->bankh = surface->u.legacy.bankh;
-   metadata->tile_split = surface->u.legacy.tile_split;
-   metadata->mtilea = surface->u.legacy.mtilea;
-   metadata->num_banks = surface->u.legacy.num_banks;
-   metadata->stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
-   metadata->scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
+
+   if (rscreen->chip_class >= GFX9) {
+   } else {
+   metadata->u.legacy.microtile = surface->u.legacy.level[0].mode 
>= RADEON_SURF_MODE_1D ?
+  RADEON_LAYOUT_TILED : 
RADEON_LAYOUT_LINEAR;
+   metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode 
>= RADEON_SURF_MODE_2D ?
+  RADEON_LAYOUT_TILED : 
RADEON_LAYOUT_LINEAR;
+   metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
+   metadata->u.legacy.bankw = surface->u.legacy.bankw;
+   metadata->u.legacy.bankh = surface->u.legacy.bankh;
+   metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
+   metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
+   metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
+   metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * 
surface->bpe;
+   metadata->u.legacy.scanout = (surface->flags & 
RADEON_SURF_SCANOUT) != 0;
+   }
 }
 
 static void r600_eliminate_fast_color_clear(struct r600_common_context *rctx,
@@ -526,7 +531,7 @@ static boolean 

[Mesa-dev] [PATCH 096/140] winsys/amdgpu: wire up new addrlib for GFX9

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 400 -
 1 file changed, 399 insertions(+), 1 deletion(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index c64865f..8f4244b 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -586,10 +586,408 @@ static int gfx6_surface_init(struct radeon_winsys *rws,
return 0;
 }
 
+/* This is only called when expecting a tiled layout. */
+static int
+gfx9_get_preferred_swizzle_mode(struct amdgpu_winsys *ws,
+ADDR2_COMPUTE_SURFACE_INFO_INPUT *in,
+bool is_fmask, AddrSwizzleMode *swizzle_mode)
+{
+   ADDR_E_RETURNCODE ret;
+   ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin = {0};
+   ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT sout = {0};
+
+   sin.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT);
+   sout.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT);
+
+   sin.flags = in->flags;
+   sin.resourceType = in->resourceType;
+   sin.format = in->format;
+   sin.resourceLoction = ADDR_RSRC_LOC_INVIS;
+   /* TODO: We could allow some of these: */
+   sin.forbiddenBlock.micro = 1; /* don't allow the 256B swizzle modes */
+   sin.forbiddenBlock.var = 1; /* don't allow the variable-sized swizzle modes 
*/
+   sin.forbiddenBlock.linear = 1; /* don't allow linear swizzle modes */
+   sin.bpp = in->bpp;
+   sin.width = in->width;
+   sin.height = in->height;
+   sin.numSlices = in->numSlices;
+   sin.numMipLevels = in->numMipLevels;
+   sin.numSamples = in->numSamples;
+   sin.numFrags = in->numFrags;
+
+   if (is_fmask) {
+  sin.flags.color = 0;
+  sin.flags.fmask = 1;
+   }
+
+   ret = Addr2GetPreferredSurfaceSetting(ws->addrlib, &sin, &sout);
+   if (ret != ADDR_OK)
+  return ret;
+
+   *swizzle_mode = sout.swizzleMode;
+   return 0;
+}
+
+static int gfx9_compute_miptree(struct amdgpu_winsys *ws,
+struct radeon_surf *surf, bool compressed,
+ADDR2_COMPUTE_SURFACE_INFO_INPUT *in)
+{
+   ADDR2_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {};
+   ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
+   ADDR_E_RETURNCODE ret;
+
+   out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
+   out.pMipInfo = mip_info;
+
+   ret = Addr2ComputeSurfaceInfo(ws->addrlib, in, &out);
+   if (ret != ADDR_OK)
+  return ret;
+
+   if (in->flags.stencil) {
+  surf->u.gfx9.stencil.swizzle_mode = in->swizzleMode;
+  surf->u.gfx9.stencil.epitch = out.epitchIsHeight ? out.mipChainHeight - 
1 :
+ out.mipChainPitch - 1;
+  surf->surf_alignment = MAX2(surf->surf_alignment, out.baseAlign);
+  surf->u.gfx9.stencil_offset = align(surf->surf_size, out.baseAlign);
+  surf->surf_size = surf->u.gfx9.stencil_offset + out.surfSize;
+  return 0;
+   }
+
+   surf->u.gfx9.surf.swizzle_mode = in->swizzleMode;
+   surf->u.gfx9.surf.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 :
+   out.mipChainPitch - 1;
+   surf->u.gfx9.surf_slice_size = out.sliceSize;
+   surf->u.gfx9.surf_pitch = out.pitch;
+   surf->surf_size = out.surfSize;
+   surf->surf_alignment = out.baseAlign;
+
+   if (in->swizzleMode == ADDR_SW_LINEAR) {
+  for (unsigned i = 0; i < in->numMipLevels; i++)
+ surf->u.gfx9.surf_ymip_offset[i] = mip_info[i].mipOffsetYPixel;
+   }
+
+   if (in->flags.depth) {
+  assert(in->swizzleMode != ADDR_SW_LINEAR);
+
+  /* HTILE */
+  ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
+  ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};
+
+  hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT);
+  hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT);
+
+  hin.hTileFlags.pipeAligned = 1;
+  hin.hTileFlags.rbAligned = 1;
+  hin.depthFlags = in->flags;
+  hin.swizzleMode = in->swizzleMode;
+  hin.unalignedWidth = in->width;
+  hin.unalignedHeight = in->height;
+  hin.numSlices = in->numSlices;
+  hin.numMipLevels = in->numMipLevels;
+
+  ret = Addr2ComputeHtileInfo(ws->addrlib, &hin, &hout);
+  if (ret != ADDR_OK)
+ return ret;
+
+  surf->u.gfx9.htile.rb_aligned = hin.hTileFlags.rbAligned;
+  surf->u.gfx9.htile.pipe_aligned = hin.hTileFlags.pipeAligned;
+  surf->htile_size = hout.htileBytes;
+  surf->htile_alignment = hout.baseAlign;
+   } else {
+  /* DCC */
+  if (!(surf->flags & RADEON_SURF_DISABLE_DCC) &&
+  !(surf->flags & RADEON_SURF_SCANOUT) &&
+  !compressed &&
+  in->swizzleMode != ADDR_SW_LINEAR &&
+  /* TODO: We could support DCC with MSAA. */
+  in->numSamples == 1) {
+ ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
+ ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
+
+ din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
+ 

[Mesa-dev] [PATCH 093/140] gallium/radeon: add GFX9 surface info to radeon_surf

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/radeon_winsys.h | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
b/src/gallium/drivers/radeon/radeon_winsys.h
index 3dec92c..bfc067d 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -323,6 +323,41 @@ struct legacy_surf_layout {
 uint8_t stencil_tiling_index[RADEON_SURF_MAX_LEVELS];
 };
 
+struct gfx9_surf_flags {
+uint16_tswizzle_mode; /* tile mode */
+uint16_tepitch; /* (pitch - 1) or (height - 1) */
+};
+
+struct gfx9_surf_meta_flags {
+unsignedrb_aligned:1;   /* optimal for RBs */
+unsignedpipe_aligned:1; /* optimal for TC */
+};
+
+struct gfx9_surf_layout {
+struct gfx9_surf_flags  surf;/* color or depth surface */
+struct gfx9_surf_flags  fmask;   /* not added to surf_size */
+struct gfx9_surf_flags  stencil; /* added to surf_size, use 
stencil_offset */
+
+struct gfx9_surf_meta_flags dcc;   /* metadata of color */
+struct gfx9_surf_meta_flags htile; /* metadata of depth and stencil */
+struct gfx9_surf_meta_flags cmask; /* metadata of fmask */
+
+/* The size of the 2D plane containing all mipmap levels. */
+uint64_tsurf_slice_size;
+uint16_tsurf_pitch; /* in blocks */
+/* Y mipmap level offset in blocks. Only valid for LINEAR. */
+uint16_tsurf_ymip_offset[RADEON_SURF_MAX_LEVELS];
+
+uint16_tdcc_pitch_max;  /* (mip chain pitch - 1) */
+
+uint64_tstencil_offset; /* separate stencil */
+uint64_tfmask_size;
+uint64_tcmask_size;
+
+uint32_tfmask_alignment;
+uint32_tcmask_alignment;
+};
+
 struct radeon_surf {
 /* Format properties. */
 unsignedblk_w:4;
@@ -357,6 +392,9 @@ struct radeon_surf {
  * desirable. The allocator will try to obey them.
  */
 struct legacy_surf_layout legacy;
+
+/* GFX9+ return values. */
+struct gfx9_surf_layout gfx9;
 } u;
 };
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 089/140] radeonsi/gfx9: pad shader binaries by 128 bytes

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index cc0ef96..226924b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5865,20 +5865,26 @@ int si_shader_binary_upload(struct si_screen *sscreen, 
struct si_shader *shader)
shader->epilog ? &shader->epilog->binary : NULL;
const struct ac_shader_binary *mainb = &shader->binary;
unsigned bo_size = si_get_shader_binary_size(shader) +
   (!epilog ? mainb->rodata_size : 0);
unsigned char *ptr;
 
assert(!prolog || !prolog->rodata_size);
assert((!prolog && !epilog) || !mainb->rodata_size);
assert(!epilog || !epilog->rodata_size);
 
+   /* GFX9 can fetch at most 128 bytes past the end of the shader.
+* Prevent VM faults.
+*/
+   if (sscreen->b.chip_class >= GFX9)
+   bo_size += 128;
+
r600_resource_reference(&shader->bo, NULL);
shader->bo = (struct r600_resource*)
 pipe_buffer_create(&sscreen->b.b, 0,
PIPE_USAGE_IMMUTABLE,
align(bo_size, SI_CPDMA_ALIGNMENT));
if (!shader->bo)
return -ENOMEM;
 
/* Upload. */
ptr = sscreen->b.ws->buffer_map(shader->bo->buf, NULL,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 075/140] radeonsi/gfx9: query changes - EVENT_WRITE and SET_PREDICATION

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_query.c | 29 +++--
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index e269c39..dcd217b 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -644,40 +644,40 @@ static void r600_query_hw_do_emit_start(struct 
r600_common_context *ctx,
uint64_t va)
 {
struct radeon_winsys_cs *cs = ctx->gfx.cs;
 
switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | 
EVENT_INDEX(1));
radeon_emit(cs, va);
-   radeon_emit(cs, (va >> 32) & 0xFFFF);
+   radeon_emit(cs, va >> 32);
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | 
EVENT_INDEX(3));
radeon_emit(cs, va);
-   radeon_emit(cs, (va >> 32) & 0xFFFF);
+   radeon_emit(cs, va >> 32);
break;
case PIPE_QUERY_TIME_ELAPSED:
r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
 0, 3, NULL, va, 0, 0);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | 
EVENT_INDEX(2));
radeon_emit(cs, va);
-   radeon_emit(cs, (va >> 32) & 0xFFFF);
+   radeon_emit(cs, va >> 32);
break;
default:
assert(0);
}
r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
RADEON_PRIO_QUERY);
 }
 
 static void r600_query_hw_emit_start(struct r600_common_context *ctx,
 struct r600_query_hw *query)
@@ -720,50 +720,50 @@ static void r600_query_hw_do_emit_stop(struct 
r600_common_context *ctx,
struct radeon_winsys_cs *cs = ctx->gfx.cs;
uint64_t fence_va = 0;
 
switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
va += 8;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | 
EVENT_INDEX(1));
radeon_emit(cs, va);
-   radeon_emit(cs, (va >> 32) & 0xFFFF);
+   radeon_emit(cs, va >> 32);
 
fence_va = va + ctx->screen->info.num_render_backends * 16 - 8;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
va += query->result_size/2;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | 
EVENT_INDEX(3));
radeon_emit(cs, va);
-   radeon_emit(cs, (va >> 32) & 0xFFFF);
+   radeon_emit(cs, va >> 32);
break;
case PIPE_QUERY_TIME_ELAPSED:
va += 8;
/* fall through */
case PIPE_QUERY_TIMESTAMP:
r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
 0, 3, NULL, va, 0, 0);
fence_va = va + 8;
break;
case PIPE_QUERY_PIPELINE_STATISTICS: {
unsigned sample_size = (query->result_size - 8) / 2;
 
va += sample_size;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | 
EVENT_INDEX(2));
radeon_emit(cs, va);
-   radeon_emit(cs, (va >> 32) & 0xFFFF);
+   radeon_emit(cs, va >> 32);
 
fence_va = va + sample_size;
break;
}
default:
assert(0);
}
r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
RADEON_PRIO_QUERY);
 
@@ -834,26 +834,35 @@ static void r600_emit_query_predication(struct 
r600_common_context *ctx,
if (ctx->render_cond_invert)
op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not 
visable/overflow */
else
op |= PREDICATION_DRAW_VISIBLE; /* Draw if visable/overflow */
 
op |= 

[Mesa-dev] [PATCH 090/140] radeonsi/gfx9: draw changes

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state_draw.c | 43 
 1 file changed, 31 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index 8c6e9cd..f01ac01 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -20,20 +20,21 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  * Authors:
  *  Christian König 
  */
 
 #include "si_pipe.h"
 #include "radeon/r600_cs.h"
 #include "sid.h"
+#include "gfx9d.h"
 
 #include "util/u_index_modify.h"
 #include "util/u_upload_mgr.h"
 #include "util/u_prim.h"
 
 #include "ac_debug.h"
 
 static unsigned si_conv_pipe_prim(unsigned mode)
 {
 static const unsigned prim_conv[] = {
@@ -372,21 +373,23 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen,
/* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
if (ia_switch_on_eoi)
partial_es_wave = true;
 
return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
S_028AA8_WD_SWITCH_ON_EOP(sscreen->b.chip_class >= CIK ? 
wd_switch_on_eop : 0) |
S_028AA8_MAX_PRIMGRP_IN_WAVE(sscreen->b.chip_class >= VI ?
-max_primgroup_in_wave : 0);
+max_primgroup_in_wave : 0) |
+   S_030960_EN_INST_OPT_BASIC(sscreen->b.chip_class >= GFX9) |
+   S_030960_EN_INST_OPT_ADV(sscreen->b.chip_class >= GFX9);
 }
 
 void si_init_ia_multi_vgt_param_table(struct si_context *sctx)
 {
for (int prim = 0; prim <= R600_PRIM_RECTANGLE_LIST; prim++)
for (int uses_instancing = 0; uses_instancing < 2; uses_instancing++)
for (int multi_instances = 0; multi_instances < 2; multi_instances++)
for (int primitive_restart = 0; primitive_restart < 2; 
primitive_restart++)
for (int count_from_so = 0; count_from_so < 2; count_from_so++)
for (int line_stipple = 0; line_stipple < 2; line_stipple++)
@@ -499,21 +502,23 @@ static void si_emit_draw_registers(struct si_context 
*sctx,
unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim);
unsigned ia_multi_vgt_param, num_patches = 0;
 
if (sctx->tes_shader.cso)
si_emit_derived_tess_state(sctx, info, &num_patches);
 
ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info, num_patches);
 
/* Draw state. */
if (ia_multi_vgt_param != sctx->last_multi_vgt_param) {
-   if (sctx->b.chip_class >= CIK)
+   if (sctx->b.chip_class >= GFX9)
+   radeon_set_uconfig_reg_idx(cs, 
R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param);
+   else if (sctx->b.chip_class >= CIK)
radeon_set_context_reg_idx(cs, 
R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
else
radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, 
ia_multi_vgt_param);
 
sctx->last_multi_vgt_param = ia_multi_vgt_param;
}
if (prim != sctx->last_prim) {
if (sctx->b.chip_class >= CIK)
radeon_set_uconfig_reg_idx(cs, 
R_030908_VGT_PRIMITIVE_TYPE, 1, prim);
else
@@ -522,21 +527,27 @@ static void si_emit_draw_registers(struct si_context 
*sctx,
sctx->last_prim = prim;
}
 
if (gs_out_prim != sctx->last_gs_out_prim) {
radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, 
gs_out_prim);
sctx->last_gs_out_prim = gs_out_prim;
}
 
/* Primitive restart. */
if (info->primitive_restart != sctx->last_primitive_restart_en) {
-   radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 
info->primitive_restart);
+   if (sctx->b.chip_class >= GFX9)
+   radeon_set_uconfig_reg(cs, 
R_03092C_VGT_MULTI_PRIM_IB_RESET_EN,
+  info->primitive_restart);
+   else
+   radeon_set_context_reg(cs, 
R_028A94_VGT_MULTI_PRIM_IB_RESET_EN,
+  info->primitive_restart);
+
sctx->last_primitive_restart_en = info->primitive_restart;
 
}
if (info->primitive_restart &&
(info->restart_index != sctx->last_restart_index ||
 sctx->last_restart_index == SI_RESTART_INDEX_UNKNOWN)) {
radeon_set_context_reg(cs, 
R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
   

[Mesa-dev] [PATCH 094/140] winsys/amdgpu: rename GFX6 surface functions

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 54 ++
 1 file changed, 29 insertions(+), 25 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index 021a7ca..ede8ba6 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -142,16 +142,16 @@ ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws)
return addrCreateOutput.hLib;
 }
 
-static int compute_level(struct amdgpu_winsys *ws,
- const struct pipe_resource *tex,
- struct radeon_surf *surf, bool is_stencil,
- unsigned level, bool compressed,
- ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
- ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
- ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
- ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
- ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
- ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
+static int gfx6_compute_level(struct amdgpu_winsys *ws,
+  const struct pipe_resource *tex,
+  struct radeon_surf *surf, bool is_stencil,
+  unsigned level, bool compressed,
+  ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
+  ADDR_COMPUTE_SURFACE_INFO_OUTPUT 
*AddrSurfInfoOut,
+  ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
+  ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
+  ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
+  ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
 {
struct legacy_surf_level *surf_level;
ADDR_E_RETURNCODE ret;
@@ -271,8 +271,8 @@ static int compute_level(struct amdgpu_winsys *ws,
 #define   G_009910_MICRO_TILE_MODE(x)  (((x) >> 0) & 0x03)
 #define   G_009910_MICRO_TILE_MODE_NEW(x)  (((x) >> 22) & 0x07)
 
-static void set_micro_tile_mode(struct radeon_surf *surf,
-struct radeon_info *info)
+static void gfx6_set_micro_tile_mode(struct radeon_surf *surf,
+ struct radeon_info *info)
 {
uint32_t tile_mode = 
info->si_tile_mode_array[surf->u.legacy.tiling_index[0]];
 
@@ -296,11 +296,11 @@ static unsigned cik_get_macro_tile_index(struct 
radeon_surf *surf)
return index;
 }
 
-static int amdgpu_surface_init(struct radeon_winsys *rws,
-   const struct pipe_resource *tex,
-   unsigned flags, unsigned bpe,
-   enum radeon_surf_mode mode,
-   struct radeon_surf *surf)
+static int gfx6_surface_init(struct radeon_winsys *rws,
+ const struct pipe_resource *tex,
+ unsigned flags, unsigned bpe,
+ enum radeon_surf_mode mode,
+ struct radeon_surf *surf)
 {
struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
unsigned level;
@@ -497,16 +497,16 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
 
/* Calculate texture layout information. */
for (level = 0; level <= tex->last_level; level++) {
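-  r = compute_level(ws, tex, surf, false, level, compressed,
-&AddrSurfInfoIn, &AddrSurfInfoOut,
-&AddrDccIn, &AddrDccOut, &AddrHtileIn, &AddrHtileOut);
+  r = gfx6_compute_level(ws, tex, surf, false, level, compressed,
+ &AddrSurfInfoIn, &AddrSurfInfoOut,
+ &AddrDccIn, &AddrDccOut, &AddrHtileIn, 
&AddrHtileOut);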
   if (r)
  return r;
 
   if (level == 0) {
  surf->surf_alignment = AddrSurfInfoOut.baseAlign;
  surf->u.legacy.pipe_config = AddrSurfInfoOut.pTileInfo->pipeConfig - 
1;
- set_micro_tile_mode(surf, &ws->info);
+ gfx6_set_micro_tile_mode(surf, &ws->info);
 
  /* For 2D modes only. */
  if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
@@ -532,9 +532,10 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
   AddrTileInfoIn.tileSplitBytes = surf->u.legacy.stencil_tile_split;
 
   for (level = 0; level <= tex->last_level; level++) {
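- r = compute_level(ws, tex, surf, true, level, compressed,
-   &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, 
&AddrDccOut,
-   NULL, NULL);
+ r = gfx6_compute_level(ws, tex, surf, true, level, compressed,
+&AddrSurfInfoIn, &AddrSurfInfoOut,
+&AddrDccIn, &AddrDccOut,
+NULL, NULL);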
  if (r)
 return r;
 
@@ -574,5 +575,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
 
 void amdgpu_surface_init_functions(struct amdgpu_winsys *ws)
 {
-   ws->base.surface_init = amdgpu_surface_init;
+   if 

[Mesa-dev] [PATCH 081/140] radeonsi/gfx9: enable ETC2

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 5f0eab1..c91b0a7 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1418,21 +1418,22 @@ static uint32_t si_translate_texformat(struct 
pipe_screen *screen,
case PIPE_FORMAT_LATC2_SNORM:
case PIPE_FORMAT_RGTC2_UNORM:
case PIPE_FORMAT_LATC2_UNORM:
return V_008F14_IMG_DATA_FORMAT_BC5;
default:
goto out_unknown;
}
}
 
if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
-   sscreen->b.family == CHIP_STONEY) {
+   (sscreen->b.family == CHIP_STONEY ||
+sscreen->b.chip_class >= GFX9)) {
switch (format) {
case PIPE_FORMAT_ETC1_RGB8:
case PIPE_FORMAT_ETC2_RGB8:
case PIPE_FORMAT_ETC2_SRGB8:
return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
case PIPE_FORMAT_ETC2_RGB8A1:
case PIPE_FORMAT_ETC2_SRGB8A1:
return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
case PIPE_FORMAT_ETC2_RGBA8:
case PIPE_FORMAT_ETC2_SRGBA8:
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 080/140] radeonsi/gfx9: disable RB+ on Vega10

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_pipe_common.c   |  4 +++-
 src/gallium/drivers/radeon/r600_pipe_common.h   |  2 ++
 src/gallium/drivers/radeon/r600_texture.c   | 13 ++--
 src/gallium/drivers/radeonsi/si_pipe.c  | 13 
 src/gallium/drivers/radeonsi/si_state.c | 27 -
 src/gallium/drivers/radeonsi/si_state_shaders.c |  2 +-
 6 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 42dc38b..c33b457 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -717,21 +717,21 @@ static const struct debug_named_value 
common_debug_options[] = {
{ "noinvalrange", DBG_NO_DISCARD_RANGE, "Disable handling of 
INVALIDATE_RANGE map flags" },
{ "no2d", DBG_NO_2D_TILING, "Disable 2D tiling" },
{ "notiling", DBG_NO_TILING, "Disable tiling" },
{ "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on 
end-of-packet." },
{ "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations 
when possible." },
{ "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader 
creation." },
{ "nowc", DBG_NO_WC, "Disable GTT write combining" },
{ "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
{ "nodcc", DBG_NO_DCC, "Disable DCC." },
{ "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
-   { "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." },
+   { "norbplus", DBG_NO_RB_PLUS, "Disable RB+." },
{ "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction 
Scheduler." },
{ "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders 
compiled on demand" },
{ "noce", DBG_NO_CE, "Disable the constant engine"},
{ "unsafemath", DBG_UNSAFE_MATH, "Enable unsafe math shader 
optimizations" },
{ "nodccfb", DBG_NO_DCC_FB, "Disable separate DCC on the main 
framebuffer" },
 
DEBUG_NAMED_VALUE_END /* must be last */
 };
 
 static const char* r600_get_vendor(struct pipe_screen* pscreen)
@@ -1310,20 +1310,22 @@ bool r600_common_screen_init(struct r600_common_screen 
*rscreen,
rscreen->b.is_video_format_supported = 
vl_video_buffer_is_format_supported;
}
 
r600_init_screen_texture_functions(rscreen);
r600_init_screen_query_functions(rscreen);
 
rscreen->ws = ws;
rscreen->family = rscreen->info.family;
rscreen->chip_class = rscreen->info.chip_class;
rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", 
common_debug_options, 0);
+   rscreen->has_rbplus = false;
+   rscreen->rbplus_allowed = false;
 
r600_disk_cache_create(rscreen);
 
slab_create_parent(&rscreen->pool_transfers, sizeof(struct 
r600_transfer), 64);
 
rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", 
-1));
if (rscreen->force_aniso >= 0) {
printf("radeon: Forcing anisotropy filter to %ix\n",
   /* round down to a power of two */
   1 << util_logbase2(rscreen->force_aniso));
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 3516884..883d5ed 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -363,20 +363,22 @@ union r600_mmio_counters {
 
 struct r600_common_screen {
struct pipe_screen  b;
struct radeon_winsys*ws;
enum radeon_family  family;
enum chip_class chip_class;
struct radeon_info  info;
uint64_tdebug_flags;
boolhas_cp_dma;
boolhas_streamout;
+   boolhas_rbplus; /* if RB+ registers 
exist */
+   boolrbplus_allowed; /* if RB+ is allowed */
 
struct disk_cache   *disk_shader_cache;
 
struct slab_parent_pool pool_transfers;
 
/* Texture filter settings. */
int force_aniso; /* -1 = disabled */
 
/* Auxiliary context. Mainly used to initialize resources.
 * It must be locked prior to using and flushed before unlocking. */
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index ec7a325..2953379 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -2468,26 +2468,27 @@ void evergreen_do_fast_color_clear(struct 
r600_common_context *rctx,
continue;
}
 
/* Fast clear is the most appropriate place to 

[Mesa-dev] [PATCH 065/140] amd: define event types for GFX9

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/gfx9d.h | 54 ++
 1 file changed, 54 insertions(+)

diff --git a/src/amd/common/gfx9d.h b/src/amd/common/gfx9d.h
index b8ff825..1bc11b1 100644
--- a/src/amd/common/gfx9d.h
+++ b/src/amd/common/gfx9d.h
@@ -6179,20 +6179,74 @@
 #define   C_028A84_DISABLE_RESET_ON_EOI   
0xFFFFFFFD
 #define   S_028A84_NGG_DISABLE_PROVOK_REUSE(x)
(((unsigned)(x) & 0x1) << 2)
 #define   G_028A84_NGG_DISABLE_PROVOK_REUSE(x)(((x) >> 
2) & 0x1)
 #define   C_028A84_NGG_DISABLE_PROVOK_REUSE   
0xFFFFFFFB
 #define R_028A88_VGT_DMA_NUM_INSTANCES  
0x028A88
 #define R_028A8C_VGT_PRIMITIVEID_RESET  
0x028A8C
 #define R_028A90_VGT_EVENT_INITIATOR
0x028A90
 #define   S_028A90_EVENT_TYPE(x)  
(((unsigned)(x) & 0x3F) << 0)
 #define   G_028A90_EVENT_TYPE(x)  (((x) >> 
0) & 0x3F)
 #define   C_028A90_EVENT_TYPE 
0xFFFFFFC0
+#define V_028A90_SAMPLE_STREAMOUTSTATS1 0x01
+#define V_028A90_SAMPLE_STREAMOUTSTATS2 0x02
+#define V_028A90_SAMPLE_STREAMOUTSTATS3 0x03
+#define V_028A90_CACHE_FLUSH_TS 0x04
+#define V_028A90_CONTEXT_DONE   0x05
+#define V_028A90_CACHE_FLUSH 0x06
+#define V_028A90_CS_PARTIAL_FLUSH   0x07
+#define V_028A90_VGT_STREAMOUT_SYNC 0x08
+#define V_028A90_VGT_STREAMOUT_RESET 0x0A
+#define V_028A90_END_OF_PIPE_INCR_DE 0x0B
+#define V_028A90_END_OF_PIPE_IB_END 0x0C
+#define V_028A90_RST_PIX_CNT 0x0D
+#define V_028A90_BREAK_BATCH 0x0E /* 
new */
+#define V_028A90_VS_PARTIAL_FLUSH   0x0F
+#define V_028A90_PS_PARTIAL_FLUSH   0x10
+#define V_028A90_FLUSH_HS_OUTPUT 0x11
+#define V_028A90_FLUSH_DFSM 0x12 /* 
new */
+#define V_028A90_RESET_TO_LOWEST_VGT 0x13 /* 
new */
+#define V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT   0x14
+#define V_028A90_ZPASS_DONE 0x15
+#define V_028A90_CACHE_FLUSH_AND_INV_EVENT  0x16
+#define V_028A90_PERFCOUNTER_START  0x17
+#define V_028A90_PERFCOUNTER_STOP   0x18
+#define V_028A90_PIPELINESTAT_START 0x19
+#define V_028A90_PIPELINESTAT_STOP  0x1A
+#define V_028A90_PERFCOUNTER_SAMPLE 0x1B
+#define V_028A90_SAMPLE_PIPELINESTAT 0x1E
+#define V_028A90_SO_VGTSTREAMOUT_FLUSH  0x1F
+#define V_028A90_SAMPLE_STREAMOUTSTATS  0x20
+#define V_028A90_RESET_VTX_CNT  0x21
+#define V_028A90_BLOCK_CONTEXT_DONE 0x22
+#define V_028A90_CS_CONTEXT_DONE 0x23
+#define V_028A90_VGT_FLUSH  0x24
+#define V_028A90_TGID_ROLLOVER  0x25
+#define V_028A90_SC_SEND_DB_VPZ 0x27
+#define V_028A90_BOTTOM_OF_PIPE_TS  0x28
+#define V_028A90_DB_CACHE_FLUSH_AND_INV 0x2A
+#define V_028A90_FLUSH_AND_INV_DB_DATA_TS   0x2B
+#define V_028A90_FLUSH_AND_INV_DB_META  0x2C
+#define V_028A90_FLUSH_AND_INV_CB_DATA_TS   0x2D
+#define V_028A90_FLUSH_AND_INV_CB_META  0x2E
+#define V_028A90_CS_DONE 0x2F
+#define V_028A90_PS_DONE 0x30
+#define V_028A90_FLUSH_AND_INV_CB_PIXEL_DATA 0x31
+#define V_028A90_THREAD_TRACE_START 0x33
+#define V_028A90_THREAD_TRACE_STOP  0x34
+#define V_028A90_THREAD_TRACE_MARKER 0x35
+#define V_028A90_THREAD_TRACE_FLUSH 0x36
+#define V_028A90_THREAD_TRACE_FINISH 0x37
+#define V_028A90_PIXEL_PIPE_STAT_CONTROL 0x38
+#define V_028A90_PIXEL_PIPE_STAT_DUMP   

[Mesa-dev] [PATCH 070/140] radeonsi/gfx9: don't read back non-existent SRBM registers

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_debug.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_debug.c 
b/src/gallium/drivers/radeonsi/si_debug.c
index db310b7..1a4cadf 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -176,23 +176,25 @@ static void si_dump_debug_registers(struct si_context 
*sctx, FILE *f)
return;
}
 
si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2);
si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0);
si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1);
si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2);
si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
-   si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
-   si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
-   si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
+   if (sctx->b.chip_class <= VI) {
+   si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
+   si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
+   si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
+   }
si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT);
si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1);
si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2);
si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3);
si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS);
si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT);
si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1);
si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS);
si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT);
si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 022/140] amdgpu/addrlib: add explicit Log2NonPow2 function

2017-03-20 Thread Marek Olšák
From: Roy Zhan 

---
 src/amd/addrlib/core/addrcommon.h | 28 
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/src/amd/addrlib/core/addrcommon.h 
b/src/amd/addrlib/core/addrcommon.h
index 019ebd0..0dff0b8 100644
--- a/src/amd/addrlib/core/addrcommon.h
+++ b/src/amd/addrlib/core/addrcommon.h
@@ -438,48 +438,60 @@ static inline UINT_32 NextPow2(
 {
 newDim <<= 1;
 }
 }
 
 return newDim;
 }
 
 /**
 
***
-*   Log2
+*   Log2NonPow2
 *
 *   @brief
-*   Compute log of base 2
+*   Compute log of base 2 no matter the target is power of 2 or not
 
***
 */
-static inline UINT_32 Log2(
+static inline UINT_32 Log2NonPow2(
 UINT_32 x)  ///< [in] the value should calculate log based 2
 {
 UINT_32 y;
 
-//
-// Assert that x is a power of two.
-//
-ADDR_ASSERT(IsPow2(x));
-
 y = 0;
 while (x > 1)
 {
 x >>= 1;
 y++;
 }
 
 return y;
 }
 
 /**
 
***
+*   Log2
+*
+*   @brief
+*   Compute log of base 2
+***
+*/
+static inline UINT_32 Log2(
+UINT_32 x)  ///< [in] the value should calculate log based 2
+{
+// Assert that x is a power of two.
+ADDR_ASSERT(IsPow2(x));
+
+return Log2NonPow2(x);
+}
+
+/**
+***
 *   QLog2
 *
 *   @brief
 *   Compute log of base 2 quickly (<= 16)
 
***
 */
 static inline UINT_32 QLog2(
 UINT_32 x)  ///< [in] the value should calculate log based 2
 {
 ADDR_ASSERT(x <= 16);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 064/140] amd: add texture format definitions for GFX9

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

The DATA_FORMAT and NUM_FORMAT fields are the same, but some of the enums
differ, thus add GFX6 and GFX9 suffixes, so that the IB parser can show
enums for both.
---
 src/amd/common/gfx9d.h  | 137 ++--
 src/amd/common/sid.h|  24 +++---
 src/gallium/drivers/radeonsi/si_state.c |   8 +-
 3 files changed, 147 insertions(+), 22 deletions(-)

diff --git a/src/amd/common/gfx9d.h b/src/amd/common/gfx9d.h
index 702508b..b8ff825 100644
--- a/src/amd/common/gfx9d.h
+++ b/src/amd/common/gfx9d.h
@@ -1276,26 +1276,147 @@
 #define   S_030F14_COUNT_HI(x)
(((unsigned)(x) & 0x7FFF) << 0)
 #define   G_030F14_COUNT_HI(x)(((x) >> 
0) & 0x7FFF)
 #define   C_030F14_COUNT_HI   
0x8000
 #define R_008F14_SQ_IMG_RSRC_WORD1  
0x008F14
 #define   S_008F14_BASE_ADDRESS_HI(x) 
(((unsigned)(x) & 0xFF) << 0)
 #define   G_008F14_BASE_ADDRESS_HI(x) (((x) >> 
0) & 0xFF)
 #define   C_008F14_BASE_ADDRESS_HI
0xFF00
 #define   S_008F14_MIN_LOD(x) 
(((unsigned)(x) & 0xFFF) << 8)
 #define   G_008F14_MIN_LOD(x) (((x) >> 
8) & 0xFFF)
 #define   C_008F14_MIN_LOD
0xFFF000FF
-#define   S_008F14_DATA_FORMAT(x) 
(((unsigned)(x) & 0x3F) << 20)
-#define   G_008F14_DATA_FORMAT(x) (((x) >> 
20) & 0x3F)
-#define   C_008F14_DATA_FORMAT
0xFC0F
-#define   S_008F14_NUM_FORMAT(x)  
(((unsigned)(x) & 0x0F) << 26)
-#define   G_008F14_NUM_FORMAT(x)  (((x) >> 
26) & 0x0F)
-#define   C_008F14_NUM_FORMAT 
0xC3FF
+#define   S_008F14_DATA_FORMAT_GFX9(x)
(((unsigned)(x) & 0x3F) << 20)
+#define   G_008F14_DATA_FORMAT_GFX9(x)(((x) >> 
20) & 0x3F)
+#define   C_008F14_DATA_FORMAT_GFX9   
0xFC0F
+#define V_008F14_IMG_DATA_FORMAT_INVALID 0x00
+#define V_008F14_IMG_DATA_FORMAT_8  0x01
+#define V_008F14_IMG_DATA_FORMAT_16 0x02
+#define V_008F14_IMG_DATA_FORMAT_8_8 0x03
+#define V_008F14_IMG_DATA_FORMAT_32 0x04
+#define V_008F14_IMG_DATA_FORMAT_16_16  0x05
+#define V_008F14_IMG_DATA_FORMAT_10_11_11   0x06
+#define V_008F14_IMG_DATA_FORMAT_11_11_10   0x07
+#define V_008F14_IMG_DATA_FORMAT_10_10_10_2 0x08
+#define V_008F14_IMG_DATA_FORMAT_2_10_10_10 0x09
+#define V_008F14_IMG_DATA_FORMAT_8_8_8_8 0x0A
+#define V_008F14_IMG_DATA_FORMAT_32_32  0x0B
+#define V_008F14_IMG_DATA_FORMAT_16_16_16_16 0x0C
+#define V_008F14_IMG_DATA_FORMAT_32_32_32   0x0D
+#define V_008F14_IMG_DATA_FORMAT_32_32_32_32 0x0E
+#define V_008F14_IMG_DATA_FORMAT_RESERVED_15 0x0F
+#define V_008F14_IMG_DATA_FORMAT_5_6_5  0x10
+#define V_008F14_IMG_DATA_FORMAT_1_5_5_5 0x11
+#define V_008F14_IMG_DATA_FORMAT_5_5_5_1 0x12
+#define V_008F14_IMG_DATA_FORMAT_4_4_4_4 0x13
+#define V_008F14_IMG_DATA_FORMAT_8_24   0x14
+#define V_008F14_IMG_DATA_FORMAT_24_8   0x15
+#define V_008F14_IMG_DATA_FORMAT_X24_8_32   0x16
+#define V_008F14_IMG_DATA_FORMAT_8_AS_8_8_8_8   0x17
+#define V_008F14_IMG_DATA_FORMAT_ETC2_RGB   0x18
+#define V_008F14_IMG_DATA_FORMAT_ETC2_RGBA  0x19
+#define V_008F14_IMG_DATA_FORMAT_ETC2_R 0x1A
+#define V_008F14_IMG_DATA_FORMAT_ETC2_RG 0x1B
+#define V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1 0x1C
+#define V_008F14_IMG_DATA_FORMAT_RESERVED_29 0x1D
+#define V_008F14_IMG_DATA_FORMAT_RESERVED_30 0x1E
+#define V_008F14_IMG_DATA_FORMAT_6E4 0x1F
+#define V_008F14_IMG_DATA_FORMAT_GB_GR  0x20
+#define V_008F14_IMG_DATA_FORMAT_BG_RG  0x21
+#define V_008F14_IMG_DATA_FORMAT_5_9_9_9 0x22
+#define 

[Mesa-dev] [PATCH 091/140] radeonsi/gfx9: allow Z16_UNORM for TC-compatible HTILE

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_texture.c | 22 --
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 2953379..b81dbb7 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -217,24 +217,27 @@ static int r600_init_surface(struct r600_common_screen 
*rscreen,
bpe = 4;
}
}
 
if (!is_flushed_depth && is_depth) {
flags |= RADEON_SURF_ZBUFFER;
 
if (tc_compatible_htile &&
array_mode == RADEON_SURF_MODE_2D) {
/* TC-compatible HTILE only supports Z32_FLOAT.
-* Promote Z16 to Z32. DB->CB copies will convert
+* GFX9 also supports Z16_UNORM.
+* On VI, promote Z16 to Z32. DB->CB copies will convert
 * the format for transfers.
 */
-   bpe = 4;
+   if (rscreen->chip_class == VI)
+   bpe = 4;
+
flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
}
 
if (is_stencil)
flags |= RADEON_SURF_SBUFFER;
}
 
if (rscreen->chip_class >= VI &&
(ptex->flags & R600_RESOURCE_FLAG_DISABLE_DCC ||
 ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT))
@@ -1004,25 +1007,32 @@ r600_texture_create_object(struct pipe_screen *screen,
/* don't include stencil-only formats which we don't support for 
rendering */
rtex->is_depth = 
util_format_has_depth(util_format_description(rtex->resource.b.b.format));
 
rtex->surface = *surface;
rtex->size = rtex->surface.surf_size;
 
rtex->tc_compatible_htile = rtex->surface.htile_size != 0 &&
(rtex->surface.flags &
 RADEON_SURF_TC_COMPATIBLE_HTILE);
 
-   /* TC-compatible HTILE only supports Z32_FLOAT. */
-   if (rtex->tc_compatible_htile)
-   rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
-   else
+   /* TC-compatible HTILE:
+* - VI only supports Z32_FLOAT.
+* - GFX9 only supports Z32_FLOAT and Z16_UNORM. */
+   if (rtex->tc_compatible_htile) {
+   if (rscreen->chip_class >= GFX9 &&
+   base->format == PIPE_FORMAT_Z16_UNORM)
+   rtex->db_render_format = base->format;
+   else
+   rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
+   } else {
rtex->db_render_format = base->format;
+   }
 
/* Tiled depth textures utilize the non-displayable tile order.
 * This must be done after r600_setup_surface.
 * Applies to R600-Cayman. */
rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode 
>= RADEON_SURF_MODE_1D;
/* Applies to GCN. */
rtex->last_msaa_resolve_target_micro_mode = 
rtex->surface.micro_tile_mode;
 
/* Disable separate DCC at the beginning. DRI2 doesn't reuse buffers
 * between frames, so the only thing that can enable separate DCC
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 076/140] radeonsi/gfx9: CP DMA changes

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/sid.h |  1 +
 src/gallium/drivers/radeonsi/si_cp_dma.c | 37 ++--
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index e0c3a02..75ba965 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -217,20 +217,21 @@
 #define   V_411_SRC_ADDR   0
 #define   V_411_GDS 1 /* program SAS to 1 as well */
 #define   V_411_DATA   2
 #define   V_411_SRC_ADDR_TC_L2 3 /* new for CIK */
 #define S_411_ENGINE(x) (((unsigned)(x) & 0x1) << 27)
 #define   V_411_ME 0
 #define   V_411_PFP 1
 #define S_411_DSL_SEL(x)   (((unsigned)(x) & 0x3) << 20)
 #define   V_411_DST_ADDR   0
 #define   V_411_GDS 1 /* program DAS to 1 as well */
+#define   V_411_NOWHERE 2 /* new for GFX9 */
 #define   V_411_DST_ADDR_TC_L2 3 /* new for CIK */
 #define S_411_SRC_ADDR_HI(x)   ((x) & 0xffff)
 #define   R_412_CP_DMA_WORD2   0x412 /* 0x[packet number][word index] 
*/
 #define S_412_DST_ADDR_LO(x)   ((x) & 0xffffffff)
 #define   R_413_CP_DMA_WORD3   0x413 /* 0x[packet number][word index] 
*/
 #define S_413_DST_ADDR_HI(x)   ((x) & 0xffff)
 #define   R_414_COMMAND 0x414
 #define S_414_BYTE_COUNT_GFX6(x)   ((x) & 0x1fffff)
 #define S_414_BYTE_COUNT_GFX9(x)   ((x) & 0x3ffffff)
 #define S_414_DISABLE_WR_CONFIRM_GFX6(x) (((unsigned)(x) & 0x1) << 21) /* 
not on GFX9 */
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c 
b/src/gallium/drivers/radeonsi/si_cp_dma.c
index a564468..ccc8672 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -35,45 +35,68 @@
  * It should be set on the last CP DMA packet. */
 #define CP_DMA_SYNC (1 << 0)
 
 /* Set this if the source data was used as a destination in a previous CP DMA
  * packet. It's for preventing a read-after-write (RAW) hazard between two
  * CP DMA packets. */
 #define CP_DMA_RAW_WAIT(1 << 1)
 #define CP_DMA_USE_L2  (1 << 2) /* CIK+ */
 #define CP_DMA_CLEAR   (1 << 3)
 
+/* The max number of bytes that can be copied per packet. */
+static inline unsigned cp_dma_max_byte_count(struct si_context *sctx)
+{
+   unsigned max = sctx->b.chip_class >= GFX9 ?
+  S_414_BYTE_COUNT_GFX9(~0u) :
+  S_414_BYTE_COUNT_GFX6(~0u);
+
+   /* make it aligned for optimal performance */
+   return max & ~(SI_CPDMA_ALIGNMENT - 1);
+}
+
+
 /* Emit a CP DMA packet to do a copy from one buffer to another, or to clear
  * a buffer. The size must fit in bits [20:0]. If CP_DMA_CLEAR is set, src_va 
is a 32-bit
  * clear value.
  */
 static void si_emit_cp_dma(struct si_context *sctx, uint64_t dst_va,
   uint64_t src_va, unsigned size, unsigned flags,
   enum r600_coherency coher)
 {
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
-   uint32_t header = 0, command = S_414_BYTE_COUNT_GFX6(size);
+   uint32_t header = 0, command = 0;
 
assert(size);
-   assert(size <= CP_DMA_MAX_BYTE_COUNT);
+   assert(size <= cp_dma_max_byte_count(sctx));
+
+   if (sctx->b.chip_class >= GFX9)
+   command |= S_414_BYTE_COUNT_GFX9(size);
+   else
+   command |= S_414_BYTE_COUNT_GFX6(size);
 
/* Sync flags. */
if (flags & CP_DMA_SYNC)
header |= S_411_CP_SYNC(1);
-   else
-   command |= S_414_DISABLE_WR_CONFIRM_GFX6(1);
+   else {
+   if (sctx->b.chip_class >= GFX9)
+   command |= S_414_DISABLE_WR_CONFIRM_GFX9(1);
+   else
+   command |= S_414_DISABLE_WR_CONFIRM_GFX6(1);
+   }
 
if (flags & CP_DMA_RAW_WAIT)
command |= S_414_RAW_WAIT(1);
 
/* Src and dst flags. */
-   if (flags & CP_DMA_USE_L2)
+   if (sctx->b.chip_class >= GFX9 && src_va == dst_va)
+   header |= S_411_DSL_SEL(V_411_NOWHERE); /* prefetch only */
+   else if (flags & CP_DMA_USE_L2)
header |= S_411_DSL_SEL(V_411_DST_ADDR_TC_L2);
 
if (flags & CP_DMA_CLEAR)
header |= S_411_SRC_SEL(V_411_DATA);
else if (flags & CP_DMA_USE_L2)
header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
 
if (sctx->b.chip_class >= CIK) {
radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
radeon_emit(cs, header);
@@ -226,21 +249,21 @@ static void si_clear_buffer(struct pipe_context *ctx, 
struct pipe_resource *dst,
return;
}
 
uint64_t va = rdst->gpu_address + offset;
 
/* Flush the caches. */

[Mesa-dev] [PATCH 073/140] radeonsi/gfx9: INDIRECT_BUFFER change

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_pm4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_pm4.c 
b/src/gallium/drivers/radeonsi/si_pm4.c
index 2680439..bf923ec 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -143,21 +143,21 @@ void si_pm4_emit(struct si_context *sctx, struct 
si_pm4_state *state)
radeon_emit_array(cs, state->pm4, state->ndw);
} else {
struct r600_resource *ib = state->indirect_buffer;
 
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, ib,
  RADEON_USAGE_READ,
   RADEON_PRIO_IB2);
 
radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
radeon_emit(cs, ib->gpu_address);
-   radeon_emit(cs, (ib->gpu_address >> 32) & 0xffff);
+   radeon_emit(cs, ib->gpu_address >> 32);
radeon_emit(cs, (ib->b.b.width0 >> 2) & 0xfffff);
}
 }
 
 void si_pm4_reset_emitted(struct si_context *sctx)
 {
memset(&sctx->emitted, 0, sizeof(sctx->emitted));
sctx->dirty_states |= u_bit_consecutive(0, SI_NUM_STATES);
 }
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 074/140] radeonsi/gfx9: EVENT_WRITE_EOP -> RELEASE_MEM

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_pipe_common.c | 39 +--
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 052054b..42dc38b 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -103,41 +103,52 @@ void r600_gfx_write_event_eop(struct r600_common_context 
*ctx,
  unsigned event, unsigned event_flags,
  unsigned data_sel,
  struct r600_resource *buf, uint64_t va,
  uint32_t old_fence, uint32_t new_fence)
 {
struct radeon_winsys_cs *cs = ctx->gfx.cs;
unsigned op = EVENT_TYPE(event) |
  EVENT_INDEX(5) |
  event_flags;
 
-   if (ctx->chip_class == CIK ||
-   ctx->chip_class == VI) {
-   /* Two EOP events are required to make all engines go idle
-* (and optional cache flushes executed) before the timestamp
-* is written.
-*/
+   if (ctx->chip_class >= GFX9) {
+   radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0));
+   radeon_emit(cs, op);
+   radeon_emit(cs, EOP_DATA_SEL(data_sel));
+   radeon_emit(cs, va);/* address lo */
+   radeon_emit(cs, va >> 32);  /* address hi */
+   radeon_emit(cs, new_fence); /* immediate data lo */
+   radeon_emit(cs, 0); /* immediate data hi */
+   radeon_emit(cs, 0); /* unused */
+   } else {
+   if (ctx->chip_class == CIK ||
+   ctx->chip_class == VI) {
+   /* Two EOP events are required to make all engines go 
idle
+* (and optional cache flushes executed) before the 
timestamp
+* is written.
+*/
+   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
+   radeon_emit(cs, op);
+   radeon_emit(cs, va);
+   radeon_emit(cs, ((va >> 32) & 0xffff) | 
EOP_DATA_SEL(data_sel));
+   radeon_emit(cs, old_fence); /* immediate data */
+   radeon_emit(cs, 0); /* unused */
+   }
+
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
radeon_emit(cs, op);
radeon_emit(cs, va);
radeon_emit(cs, ((va >> 32) & 0xffff) | EOP_DATA_SEL(data_sel));
-   radeon_emit(cs, old_fence); /* immediate data */
+   radeon_emit(cs, new_fence); /* immediate data */
radeon_emit(cs, 0); /* unused */
}
 
-   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
-   radeon_emit(cs, op);
-   radeon_emit(cs, va);
-   radeon_emit(cs, ((va >> 32) & 0xffff) | EOP_DATA_SEL(data_sel));
-   radeon_emit(cs, new_fence); /* immediate data */
-   radeon_emit(cs, 0); /* unused */
-
if (buf)
r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE,
RADEON_PRIO_QUERY);
 }
 
 unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen)
 {
unsigned dwords = 6;
 
if (screen->chip_class == CIK ||
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 079/140] radeonsi/gfx9: init_config changes

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/gfx9d.h  |  4 
 src/gallium/drivers/radeonsi/si_state.c | 38 +++--
 2 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/src/amd/common/gfx9d.h b/src/amd/common/gfx9d.h
index 1bc11b1..797bdcc 100644
--- a/src/amd/common/gfx9d.h
+++ b/src/amd/common/gfx9d.h
@@ -7094,20 +7094,24 @@
 #define   S_028C40_LOAD_COLLISION_WAVEID(x)   
(((unsigned)(x) & 0x1) << 2)
 #define   G_028C40_LOAD_COLLISION_WAVEID(x)   (((x) >> 
2) & 0x1)
 #define   C_028C40_LOAD_COLLISION_WAVEID  
0xFFFFFFFB
 #define   S_028C40_LOAD_INTRAWAVE_COLLISION(x)
(((unsigned)(x) & 0x1) << 3)
 #define   G_028C40_LOAD_INTRAWAVE_COLLISION(x)(((x) >> 
3) & 0x1)
 #define   C_028C40_LOAD_INTRAWAVE_COLLISION   
0xFFFFFFF7
 #define R_028C44_PA_SC_BINNER_CNTL_0
0x028C44
 #define   S_028C44_BINNING_MODE(x)
(((unsigned)(x) & 0x03) << 0)
 #define   G_028C44_BINNING_MODE(x)(((x) >> 
0) & 0x03)
 #define   C_028C44_BINNING_MODE   
0xFFFFFFFC
+#define V_028C44_BINNING_ALLOWED   0
+#define V_028C44_FORCE_BINNING_ON  1
+#define V_028C44_DISABLE_BINNING_USE_NEW_SC
2
+#define V_028C44_DISABLE_BINNING_USE_LEGACY_SC 3
 #define   S_028C44_BIN_SIZE_X(x)  
(((unsigned)(x) & 0x1) << 2)
 #define   G_028C44_BIN_SIZE_X(x)  (((x) >> 
2) & 0x1)
 #define   C_028C44_BIN_SIZE_X 
0xFFFFFFFB
 #define   S_028C44_BIN_SIZE_Y(x)  
(((unsigned)(x) & 0x1) << 3)
 #define   G_028C44_BIN_SIZE_Y(x)  (((x) >> 
3) & 0x1)
 #define   C_028C44_BIN_SIZE_Y 
0xFFFFFFF7
 #define   S_028C44_BIN_SIZE_X_EXTEND(x)   
(((unsigned)(x) & 0x07) << 4)
 #define   G_028C44_BIN_SIZE_X_EXTEND(x)   (((x) >> 
4) & 0x07)
 #define   C_028C44_BIN_SIZE_X_EXTEND  
0xFFFFFF8F
 #define   S_028C44_BIN_SIZE_Y_EXTEND(x)   
(((unsigned)(x) & 0x07) << 7)
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 4cd0494..fa69b34 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -19,20 +19,21 @@
  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  * Authors:
  *  Christian König 
  */
 
 #include "si_pipe.h"
 #include "sid.h"
+#include "gfx9d.h"
 #include "radeon/r600_cs.h"
 #include "radeon/r600_query.h"
 
 #include "util/u_dual_blend.h"
 #include "util/u_format.h"
 #include "util/u_format_s3tc.h"
 #include "util/u_memory.h"
 #include "util/u_resource.h"
 #include "util/u_upload_mgr.h"
 
@@ -4123,36 +4124,46 @@ static void si_init_config(struct si_context *sctx)
   S_028230_ER_LINE_TB(0xA) |
   S_028230_ER_LINE_BT(0xA));
/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0);
 
-   si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
-   si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
-   si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
+   if (sctx->b.chip_class >= GFX9) {
+   si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0);
+   si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0);
+   si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0);
+   } else {
+   si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
+   si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
+   si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
+   }
 
if (sctx->b.chip_class >= CIK) {
/* If this is 0, Bonaire can hang even if GS isn't being used.
 * Other chips are unaffected. These are suboptimal values,
 * but we don't use on-chip GS.
 */
si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
  

[Mesa-dev] [PATCH 067/140] radeonsi/gfx9: add GFX9 and VEGA10 enums

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/amd_family.h   | 8 +---
 src/gallium/drivers/radeon/r600_pipe_common.c | 1 +
 src/gallium/drivers/radeonsi/si_pipe.c| 4 +++-
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 7 ++-
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/amd/common/amd_family.h b/src/amd/common/amd_family.h
index b09bbb8..8a6dad6 100644
--- a/src/amd/common/amd_family.h
+++ b/src/amd/common/amd_family.h
@@ -85,28 +85,30 @@ enum radeon_family {
 CHIP_HAWAII,
 CHIP_MULLINS,
 CHIP_TONGA,
 CHIP_ICELAND,
 CHIP_CARRIZO,
 CHIP_FIJI,
 CHIP_STONEY,
 CHIP_POLARIS10,
 CHIP_POLARIS11,
 CHIP_POLARIS12,
+CHIP_VEGA10,
 CHIP_LAST,
 };
 
 enum chip_class {
 CLASS_UNKNOWN = 0,
 R300,
 R400,
 R500,
 R600,
 R700,
 EVERGREEN,
 CAYMAN,
-SI,
-CIK,
-VI,
+SI,  /* GFX6 */
+CIK, /* GFX7 */
+VI,  /* GFX8 */
+GFX9,
 };
 
 #endif
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index b3b925e..7ec5703 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -772,20 +772,21 @@ static const char* r600_get_chip_name(struct 
r600_common_screen *rscreen)
case CHIP_HAWAII: return "AMD HAWAII";
case CHIP_MULLINS: return "AMD MULLINS";
case CHIP_TONGA: return "AMD TONGA";
case CHIP_ICELAND: return "AMD ICELAND";
case CHIP_CARRIZO: return "AMD CARRIZO";
case CHIP_FIJI: return "AMD FIJI";
case CHIP_POLARIS10: return "AMD POLARIS10";
case CHIP_POLARIS11: return "AMD POLARIS11";
case CHIP_POLARIS12: return "AMD POLARIS12";
case CHIP_STONEY: return "AMD STONEY";
+   case CHIP_VEGA10: return "AMD VEGA10";
default: return "AMD unknown";
}
 }
 
 static void r600_disk_cache_create(struct r600_common_screen *rscreen)
 {
/* Don't use the cache if shader dumping is enabled. */
if (rscreen->debug_flags &
(DBG_FS | DBG_VS | DBG_TCS | DBG_TES | DBG_GS | DBG_PS | DBG_CS))
return;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 277fa28..c66203e 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -744,21 +744,23 @@ static void si_handle_env_var_force_family(struct 
si_screen *sscreen)
unsigned i;
 
if (!family)
return;
 
for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
if (!strcmp(family, r600_get_llvm_processor_name(i))) {
/* Override family and chip_class. */
sscreen->b.family = sscreen->b.info.family = i;
 
-   if (i >= CHIP_TONGA)
+   if (i >= CHIP_VEGA10)
+   sscreen->b.chip_class = 
sscreen->b.info.chip_class = GFX9;
+   else if (i >= CHIP_TONGA)
sscreen->b.chip_class = 
sscreen->b.info.chip_class = VI;
else if (i >= CHIP_BONAIRE)
sscreen->b.chip_class = 
sscreen->b.info.chip_class = CIK;
else
sscreen->b.chip_class = 
sscreen->b.info.chip_class = SI;
 
/* Don't submit any IBs. */
setenv("RADEON_NOOP", "1", 1);
return;
}
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 37e0140..25f08ef 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -212,21 +212,23 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, int 
fd)
switch (ws->info.pci_id) {
 #define CHIPSET(pci_id, name, cfamily) case pci_id: ws->info.family = 
CHIP_##cfamily; break;
 #include "pci_ids/radeonsi_pci_ids.h"
 #undef CHIPSET
 
default:
   fprintf(stderr, "amdgpu: Invalid PCI ID.\n");
   goto fail;
}
 
-   if (ws->info.family >= CHIP_TONGA)
+   if (ws->info.family >= CHIP_VEGA10)
+  ws->info.chip_class = GFX9;
+   else if (ws->info.family >= CHIP_TONGA)
   ws->info.chip_class = VI;
else if (ws->info.family >= CHIP_BONAIRE)
   ws->info.chip_class = CIK;
else if (ws->info.family >= CHIP_TAHITI)
   ws->info.chip_class = SI;
else {
   fprintf(stderr, "amdgpu: Unknown family.\n");
   goto fail;
}
 
@@ -296,20 +298,23 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, int 
fd)
   ws->family = FAMILY_VI;
   ws->rev_id = VI_POLARIS10_P_A0;
   break;
case CHIP_POLARIS11:
   ws->family = FAMILY_VI;
   ws->rev_id = VI_POLARIS11_M_A0;
   break;
case CHIP_POLARIS12:
   ws->family = FAMILY_VI;
   ws->rev_id = 

[Mesa-dev] [PATCH 066/140] amd: GFX9 packet changes

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/r600d_common.h|  1 +
 src/amd/common/sid.h | 30 +-
 src/gallium/drivers/radeonsi/si_cp_dma.c |  4 ++--
 3 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/src/amd/common/r600d_common.h b/src/amd/common/r600d_common.h
index a35108f..3fdfb7c 100644
--- a/src/amd/common/r600d_common.h
+++ b/src/amd/common/r600d_common.h
@@ -56,20 +56,21 @@
 #define WAIT_REG_MEM_MEM_SPACE(x)   (((unsigned)(x) & 0x3) << 4)
 #define PKT3_EVENT_WRITE   0x46
 #define PKT3_EVENT_WRITE_EOP   0x47
 #define EOP_DATA_SEL(x) ((x) << 29)
/* 0 - discard
 * 1 - send low 32bit data
 * 2 - send 64bit data
 * 3 - send 64bit GPU counter value
 * 4 - send 64bit sys counter value
 */
+#define PKT3_RELEASE_MEM   0x49 /* GFX9+ */
 #define PKT3_SET_CONFIG_REG   0x68
 #define PKT3_SET_CONTEXT_REG  0x69
 #define PKT3_STRMOUT_BASE_UPDATE  0x72 /* r700 only */
 #define PKT3_SURFACE_BASE_UPDATE   0x73 /* r600 only */
 #defineSURFACE_BASE_UPDATE_DEPTH  (1 << 0)
 #defineSURFACE_BASE_UPDATE_COLOR(x)   (2 << (x))
 #defineSURFACE_BASE_UPDATE_COLOR_NUM(x) (((1 << x) - 1) << 1)
 #defineSURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x))
 #define PKT3_SET_SH_REG0x76 /* SI and later */
 #define PKT3_SET_UCONFIG_REG   0x79 /* CIK and later */
diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index 61e1406..e0c3a02 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -43,21 +43,30 @@
 #define EVENT_TYPE_SAMPLE_STREAMOUTSTATS   0x20
 #defineEVENT_TYPE(x)   ((x) << 0)
 #defineEVENT_INDEX(x)  ((x) << 8)
 /* 0 - any non-TS event
 * 1 - ZPASS_DONE
 * 2 - SAMPLE_PIPELINESTAT
 * 3 - SAMPLE_STREAMOUTSTAT*
 * 4 - *S_PARTIAL_FLUSH
 * 5 - TS events
 */
-#define EVENT_WRITE_INV_L2   0x10
+
+/* EVENT_WRITE_EOP (SI-VI) & RELEASE_MEM (GFX9) */
+#define EVENT_TCL1_VOL_ACTION_ENA  (1 << 12)
+#define EVENT_TC_VOL_ACTION_ENA(1 << 13)
+#define EVENT_TC_WB_ACTION_ENA (1 << 15)
+#define EVENT_TCL1_ACTION_ENA  (1 << 16)
+#define EVENT_TC_ACTION_ENA(1 << 17)
+#define EVENT_TC_NC_ACTION_ENA (1 << 19) /* GFX9+ */
+#define EVENT_TC_WC_ACTION_ENA (1 << 20) /* GFX9+ */
+#define EVENT_TC_MD_ACTION_ENA (1 << 21) /* GFX9+ */
 
 
 #define PREDICATION_OP_CLEAR 0x0
 #define PREDICATION_OP_ZPASS 0x1
 #define PREDICATION_OP_PRIMCOUNT 0x2
 #define PREDICATION_OP_BOOL64 0x3
 
 #define PRED_OP(x) ((x) << 16)
 
 #define PREDICATION_CONTINUE (1 << 31)
@@ -85,21 +94,21 @@
 #define PKT3_COND_EXEC 0x22
 #define PKT3_PRED_EXEC 0x23
 #define PKT3_DRAW_INDIRECT 0x24
 #define PKT3_DRAW_INDEX_INDIRECT   0x25
 #define PKT3_INDEX_BASE0x26
 #define PKT3_DRAW_INDEX_2  0x27
 #define PKT3_CONTEXT_CONTROL   0x28
 #define CONTEXT_CONTROL_LOAD_ENABLE(x) (((unsigned)(x) & 0x1) << 31)
 #define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28)
 #define CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)
-#define PKT3_INDEX_TYPE0x2A
+#define PKT3_INDEX_TYPE0x2A /* not on GFX9 */
 #define PKT3_DRAW_INDIRECT_MULTI   0x2C
 #define   R_2C3_DRAW_INDEX_LOC  0x2C3
 #define S_2C3_COUNT_INDIRECT_ENABLE(x)  (((unsigned)(x) & 0x1) << 30)
 #define S_2C3_DRAW_INDEX_ENABLE(x)  (((unsigned)(x) & 0x1) << 31)
 #define PKT3_DRAW_INDEX_AUTO   0x2D
 #define PKT3_DRAW_INDEX_IMMD   0x2E /* not on CIK */
 #define PKT3_NUM_INSTANCES 0x2F
 #define PKT3_DRAW_INDEX_MULTI_AUTO 0x30
 #define PKT3_INDIRECT_BUFFER_SI0x32 /* not on CIK */
 #define PKT3_INDIRECT_BUFFER_CONST 0x33
@@ -146,27 +155,28 @@
 #define COPY_DATA_PERF  4
 #define COPY_DATA_IMM   5
 #defineCOPY_DATA_DST_SEL(x)(((unsigned)(x) & 0xf) 
<< 8)
 #defineCOPY_DATA_COUNT_SEL (1 << 16)
 #defineCOPY_DATA_WR_CONFIRM(1 << 20)
 #define PKT3_PFP_SYNC_ME  0x42
 #define PKT3_SURFACE_SYNC  0x43 /* deprecated on CIK, use 

[Mesa-dev] [PATCH 072/140] radeonsi/gfx9: enable SDMA buffer copying & clearing

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/cik_sdma.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c 
b/src/gallium/drivers/radeonsi/cik_sdma.c
index fdcf22f..500afb0 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -50,21 +50,21 @@ static void cik_sdma_copy_buffer(struct si_context *ctx,
src_offset += rsrc->gpu_address;
 
ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
r600_need_dma_space(&ctx->b, ncopy * 7, rdst, rsrc);
 
for (i = 0; i < ncopy; i++) {
csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);
radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
0));
-   radeon_emit(cs, csize);
+   radeon_emit(cs, ctx->b.chip_class >= GFX9 ? csize - 1 : csize);
radeon_emit(cs, 0); /* src/dst endian swap */
radeon_emit(cs, src_offset);
radeon_emit(cs, src_offset >> 32);
radeon_emit(cs, dst_offset);
radeon_emit(cs, dst_offset >> 32);
dst_offset += csize;
src_offset += csize;
size -= csize;
}
 }
@@ -96,21 +96,21 @@ static void cik_sdma_clear_buffer(struct pipe_context *ctx,
ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
r600_need_dma_space(&sctx->b, ncopy * 5, rdst, NULL);
 
for (i = 0; i < ncopy; i++) {
csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);
radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_PACKET_CONSTANT_FILL, 
0,
0x8000 /* dword copy */));
radeon_emit(cs, offset);
radeon_emit(cs, offset >> 32);
radeon_emit(cs, clear_value);
-   radeon_emit(cs, csize);
+   radeon_emit(cs, sctx->b.chip_class >= GFX9 ? csize - 1 : csize);
offset += csize;
size -= csize;
}
 }
 
 static unsigned minify_as_blocks(unsigned width, unsigned level, unsigned 
blk_w)
 {
width = u_minify(width, level);
return DIV_ROUND_UP(width, blk_w);
 }
@@ -527,21 +527,22 @@ static void cik_sdma_copy(struct pipe_context *ctx,
struct si_context *sctx = (struct si_context *)ctx;
 
if (!sctx->b.dma.cs)
goto fallback;
 
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
cik_sdma_copy_buffer(sctx, dst, src, dstx, src_box->x, 
src_box->width);
return;
}
 
-   if (cik_sdma_copy_texture(sctx, dst, dst_level, dstx, dsty, dstz,
+   if ((sctx->b.chip_class == CIK || sctx->b.chip_class == VI) &&
+   cik_sdma_copy_texture(sctx, dst, dst_level, dstx, dsty, dstz,
  src, src_level, src_box))
return;
 
 fallback:
si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box);
 }
 
 void cik_init_sdma_functions(struct si_context *sctx)
 {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 086/140] radeonsi/gfx9: add a scissor bug workaround

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state_draw.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index 1ff1547..8c6e9cd 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1162,20 +1162,26 @@ void si_draw_vbo(struct pipe_context *ctx, const struct 
pipe_draw_info *info)
 
si_need_cs_space(sctx);
 
/* Since we've called r600_context_add_resource_size for vertex buffers,
 * this must be called after si_need_cs_space, because we must let
 * need_cs_space flush before we add buffers to the buffer list.
 */
if (!si_upload_vertex_buffer_descriptors(sctx))
return;
 
+   /* GFX9 scissor bug workaround. There is also a more efficient but
+* more involved alternative workaround. */
+   if (sctx->b.chip_class == GFX9 &&
+   si_is_atom_dirty(sctx, &sctx->b.scissors.atom))
+   sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
+
/* Flush caches before the first state atom, which does L2 prefetches. 
*/
if (sctx->b.flags)
si_emit_cache_flush(sctx);
 
/* Emit state atoms. */
mask = sctx->dirty_atoms;
while (mask) {
struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)];
 
atom->emit(&sctx->b, atom);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 084/140] radeonsi/gfx9: disable the 2-bit format fetch fix

2017-03-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index b7ba267..eacb1b4 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3335,20 +3335,21 @@ static void si_delete_sampler_state(struct pipe_context 
*ctx, void *state)
 }
 
 /*
  * Vertex elements & buffers
  */
 
 static void *si_create_vertex_elements(struct pipe_context *ctx,
   unsigned count,
   const struct pipe_vertex_element 
*elements)
 {
+   struct si_screen *sscreen = (struct si_screen*)ctx->screen;
struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
bool used[SI_NUM_VERTEX_BUFFERS] = {};
int i;
 
assert(count <= SI_MAX_ATTRIBS);
if (!v)
return NULL;
 
v->count = count;
v->desc_list_byte_size = align(count * 16, SI_CPDMA_ALIGNMENT);
@@ -3374,23 +3375,26 @@ static void *si_create_vertex_elements(struct 
pipe_context *ctx,
desc = util_format_description(elements[i].src_format);
first_non_void = 
util_format_get_first_non_void_channel(elements[i].src_format);
data_format = si_translate_buffer_dataformat(ctx->screen, desc, 
first_non_void);
num_format = si_translate_buffer_numformat(ctx->screen, desc, 
first_non_void);
channel = first_non_void >= 0 ? &desc->channel[first_non_void] 
: NULL;
memcpy(swizzle, desc->swizzle, sizeof(swizzle));
 
v->format_size[i] = desc->block.bits / 8;
 
/* The hardware always treats the 2-bit alpha channel as
-* unsigned, so a shader workaround is needed.
+* unsigned, so a shader workaround is needed. The affected
+* chips are VI and older except Stoney (GFX8.1).
 */
-   if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) {
+   if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10 &&
+   sscreen->b.chip_class <= VI &&
+   sscreen->b.family != CHIP_STONEY) {
if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
v->fix_fetch[i] = SI_FIX_FETCH_A2_SNORM;
} else if (num_format == 
V_008F0C_BUF_NUM_FORMAT_SSCALED) {
v->fix_fetch[i] = SI_FIX_FETCH_A2_SSCALED;
} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
/* This isn't actually used in OpenGL. */
v->fix_fetch[i] = SI_FIX_FETCH_A2_SINT;
}
} else if (channel && channel->type == UTIL_FORMAT_TYPE_FIXED) {
if (desc->swizzle[3] == PIPE_SWIZZLE_1)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   3   4   >