Offers three clocks, device, clock monotonic and clock monotonic
raw. Could use some kernel support to reduce the deviation between
clock values.

v2:
        Ensure deviation is at least as big as the GPU time interval.

v3:
        Set device->lost when returning DEVICE_LOST.
        Use MAX2 and DIV_ROUND_UP instead of open coding these.
        Delete spurious TIMESTAMP in radv version.

        Suggested-by: Jason Ekstrand <ja...@jlekstrand.net>
        Suggested-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>

v4:
        Add anv_gem_reg_read to anv_gem_stubs.c

        Suggested-by: Jason Ekstrand <ja...@jlekstrand.net>

v5:
        Adjust maxDeviation computation to max(sampled_clock_period) +
        sample_interval.

        Suggested-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>
        Suggested-by: Jason Ekstrand <ja...@jlekstrand.net>

Signed-off-by: Keith Packard <kei...@keithp.com>
---
 src/amd/vulkan/radv_device.c       | 119 +++++++++++++++++++++++++++
 src/amd/vulkan/radv_extensions.py  |   1 +
 src/intel/vulkan/anv_device.c      | 127 +++++++++++++++++++++++++++++
 src/intel/vulkan/anv_extensions.py |   1 +
 src/intel/vulkan/anv_gem.c         |  13 +++
 src/intel/vulkan/anv_gem_stubs.c   |   7 ++
 src/intel/vulkan/anv_private.h     |   2 +
 7 files changed, 270 insertions(+)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 174922780fc..4a705a724ef 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -4955,3 +4955,122 @@ radv_GetDeviceGroupPeerMemoryFeatures(
                               VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
                               VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
 }
+
+static const VkTimeDomainEXT radv_time_domains[] = {
+       VK_TIME_DOMAIN_DEVICE_EXT,
+       VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
+       VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
+};
+
+VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
+       VkPhysicalDevice                             physicalDevice,
+       uint32_t                                     *pTimeDomainCount,
+       VkTimeDomainEXT                              *pTimeDomains)
+{
+       int d;
+       VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
+
+       for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
+               vk_outarray_append(&out, i) {
+                       *i = radv_time_domains[d];
+               }
+       }
+
+       return vk_outarray_status(&out);
+}
+
+static uint64_t
+radv_clock_gettime(clockid_t clock_id)
+{
+       struct timespec current;
+       int ret;
+
+       ret = clock_gettime(clock_id, &current);
+       if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
+               ret = clock_gettime(CLOCK_MONOTONIC, &current);
+       if (ret < 0)
+               return 0;
+
+       return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
+}
+
+VkResult radv_GetCalibratedTimestampsEXT(
+       VkDevice                                     _device,
+       uint32_t                                     timestampCount,
+       const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
+       uint64_t                                     *pTimestamps,
+       uint64_t                                     *pMaxDeviation)
+{
+       RADV_FROM_HANDLE(radv_device, device, _device);
+       uint32_t clock_crystal_freq = 
device->physical_device->rad_info.clock_crystal_freq;
+       int d;
+       uint64_t begin, end;
+        uint64_t max_clock_period = 0;
+
+       begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
+
+       for (d = 0; d < timestampCount; d++) {
+               switch (pTimestampInfos[d].timeDomain) {
+               case VK_TIME_DOMAIN_DEVICE_EXT:
+                       pTimestamps[d] = device->ws->query_value(device->ws,
+                                                                
RADEON_TIMESTAMP);
+                        uint64_t device_period = DIV_ROUND_UP(1000000, 
clock_crystal_freq);
+                        max_clock_period = MAX2(max_clock_period, 
device_period);
+                       break;
+               case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
+                       pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
+                        max_clock_period = MAX2(max_clock_period, 1);
+                       break;
+
+               case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
+                       pTimestamps[d] = begin;
+                       break;
+               default:
+                       pTimestamps[d] = 0;
+                       break;
+               }
+       }
+
+       end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
+
+        /*
+         * The maximum deviation is the sum of the interval over which we
+         * perform the sampling and the maximum period of any sampled
+         * clock. That's because the maximum skew between any two sampled
+         * clock edges is when the sampled clock with the largest period is
+         * sampled at the end of that period but right at the beginning of the
+         * sampling interval and some other clock is sampled right at the
+         * begining of its sampling period and right at the end of the
+         * sampling interval. Let's assume the GPU has the longest clock
+         * period and that the application is sampling GPU and monotonic:
+         *
+         *                               s                 e
+         *                      w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
+         *     Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+         *
+         *                               g
+         *               0         1         2         3
+         *     GPU       -----_____-----_____-----_____-----_____
+         *
+         *                                                m
+         *                                         x y z 0 1 2 3 4 5 6 7 8 9 a 
b c
+         *     Monotonic                           
-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+         *
+         *     Interval                     <----------------->
+         *     Deviation           <-------------------------->
+         *
+         *             s  = read(raw)       2
+         *             g  = read(GPU)       1
+         *             m  = read(monotonic) 2
+         *             e  = read(raw)       b
+         *
+         * We round the sample interval up by one tick to cover sampling error
+         * in the interval clock
+         */
+
+        uint64_t sample_interval = end - begin + 1;
+
+        *pMaxDeviation = sample_interval + max_clock_period;
+
+       return VK_SUCCESS;
+}
diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index 5dcedae1c63..4c81d3f0068 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -92,6 +92,7 @@ EXTENSIONS = [
     Extension('VK_KHR_display',                          23, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
     Extension('VK_EXT_direct_mode_display',               1, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
     Extension('VK_EXT_acquire_xlib_display',              1, 
'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
+    Extension('VK_EXT_calibrated_timestamps',             1, True),
     Extension('VK_EXT_conditional_rendering',             1, True),
     Extension('VK_EXT_conservative_rasterization',        1, 
'device->rad_info.chip_class >= GFX9'),
     Extension('VK_EXT_display_surface_counter',           1, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index a2551452eb1..076ff3a57f6 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -3021,6 +3021,133 @@ void anv_DestroyFramebuffer(
    vk_free2(&device->alloc, pAllocator, fb);
 }
 
+static const VkTimeDomainEXT anv_time_domains[] = {
+   VK_TIME_DOMAIN_DEVICE_EXT,
+   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
+   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
+};
+
+VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
+   VkPhysicalDevice                             physicalDevice,
+   uint32_t                                     *pTimeDomainCount,
+   VkTimeDomainEXT                              *pTimeDomains)
+{
+   int d;
+   VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
+
+   for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
+      vk_outarray_append(&out, i) {
+         *i = anv_time_domains[d];
+      }
+   }
+
+   return vk_outarray_status(&out);
+}
+
+static uint64_t
+anv_clock_gettime(clockid_t clock_id)
+{
+   struct timespec current;
+   int ret;
+
+   ret = clock_gettime(clock_id, &current);
+   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
+      ret = clock_gettime(CLOCK_MONOTONIC, &current);
+   if (ret < 0)
+      return 0;
+
+   return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
+}
+
+#define TIMESTAMP 0x2358
+
+VkResult anv_GetCalibratedTimestampsEXT(
+   VkDevice                                     _device,
+   uint32_t                                     timestampCount,
+   const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
+   uint64_t                                     *pTimestamps,
+   uint64_t                                     *pMaxDeviation)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   uint64_t timestamp_frequency = device->info.timestamp_frequency;
+   int  ret;
+   int d;
+   uint64_t begin, end;
+   uint64_t max_clock_period = 0;
+
+   begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
+
+   for (d = 0; d < timestampCount; d++) {
+      switch (pTimestampInfos[d].timeDomain) {
+      case VK_TIME_DOMAIN_DEVICE_EXT:
+         ret = anv_gem_reg_read(device, TIMESTAMP | 1,
+                                &pTimestamps[d]);
+
+         if (ret != 0) {
+            device->lost = TRUE;
+            return VK_ERROR_DEVICE_LOST;
+         }
+         uint64_t device_period = DIV_ROUND_UP(1000000000, 
timestamp_frequency);
+         max_clock_period = MAX2(max_clock_period, device_period);
+         break;
+      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
+         pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC);
+         max_clock_period = MAX2(max_clock_period, 1);
+         break;
+
+      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
+         pTimestamps[d] = begin;
+         break;
+      default:
+         pTimestamps[d] = 0;
+         break;
+      }
+   }
+
+   end = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
+
+    /*
+     * The maximum deviation is the sum of the interval over which we
+     * perform the sampling and the maximum period of any sampled
+     * clock. That's because the maximum skew between any two sampled
+     * clock edges is when the sampled clock with the largest period is
+     * sampled at the end of that period but right at the beginning of the
+     * sampling interval and some other clock is sampled right at the
+     * begining of its sampling period and right at the end of the
+     * sampling interval. Let's assume the GPU has the longest clock
+     * period and that the application is sampling GPU and monotonic:
+     *
+     *                               s                 e
+     *                  w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
+     * Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+     *
+     *                               g
+     *           0         1         2         3
+     * GPU       -----_____-----_____-----_____-----_____
+     *
+     *                                                m
+     *                                     x y z 0 1 2 3 4 5 6 7 8 9 a b c
+     * Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+     *
+     * Interval                     <----------------->
+     * Deviation           <-------------------------->
+     *
+     *         s  = read(raw)       2
+     *         g  = read(GPU)       1
+     *         m  = read(monotonic) 2
+     *         e  = read(raw)       b
+     *
+     * We round the sample interval up by one tick to cover sampling error
+     * in the interval clock
+     */
+
+   uint64_t sample_interval = end - begin + 1;
+
+   *pMaxDeviation = sample_interval + max_clock_period;
+
+   return VK_SUCCESS;
+}
+
 /* vk_icd.h does not declare this function, so we declare it here to
  * suppress Wmissing-prototypes.
  */
diff --git a/src/intel/vulkan/anv_extensions.py 
b/src/intel/vulkan/anv_extensions.py
index d4915c95013..a8535964da7 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -126,6 +126,7 @@ EXTENSIONS = [
     Extension('VK_EXT_vertex_attribute_divisor',          3, True),
     Extension('VK_EXT_post_depth_coverage',               1, 'device->info.gen 
>= 9'),
     Extension('VK_EXT_sampler_filter_minmax',             1, 'device->info.gen 
>= 9'),
+    Extension('VK_EXT_calibrated_timestamps',             1, True),
 ]
 
 class VkVersion:
diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c
index c43b5ef9e06..1bdf040c1a3 100644
--- a/src/intel/vulkan/anv_gem.c
+++ b/src/intel/vulkan/anv_gem.c
@@ -423,6 +423,19 @@ anv_gem_fd_to_handle(struct anv_device *device, int fd)
    return args.handle;
 }
 
+int
+anv_gem_reg_read(struct anv_device *device, uint32_t offset, uint64_t *result)
+{
+   struct drm_i915_reg_read args = {
+      .offset = offset
+   };
+
+   int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_REG_READ, &args);
+
+   *result = args.val;
+   return ret;
+}
+
 #ifndef SYNC_IOC_MAGIC
 /* duplicated from linux/sync_file.h to avoid build-time dependency
  * on new (v4.7) kernel headers.  Once distro's are mostly using
diff --git a/src/intel/vulkan/anv_gem_stubs.c b/src/intel/vulkan/anv_gem_stubs.c
index 5093bd5db1a..8cc3ad1f22e 100644
--- a/src/intel/vulkan/anv_gem_stubs.c
+++ b/src/intel/vulkan/anv_gem_stubs.c
@@ -251,3 +251,10 @@ anv_gem_syncobj_wait(struct anv_device *device,
 {
    unreachable("Unused");
 }
+
+int
+anv_gem_reg_read(struct anv_device *device,
+                 uint32_t offset, uint64_t *result)
+{
+   unreachable("Unused");
+}
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 599b903f25c..08376b00c8e 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1103,6 +1103,8 @@ int anv_gem_get_aperture(int fd, uint64_t *size);
 int anv_gem_gpu_get_reset_stats(struct anv_device *device,
                                 uint32_t *active, uint32_t *pending);
 int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
+int anv_gem_reg_read(struct anv_device *device,
+                     uint32_t offset, uint64_t *result);
 uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
 int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, 
uint32_t caching);
 int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
-- 
2.19.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to