Re: [PATCH v6] drm: Optimise for continuous memory allocation

2022-12-23 Thread Dan Carpenter
Hi xinhui,

[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/xinhui-pan/drm-Optimise-for-continuous-memory-allocation/20221218-145922
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:    https://lore.kernel.org/r/20221218065708.93332-1-xinhui.pan%40amd.com
patch subject: [PATCH v6] drm: Optimise for continuous memory allocation
config: s390-randconfig-m041-20221218
compiler: s390-linux-gcc (GCC) 12.1.0

If you fix the issue, kindly add the following tags where applicable
| Reported-by: kernel test robot 
| Reported-by: Dan Carpenter 

smatch warnings:
drivers/gpu/drm/drm_buddy.c:501 find_continuous_blocks() error: uninitialized symbol 'block'.

vim +/block +501 drivers/gpu/drm/drm_buddy.c

8a257b57bc11a2 xinhui pan 2022-12-18  472  static struct drm_buddy_block *
8a257b57bc11a2 xinhui pan 2022-12-18  473  find_continuous_blocks(struct drm_buddy *mm,
8a257b57bc11a2 xinhui pan 2022-12-18  474                         int order,
8a257b57bc11a2 xinhui pan 2022-12-18  475                         unsigned long flags,
8a257b57bc11a2 xinhui pan 2022-12-18  476                         struct drm_buddy_block **lb)
8a257b57bc11a2 xinhui pan 2022-12-18  477  {
8a257b57bc11a2 xinhui pan 2022-12-18  478      struct list_head *head = &mm->free_list[order - 1];
8a257b57bc11a2 xinhui pan 2022-12-18  479      struct drm_buddy_block *free_block, *first = NULL, *last = NULL;
8a257b57bc11a2 xinhui pan 2022-12-18  480  
8a257b57bc11a2 xinhui pan 2022-12-18  481      /*
8a257b57bc11a2 xinhui pan 2022-12-18  482       * Look for continuous free memory in buddy and buddy-in-law.
8a257b57bc11a2 xinhui pan 2022-12-18  483       * IOW, the most left blocks at right of free block and the most right
8a257b57bc11a2 xinhui pan 2022-12-18  484       * blocks at left of free block.
8a257b57bc11a2 xinhui pan 2022-12-18  485       */
8a257b57bc11a2 xinhui pan 2022-12-18  486  
8a257b57bc11a2 xinhui pan 2022-12-18  487      list_for_each_entry(free_block, head, link) {
8a257b57bc11a2 xinhui pan 2022-12-18  488          struct drm_buddy_block *buddy, *parent, *block;
8a257b57bc11a2 xinhui pan 2022-12-18  489          int left, min_order = 0;
8a257b57bc11a2 xinhui pan 2022-12-18  490          LIST_HEAD(fbl);
8a257b57bc11a2 xinhui pan 2022-12-18  491  
8a257b57bc11a2 xinhui pan 2022-12-18  492          parent = free_block->parent;
8a257b57bc11a2 xinhui pan 2022-12-18  493          if (!parent)
8a257b57bc11a2 xinhui pan 2022-12-18  494              continue;
8a257b57bc11a2 xinhui pan 2022-12-18  495  
8a257b57bc11a2 xinhui pan 2022-12-18  496          left = parent->left == free_block;
8a257b57bc11a2 xinhui pan 2022-12-18  497          list_add(&free_block->tmp_link, &fbl);
8a257b57bc11a2 xinhui pan 2022-12-18  498          buddy = __get_buddy(free_block);
8a257b57bc11a2 xinhui pan 2022-12-18  499          __continuous_block_in_tree(buddy, &fbl, left, min_order);
8a257b57bc11a2 xinhui pan 2022-12-18  500  
8a257b57bc11a2 xinhui pan 2022-12-18 @501          while (parent && !((parent->left == block) ^ left)) {
                                                                                     ^^^^^
Not initialized on first iteration.

8a257b57bc11a2 xinhui pan 2022-12-18  502              block = parent;
8a257b57bc11a2 xinhui pan 2022-12-18  503              parent = parent->parent;
8a257b57bc11a2 xinhui pan 2022-12-18  504          }
8a257b57bc11a2 xinhui pan 2022-12-18  505  
8a257b57bc11a2 xinhui pan 2022-12-18  506          if (!parent)
8a257b57bc11a2 xinhui pan 2022-12-18  507              continue;
8a257b57bc11a2 xinhui pan 2022-12-18  508  
8a257b57bc11a2 xinhui pan 2022-12-18  509          buddy = __get_buddy(block);
8a257b57bc11a2 xinhui pan 2022-12-18  510          __continuous_block_in_tree(buddy, &fbl, !left, min_order);
8a257b57bc11a2 xinhui pan 2022-12-18  511  
8a257b57bc11a2 xinhui pan 2022-12-18  512          /* list head of fbl is invalid outside.
8a257b57bc11a2 xinhui pan 2022-12-18  513           * Walk through list from first fo last only.
8a257b57bc11a2 xinhui pan 2022-12-18  514           */
8a257b57bc11a2 xinhui pan 2022-12-18  515          if (__free_block_in_order(&fbl, free_block, order, &first, &last))
8a257b57bc11a2 xinhui pan 2022-12-18  516              break;
8a257b57bc11a2 xinhui pan 2022-12-18  517      }
8a257b57bc11a2 xinhui pan 2022-12-18  518  
8a257b57bc11a2 xinhui pan 2022-12-18  519      *lb = last;
8a257b57bc11a2 xinhui pan 2022-12-18  520      return first;
8a257b57bc11a2 xinhui pan 2022-12-18  521  }
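The simplest fix is probably to give 'block' an initial value before the
upward walk, e.g. (a sketch only; whether starting the walk from free_block
matches the intended semantics is for xinhui to confirm):

-		struct drm_buddy_block *buddy, *parent, *block;
+		struct drm_buddy_block *buddy, *parent, *block = free_block;

With block == free_block on entry, (parent->left == block) has the same value
that 'left' was computed from, so the first loop test passes by construction
and the walk climbs from the free block's own level.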

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp



[PATCH v2] drm/amdgpu: Retry DDC probing on DVI on failure if we got an HPD interrupt

2022-12-23 Thread xurui
HPD signals on DVI ports can be fired off before the pins required for
DDC probing actually make contact, due to the pins for HPD making
contact first. This results in an HPD signal being asserted but DDC
probing failing, resulting in hotplugging occasionally failing.

Rescheduling the hotplug work for a second when we run into an HPD
signal with a failing DDC probe usually gives enough time for the rest
of the connector's pins to make contact, and fixes this issue.

Signed-off-by: xurui 
Reported-by: kernel test robot
---
V1 -> V2: Fixed a compilation error

 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_connectors.c| 22 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  |  1 +
 drivers/gpu/drm/amd/amdgpu/dce_v10_0.c|  6 ++---
 drivers/gpu/drm/amd/amdgpu/dce_v11_0.c|  6 ++---
 drivers/gpu/drm/amd/amdgpu/dce_v6_0.c |  6 ++---
 drivers/gpu/drm/amd/amdgpu/dce_v8_0.c |  6 ++---
 8 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6b74df446694..b1d901fe578e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -870,7 +870,7 @@ struct amdgpu_device {
struct amdgpu_vkms_output   *amdgpu_vkms_output;
struct amdgpu_mode_info mode_info;
/* For pre-DCE11. DCE11 and later are in "struct amdgpu_device->dm" */
-   struct work_struct  hotplug_work;
+   struct delayed_work hotplug_work;
struct amdgpu_irq_src   crtc_irq;
struct amdgpu_irq_src   vline0_irq;
struct amdgpu_irq_src   vupdate_irq;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 2ebbc6382a06..d2abd334b1b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -996,13 +996,33 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
}
}
 
+   if (amdgpu_connector->detected_hpd_without_ddc) {
+   force = true;
+   amdgpu_connector->detected_hpd_without_ddc = false;
+   }
+
if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
ret = connector->status;
goto exit;
}
 
-   if (amdgpu_connector->ddc_bus)
+   if (amdgpu_connector->ddc_bus) {
dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
+
+   /* Sometimes the pins required for the DDC probe on DVI
+* connectors don't make contact at the same time that the ones
+* for HPD do. If the DDC probe fails even though we had an HPD
+* signal, try again later
+*/
+   if (!dret && !force &&
+   amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
+   DRM_DEBUG_KMS("hpd detected without ddc, retrying in 1 second\n");
+   amdgpu_connector->detected_hpd_without_ddc = true;
+   schedule_delayed_work(&adev->hotplug_work,
+ msecs_to_jiffies(1000));
+   goto exit;
+   }
+   }
if (dret) {
amdgpu_connector->detected_by_load = false;
amdgpu_connector_free_edid(connector);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index b22471b3bd63..a876648e3d7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -63,7 +63,7 @@
 void amdgpu_display_hotplug_work_func(struct work_struct *work)
 {
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
- hotplug_work);
+ hotplug_work.work);
struct drm_device *dev = adev_to_drm(adev);
struct drm_mode_config *mode_config = &dev->mode_config;
struct drm_connector *connector;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 8a39300b1a84..93c73faa5714 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -534,6 +534,7 @@ struct amdgpu_connector {
void *con_priv;
bool dac_load_detect;
bool detected_by_load; /* if the connection status was determined by load */
+   bool detected_hpd_without_ddc; /* if an HPD signal was detected on DVI, but ddc probing failed */
uint16_t connector_object_id;
struct amdgpu_hpd hpd;
struct amdgpu_router router;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
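The dce_v* hunks are cut off above; going by the diffstat they presumably
make the matching mechanical conversion to the delayed-work API, roughly
(illustrative sketch, not the literal hunks):

-	INIT_WORK(&adev->hotplug_work, amdgpu_display_hotplug_work_func);
+	INIT_DELAYED_WORK(&adev->hotplug_work, amdgpu_display_hotplug_work_func);
...
-	schedule_work(&adev->hotplug_work);
+	schedule_delayed_work(&adev->hotplug_work, 0);
...
-	flush_work(&adev->hotplug_work);
+	flush_delayed_work(&adev->hotplug_work);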

Re: amdgpu refcount saturation

2022-12-23 Thread Michal Kubecek
On Mon, Dec 19, 2022 at 09:23:05AM +0100, Christian König wrote:
> Am 17.12.22 um 12:53 schrieb Borislav Petkov:
> > Hi folks,
> > 
> > this is with Linus' tree from Wed:
> > 
> > 041fae9c105a ("Merge tag 'f2fs-for-6.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs")
> > 
> > on a CZ laptop:
> > 
> > [7.782901] [drm] initializing kernel modesetting (CARRIZO 0x1002:0x9874 0x103C:0x807E 0xC4)
> > 
> > The splat is kinda messy:
> 
> Thanks for the notice, going to take a look today.
> 
> Regards,
> Christian.

In case it might help, I have similar crashes with 6.2 merge window
snapshots on a desktop machine with Radeon WX2100

[   16.045850] [drm] initializing kernel modesetting (POLARIS12 0x1002:0x6995 0x1002:0x0B0C 0x00).

The behavior seems pretty deterministic so far, the system boots
cleanly, login into KDE is fine but then it crashes as soon as I start
firefox.

Unfortunately, just like Boris, I always seem to have multiple stack
traces tangled together.

Michal


Commit 77856d911a8c:
--
[  165.210008] [ cut here ]
[  165.215427] refcount_t: underflow; use-after-free.
[  165.221026] WARNING: CPU: 14 PID: 1165 at lib/refcount.c:28 refcount_warn_saturate+0xba/0x110
[  165.230420] Modules linked in: echainiv esp4 af_packet tun 8021q garp mrp 
stp llc iscsi_ibft iscsi_boot_sysfs xt_REDIRECT xt_MASQUERADE xt_nat 
iptable_nat nf_nat deflate sm4_generic sm4_aesni_avx2_x86_64 xt_LOG 
sm4_aesni_avx_x86_64 nf_log_syslog sm4 twofish_generic twofish_avx_x86_64 
twofish_x86_64_3way twofish_x86_64 twofish_common camellia_generic xt_conntrack 
camellia_aesni_avx2 nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 
camellia_aesni_avx_x86_64 camellia_x86_64 serpent_avx2 serpent_avx_x86_64 
serpent_sse2_x86_64 serpent_generic blowfish_generic blowfish_x86_64 vmnet(OE) 
blowfish_common ppdev parport_pc parport cast5_avx_x86_64 
vmw_vsock_vmci_transport cast5_generic cast_common ipt_REJECT nf_reject_ipv4 
vsock des_generic libdes sm3_generic rfkill xt_tcpudp sm3_avx_x86_64 sm3 xt_set 
vmw_vmci cmac xcbc iptable_filter vmmon(OE) rmd160 bpfilter dmi_sysfs 
ip_set_hash_ip af_key ip_set xfrm_algo nfnetlink msr hwmon_vid dm_crypt essiv 
authenc trusted asn1_encoder tee amdgpu
[  165.230464]  intel_rapl_msr uvcvideo videobuf2_vmalloc iommu_v2 
videobuf2_memops drm_buddy i2c_dev videobuf2_v4l2 gpu_sched video 
intel_rapl_common snd_usb_audio videodev xfs drm_display_helper 
videobuf2_common snd_usbmidi_lib drm_ttm_helper ttm libcrc32c edac_mce_amd 
joydev mc irqbypass cec pcspkr wmi_bmof gigabyte_wmi k10temp i2c_piix4 
tiny_power_button rc_core igb dca thermal button acpi_cpufreq fuse configfs 
ip_tables x_tables ext4 mbcache jbd2 hid_generic uas usb_storage usbhid 
crct10dif_pclmul crc32_pclmul crc32c_intel xhci_pci polyval_clmulni 
xhci_pci_renesas polyval_generic gf128mul xhci_hcd ghash_clmulni_intel 
sha512_ssse3 aesni_intel crypto_simd nvme cryptd usbcore ccp sr_mod sp5100_tco 
cdrom nvme_core wmi snd_emu10k1 snd_hwdep snd_util_mem snd_ac97_codec ac97_bus 
snd_pcm snd_timer snd_rawmidi snd_seq_device snd soundcore sg dm_multipath 
dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua
[  165.339552] [ cut here ]
[  165.339552] [ cut here ]
[  165.339553] refcount_t: saturated; leaking memory.
[  165.339557] WARNING: CPU: 18 PID: 6237 at lib/refcount.c:19 refcount_warn_saturate+0x97/0x110
[  165.339562] Modules linked in: echainiv esp4 af_packet tun 8021q garp mrp 
stp llc iscsi_ibft iscsi_boot_sysfs xt_REDIRECT xt_MASQUERADE xt_nat 
iptable_nat nf_nat deflate sm4_generic sm4_aesni_avx2_x86_64 xt_LOG 
sm4_aesni_avx_x86_64 nf_log_syslog sm4 twofish_generic twofish_avx_x86_64 
twofish_x86_64_3way twofish_x86_64 twofish_common camellia_generic xt_conntrack 
camellia_aesni_avx2 nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 
camellia_aesni_avx_x86_64 camellia_x86_64 serpent_avx2 serpent_avx_x86_64 
serpent_sse2_x86_64 serpent_generic blowfish_generic blowfish_x86_64 vmnet(OE) 
blowfish_common ppdev parport_pc parport cast5_avx_x86_64 
vmw_vsock_vmci_transport cast5_generic cast_common ipt_REJECT nf_reject_ipv4 
vsock des_generic libdes sm3_generic rfkill xt_tcpudp sm3_avx_x86_64 sm3 xt_set 
vmw_vmci cmac xcbc iptable_filter vmmon(OE) rmd160 bpfilter dmi_sysfs 
ip_set_hash_ip af_key ip_set xfrm_algo nfnetlink msr hwmon_vid dm_crypt essiv 
authenc trusted asn1_encoder tee amdgpu
[  165.339588]  intel_rapl_msr uvcvideo videobuf2_vmalloc iommu_v2 
videobuf2_memops drm_buddy i2c_dev videobuf2_v4l2 gpu_sched video 
intel_rapl_common snd_usb_audio videodev xfs drm_display_helper 
videobuf2_common snd_usbmidi_lib drm_ttm_helper ttm libcrc32c edac_mce_amd 
joydev mc irqbypass cec pcspkr wmi_bmof gigabyte_wmi k10temp i2c_piix4 
tiny_power_button rc_core igb dca thermal button acpi_cpufreq fuse configfs 
ip_tables x_tables ext4 mbcache jbd2 hid_generic uas usb_storage usbhid 

Re: [PATCH] drm/amdgpu: grab extra fence reference for drm_sched_job_add_dependency

2022-12-23 Thread Michal Kubecek
On Mon, Dec 19, 2022 at 11:47:18AM +0100, Christian König wrote:
> That function consumes the reference.
> 
> Signed-off-by: Christian König 
> Fixes: aab9cf7b6954 ("drm/amdgpu: use scheduler dependencies for VM updates")

Tested-by: Michal Kubecek 

I can still see weird artefacts in some windows (firefox, konsole) but
those are probably unrelated, the refcount errors are gone with this patch.

Michal

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> index 59cf64216fbb..535cd6569bcc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> @@ -238,8 +238,10 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
>   /* Wait for PD/PT moves to be completed */
>   dma_resv_iter_begin(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL);
>   dma_resv_for_each_fence_unlocked(&cursor, fence) {
> + dma_fence_get(fence);
>   r = drm_sched_job_add_dependency(&p->job->base, fence);
>   if (r) {
> + dma_fence_put(fence);
>   dma_resv_iter_end(&cursor);
>   return r;
>   }
> -- 
> 2.34.1
> 




Re: amdgpu refcount saturation

2022-12-23 Thread Borislav Petkov
On Thu, Dec 22, 2022 at 10:20:37PM +0100, Michal Kubecek wrote:
> Unfortunately, just like Boris, I always seem to have multiple stack
> traces tangled together.

See if this fixes it:

https://lore.kernel.org/r/20221219104718.21677-1-christian.koe...@amd.com

Thx.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette


Re: [PATCH 0/2] Recover from failure to probe GPU

2022-12-23 Thread Mario Limonciello

On 12/22/22 13:41, Javier Martinez Canillas wrote:

[adding Thomas Zimmermann to CC list]

Hello Mario,

Interesting case.

On 12/22/22 19:30, Mario Limonciello wrote:

One of the first things that KMS drivers do during initialization is
destroy the system firmware framebuffer by means of
`drm_aperture_remove_conflicting_pci_framebuffers`



The reason why that's done at the very beginning is that there are no
guarantees that the firmware-provided framebuffer would keep working
after the real display controller driver re-initializes the IP block.
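For reference, the typical ordering in a PCI KMS driver's probe looks roughly
like this (a minimal sketch; the foo_* names are made up, only the DRM helper
is real):

static int foo_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
        int ret;

        /* Evict the firmware framebuffer (efifb/simpledrm) before the
         * driver starts reprogramming the display hardware. */
        ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &foo_drm_driver);
        if (ret)
                return ret;

        /* Only now: firmware loading, IP discovery, modesetting init... */
        return foo_device_init(pdev);
}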


This means that if for any reason the GPU failed to probe the user
will be stuck with at best a screen frozen at the last thing that
was shown before the KMS driver continued its probe.

The problem is most pronounced when new GPU support is introduced
because users will need to have a recent linux-firmware snapshot
on their system when they boot a kernel with matching support.



Right. That's a problem indeed but as mentioned there's a gap between
the firmware-provided framebuffer is removed and the real driver sets
up its framebuffer.
  

However the problem is further exacerbated in the case of amdgpu because
it has migrated to "IP discovery" where amdgpu will attempt to load
on "ALL" AMD GPUs even if the driver is missing support for IP blocks
contained in that GPU.

IP discovery requires some probing and isn't run until after the
framebuffer has been destroyed.

This means a situation can occur where a user purchases a new GPU not
yet supported by a distribution and when booting the installer it will
"freeze" even if the distribution doesn't have the matching kernel support
for those IP blocks.

The perfect example of this is Ubuntu 21.10 and the new dGPUs just
launched by AMD.  The installation media ships with kernel 5.19 (which
has IP discovery) but the amdgpu support for those IP blocks landed in
kernel 6.0. The matching linux-firmware was released after 21.10's launch.
The screen will freeze without nomodeset. Even if a user manages to install
and then upgrades to kernel 6.0 after install they'll still have the
problem of missing firmware, and the same experience.


s/21.10/22.10/



This is quite jarring for users, particularly if they don't know
that they have to use "nomodeset" to install.



I'm not familiar with AMD GPUs, but could be possible that this discovery
and firmware loading step be done at the beginning before the firmware FB
is removed ? That way the FB removal will not happen unless that succeeds.


Possible?  I think so, but maybe Alex can comment on this after the 
holidays as he's more familiar.


It would mean splitting and introducing an entirely new phase to driver 
initialization.  The information about the discovery table comes from VRAM.


amdgpu_driver_load_kms -> amdgpu_device_init -> amdgpu_device_ip_early_init

Basically that code specific would have to call earlier and then there 
would need to be a separate set of code for all the IP blocks to *just* 
collect what firmware they need.


  

To help the situation, allow drivers to re-run the init process for the
firmware framebuffer during a failed probe. As this problem is most
pronounced with amdgpu, this is the only driver changed.

But if this makes sense more generally for other KMS drivers, the call
can be added to the cleanup routine for those too.



The problem I see is that depending on how far the driver's probe function
went, there may not be possible to re-run the init process. Since firmware
provided framebuffer may already been destroyed or the IP block just be in
a half initialized state.

I'm not against this series if it solves the issue in practice for amdgpu,
but don't think is a general solution and would like to know Thomas' opinion
on this before as well


Running with this idea, I'm pretty sure that request_firmware returns 
-ENOENT in this case. So another proposal for when to trigger this flow 
would be to only do it on -ENOENT.  We could then also change 
amdgpu_discovery.c to return -ENOENT when an IP block isn't supported 
instead of the current -EINVAL.


Or we could instead co-opt -ENOTSUPP and remap all the cases that we 
explicitly want the system framebuffer to re-initialize to that.
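Concretely, with the sysfb_enable() hook from this series, the probe error
path could key off the error code, something like (sketch):

err_pci:
        pci_disable_device(pdev);
        /* Hand the display back to the firmware framebuffer only when the
         * failure means "unsupported here": missing firmware / unknown IP. */
        if (ret == -ENOENT)
                sysfb_enable();
        return ret;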


Re: [PATCH 2/2] drm/amd: Re-create firmware framebuffer on failure to probe

2022-12-23 Thread Ernst Sjöstrand
What about a system with multiple GPUs?
Hybrid graphics?
Headless systems?

Regards
//Ernst

On Thu, Dec 22, 2022 at 19:30, Mario Limonciello <mario.limoncie...@amd.com> wrote:

> If the probe sequence fails then the user is stuck with a frozen
> screen and can only really recover via SSH or by rebooting and
> applying nomodeset to the kernel command line.
>
> This is particularly problematic as newer GPUs are introduced because
> distributions may take some time to land newer GPU firmware.
>
> So when probe fails, re-create the system framebuffer so that the
> user at least has basic graphics support.
>
> Signed-off-by: Mario Limonciello 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index bf2d50c8c92a..8961c62ab29b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -40,6 +40,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>
>  #include "amdgpu.h"
>  #include "amdgpu_irq.h"
> @@ -2187,6 +2188,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
>
>  err_pci:
> pci_disable_device(pdev);
> +   sysfb_enable();
> return ret;
>  }
>
> --
> 2.34.1
>
>


Re: [PATCH 16/16] drm/amd/display: Don't restrict bpc to 8 bpc

2022-12-23 Thread Harry Wentland



On 12/14/22 04:01, Pekka Paalanen wrote:
> On Tue, 13 Dec 2022 18:20:59 +0100
> Michel Dänzer  wrote:
> 
>> On 12/12/22 19:21, Harry Wentland wrote:
>>> This will let us pass kms_hdr.bpc_switch.
>>>
>>> I don't see any good reasons why we still need to
>>> limit bpc to 8 bpc and doing so is problematic when
>>> we enable HDR.
>>>
>>> If I remember correctly there might have been some
>>> displays out there where the advertised link bandwidth
>>> was not large enough to drive the default timing at
>>> max bpc. This would lead to an atomic commit/check
>>> failure which should really be handled in compositors
>>> with some sort of fallback mechanism.
>>>
>>> If this somehow turns out to still be an issue I
>>> suggest we add a module parameter to allow users to
>>> limit the max_bpc to a desired value.  
>>
>> While leaving the fallback for user space to handle makes some sense
>> in theory, in practice most KMS display servers likely won't handle
>> it.
>>
>> Another issue is that if mode validation is based on the maximum bpc
>> value, it may reject modes which would work with lower bpc.
>>
>>
>> What Ville (CC'd) suggested before instead (and what i915 seems to be
>> doing already) is that the driver should do mode validation based on
>> the *minimum* bpc, and automatically make the effective bpc lower
>> than the maximum as needed to make the rest of the atomic state work.
> 
> A driver is always allowed to choose a bpc lower than max_bpc, so it
> very well should do so when necessary due to *known* hardware etc.
> limitations.
> 

I spent a bunch of time to figure out how this actually pans out in
amdgpu and it looks like we're doing the right thing, i.e. if bandwidth
limitations require it we'll downgrade bpc appropriately. These changes
happened over the last couple years or so. So while raising the default
max_bpc wasn't safe in amdgpu years ago it is completely fine now.

As for the relevant code it's mostly handled in create_validate_stream_for_sink
in amdgpu_dm.c where we iterate over a stream's mode validation with
decreasing bpc if it fails (down to a bpc of 6).
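That retry loop boils down to roughly the following (paraphrased sketch of
the logic, not the literal amdgpu_dm code):

	do {
		stream = create_stream_for_sink(aconnector, drm_mode,
						dm_state, old_stream,
						requested_bpc);
		dc_result = dc_validate_stream(adev->dm.dc, stream);
		if (dc_result != DC_OK) {
			dc_stream_release(stream);
			stream = NULL;
			requested_bpc -= 2;	/* e.g. 10 -> 8 -> 6 */
		}
	} while (dc_result != DC_OK && requested_bpc >= 6);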

For HDMI we also have a separate adjust_colour_depth_from_display_info
function that downgrades bpc in order to fit within the max_tmds_clock.

So, in short, this change should not lead to displays not lighting up
because we no longer force a given bpc.

> So things like mode validation cannot just look at a single max or min
> bpc, but it needs to figure out if there is any usable bpc value that
> makes the mode work.
> 
> The max_bpc knob exists only for the cases where the sink undetectably
> malfunctions unless the bpc is artificially limited more than seems
> necessary. That malfunction requires a human to detect, and reconfigure
> their system as we don't have a quirk database for this I think.
> 
> The question of userspace wanting a specific bpc is a different matter
> and an unsolved one. It also ties to userspace wanting to use the
> current mode to avoid a mode switch between e.g. hand-off from firmware
> boot splash to proper userspace. That's also unsolved AFAIK.
> 

Agreed, the current "max bpc" just sets a max. We'd probably want a
"min bpc" if userspace needs a minimum (e.g., for HDR).

Harry

> OTOH, we have the discussion that concluded as
> https://gitlab.freedesktop.org/wayland/weston/-/issues/612#note_1359898
> which really puts userspace in charge of max_bpc, so the driver-chosen
> default value does not have much impact as long as it makes the
> firmware-chosen video mode to continue, as requested in
> https://gitlab.freedesktop.org/wayland/weston/-/merge_requests/995
> given that userspace cannot know what the actual bpc currently is nor
> set the exact bpc to keep it the same.
> 
> 
> Thanks,
> pq



[RFC 0/7] RFC: Usermode queue for AMDGPU driver

2022-12-23 Thread Shashank Sharma
This is an RFC series to implement usermode graphics queues for the AMDGPU
driver (Navi 3X and above). The idea of a usermode graphics queue is to
allow direct workload submission from a userspace graphics process that
has an amdgpu graphics context.

Once we have some initial feedback on the design, we will publish a
follow up V1 series with a libdrm consumer test. 

Cc: Alex Deucher 
Cc: Christian Koenig 

Alex Deucher (1):
  drm/amdgpu: UAPI for user queue management

Arunpravin Paneer Selvam (1):
  drm/amdgpu: Secure semaphore for usermode queue

Arvind Yadav (1):
  drm/amdgpu: Create MQD for userspace queue

Shashank Sharma (4):
  drm/amdgpu: Add usermode queue for gfx work
  drm/amdgpu: Allocate doorbell slot for user queue
  drm/amdgpu: Create context for usermode queue
  drm/amdgpu: Map userqueue into HW

 drivers/gpu/drm/amd/amdgpu/Makefile   |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  14 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h   |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 486 
 .../amd/amdgpu/amdgpu_userqueue_secure_sem.c  | 245 
 .../drm/amd/include/amdgpu_usermode_queue.h   |  68 +++
 .../amd/include/amdgpu_usermode_queue_mqd.h   | 544 ++
 include/uapi/drm/amdgpu_drm.h |  52 ++
 8 files changed, 1413 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
 create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
 create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h

-- 
2.34.1



[RFC 1/7] drm/amdgpu: UAPI for user queue management

2022-12-23 Thread Shashank Sharma
From: Alex Deucher 

This patch introduces a new UAPI/IOCTL for a usermode graphics
queue. The userspace app will fill this structure and request
the graphics driver to add a graphics work queue for it. The
output of this UAPI is a queue id.

This UAPI maps the queue into GPU, so the graphics app can start
submitting work to the queue as soon as the call returns.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
 include/uapi/drm/amdgpu_drm.h | 52 +++
 1 file changed, 52 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 0d93ec132ebb..a3d0dd6f62c5 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -54,6 +54,7 @@ extern "C" {
 #define DRM_AMDGPU_VM  0x13
 #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
 #define DRM_AMDGPU_SCHED   0x15
+#define DRM_AMDGPU_USERQ   0x16
 
 #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
 #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -71,6 +72,7 @@ extern "C" {
 #define DRM_IOCTL_AMDGPU_VM	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
 #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
 #define DRM_IOCTL_AMDGPU_SCHED	DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ	DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
 
 /**
  * DOC: memory domains
@@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
union drm_amdgpu_ctx_out out;
 };
 
+/* user queue IOCTL */
+#define AMDGPU_USERQ_OP_CREATE 1
+#define AMDGPU_USERQ_OP_FREE   2
+
+#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
+#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
+
+struct drm_amdgpu_userq_mqd {
+   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
+   __u32   flags;
+   /** IP type: AMDGPU_HW_IP_* */
+   __u32   ip_type;
+   /** GEM object handle */
+   __u32   doorbell_handle;
+   /** Doorbell offset in dwords */
+   __u32   doorbell_offset;
+   /** GPU virtual address of the queue */
+   __u64   queue_va;
+   /** Size of the queue in bytes */
+   __u64   queue_size;
+   /** GPU virtual address of the rptr */
+   __u64   rptr_va;
+   /** GPU virtual address of the wptr */
+   __u64   wptr_va;
+};
+
+struct drm_amdgpu_userq_in {
+   /** AMDGPU_USERQ_OP_* */
+   __u32   op;
+   /** Flags */
+   __u32   flags;
+   /** Context handle to associate the queue with */
+   __u32   ctx_id;
+   __u32   pad;
+   /** Queue descriptor */
+   struct drm_amdgpu_userq_mqd mqd;
+};
+
+struct drm_amdgpu_userq_out {
+   /** Queue handle */
+   __u32   q_id;
+   /** Flags */
+   __u32   flags;
+};
+
+union drm_amdgpu_userq {
+   struct drm_amdgpu_userq_in in;
+   struct drm_amdgpu_userq_out out;
+};
+
 /* vm ioctl */
 #define AMDGPU_VM_OP_RESERVE_VMID  1
 #define AMDGPU_VM_OP_UNRESERVE_VMID2
-- 
2.34.1
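For context, a userspace consumer would drive this ioctl roughly as follows
(an illustrative sketch against the proposed UAPI, using libdrm's
drmCommandWriteRead; the helper name and error handling are mine):

#include <stdint.h>
#include <xf86drm.h>
#include "amdgpu_drm.h"

static int create_user_queue(int fd, uint32_t ctx_id,
                             const struct drm_amdgpu_userq_mqd *mqd,
                             uint32_t *q_id)
{
        union drm_amdgpu_userq args = {0};
        int r;

        args.in.op = AMDGPU_USERQ_OP_CREATE;
        args.in.ctx_id = ctx_id;
        args.in.mqd = *mqd;

        r = drmCommandWriteRead(fd, DRM_AMDGPU_USERQ, &args, sizeof(args));
        if (r)
                return r;

        *q_id = args.out.q_id;  /* queue is mapped and ready for submissions */
        return 0;
}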



[RFC 2/7] drm/amdgpu: Add usermode queue for gfx work

2022-12-23 Thread Shashank Sharma
This patch adds skeleton code for usermode queue creation. It
typically contains:
- A new structure to keep all the user queue data in one place.
- An IOCTL function to create/free a usermode queue.
- A function to generate unique index for the queue.
- A global pointer in amdgpu_device

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/Makefile   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h   |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 187 ++
 .../drm/amd/include/amdgpu_usermode_queue.h   |  50 +
 5 files changed, 246 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
 create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 6ad39cf71bdd..e2a34ee57bfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -209,6 +209,8 @@ amdgpu-y += \
 # add amdkfd interfaces
 amdgpu-y += amdgpu_amdkfd.o
 
+# add usermode queue
+amdgpu-y += amdgpu_userqueue.o
 
 ifneq ($(CONFIG_HSA_AMD),)
 AMDKFD_PATH := ../amdkfd
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8639a4f9c6e8..4b566fcfca18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -749,6 +749,11 @@ struct amdgpu_mqd {
struct amdgpu_mqd_prop *p);
 };
 
+struct amdgpu_userq_globals {
+   struct ida ida;
+   struct mutex userq_mutex;
+};
+
 #define AMDGPU_RESET_MAGIC_NUM 64
 #define AMDGPU_MAX_DF_PERFMONS 4
 #define AMDGPU_PRODUCT_NAME_LEN 64
@@ -955,6 +960,7 @@ struct amdgpu_device {
boolenable_mes_kiq;
struct amdgpu_mes   mes;
struct amdgpu_mqd   mqds[AMDGPU_HW_IP_NUM];
+   struct amdgpu_userq_globals userq;
 
/* df */
struct amdgpu_dfdf;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 0fa0e56daf67..f7413859b14f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -57,6 +57,7 @@ struct amdgpu_ctx {
unsigned long   ras_counter_ce;
unsigned long   ras_counter_ue;
uint32_tstable_pstate;
+   struct amdgpu_usermode_queue*userq;
 };
 
 struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
new file mode 100644
index ..3b6e8f75495c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_mes.h"
+#include "amdgpu_usermode_queue.h"
+#include "soc15_common.h"
+
+#define CHECK_ACCESS(a) (access_ok((const void __user *)a, sizeof(__u64)))
+
+static int
+amdgpu_userqueue_index(struct amdgpu_device *adev)
+{
+int index;
+struct amdgpu_userq_globals *uqg = &adev->userq;
+
+index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, GFP_KERNEL);
+return index;
+}
+
+static void
+amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
+{
+struct amdgpu_userq_globals *uqg = &adev->userq;
+
+ida_simple_remove(&uqg->ida, queue->queue_id);
+}
+
+static int
+amdgpu_userqueue_validate_input(struct amdgpu_device *adev, struct drm_amdgpu_userq_mqd *mqd_in)
+{
+if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || mqd_in->doorbell_offset == 0) {
+DRM_ERROR("Invalid queue object address\n");
+return -EINVAL;
+}
+
+if (mqd_in->queue_size == 0 |

[RFC 4/7] drm/amdgpu: Allocate doorbell slot for user queue

2022-12-23 Thread Shashank Sharma
This patch allocates a doorbell slot in the doorbell BAR for the usermode
queue. We are using the unique queue-id to get this slot from MES.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 28 +++
 1 file changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index a91cc304cb9e..b566ce4cb7f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -50,6 +50,25 @@ amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode
 ida_simple_remove(&uqg->ida, queue->queue_id);
 }
 
+static int
+amdgpu_userqueue_get_doorbell(struct amdgpu_device *adev,
+struct amdgpu_usermode_queue *queue)
+{
+int r;
+unsigned int doorbell_index;
+
+r = amdgpu_mes_alloc_process_doorbells(adev, &doorbell_index);
+   if (r < 0) {
+DRM_ERROR("Failed to allocate doorbell for user queue\n");
+return r;
+}
+
+/* We are using unique queue-id to get doorbell here */
+queue->doorbell_index = amdgpu_mes_get_doorbell_dw_offset_in_bar(adev,
+   doorbell_index, queue->queue_id);
+return 0;
+}
+
 static void
 amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
 {
@@ -257,12 +276,21 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
 goto free_queue;
 }
 
+r = amdgpu_userqueue_get_doorbell(adev, queue);
+if (r) {
+DRM_ERROR("Failed to create doorbell for queue\n");
+goto free_mqd;
+}
+
 ctx->userq = queue;
 args->out.q_id = queue->queue_id;
 args->out.flags = 0;
 mutex_unlock(&adev->userq.userq_mutex);
 return 0;
 
+free_mqd:
+amdgpu_userqueue_destroy_mqd(queue);
+
 free_queue:
 amdgpu_userqueue_remove_index(adev, queue);
 mutex_unlock(&adev->userq.userq_mutex);
-- 
2.34.1



[RFC 3/7] drm/amdgpu: Create MQD for userspace queue

2022-12-23 Thread Shashank Sharma
From: Arvind Yadav 

MQD describes the properties of a user queue to the HW, and allows it to
accurately configure the queue while mapping it in GPU HW. This patch
adds:
- A new header file which contains the MQD definition
- A new function which creates an MQD object and fills it with userqueue
  data

Cc: Alex Deucher 
Cc: Christian Koenig 

Signed-off-by: Arvind Yadav 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 138 +
 .../amd/include/amdgpu_usermode_queue_mqd.h   | 544 ++
 2 files changed, 682 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 3b6e8f75495c..a91cc304cb9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -25,7 +25,10 @@
 #include "amdgpu_vm.h"
 #include "amdgpu_mes.h"
 #include "amdgpu_usermode_queue.h"
+#include "amdgpu_usermode_queue_mqd.h"
 #include "soc15_common.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
 
 #define CHECK_ACCESS(a) (access_ok((const void __user *)a, sizeof(__u64)))
 
@@ -47,6 +50,134 @@ amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode
 ida_simple_remove(&uqg->ida, queue->queue_id);
 }
 
+static void
+amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
+{
+struct amdgpu_usermode_queue_mqd *mqd = queue->mqd_cpu_ptr;
+uint64_t hqd_gpu_addr, wb_gpu_addr;
+uint32_t tmp;
+uint32_t rb_bufsz;
+
+/* set up gfx hqd wptr */
+mqd->cp_gfx_hqd_wptr = 0;
+mqd->cp_gfx_hqd_wptr_hi = 0;
+
+/* set the pointer to the MQD */
+mqd->cp_mqd_base_addr = queue->mqd_gpu_addr & 0xfffffffc;
+mqd->cp_mqd_base_addr_hi = upper_32_bits(queue->mqd_gpu_addr);
+
+/* set up mqd control */
+tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
+tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
+tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
+tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
+mqd->cp_gfx_mqd_control = tmp;
+
+/* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
+tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
+tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
+mqd->cp_gfx_hqd_vmid = 0;
+
+/* set up default queue priority level
+* 0x0 = low priority, 0x1 = high priority */
+tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
+tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
+mqd->cp_gfx_hqd_queue_priority = tmp;
+
+/* set up time quantum */
+tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
+tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
+mqd->cp_gfx_hqd_quantum = tmp;
+
+/* set up gfx hqd base. this is similar as CP_RB_BASE */
+hqd_gpu_addr = queue->queue_gpu_addr >> 8;
+mqd->cp_gfx_hqd_base = hqd_gpu_addr;
+mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
+
+/* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
+wb_gpu_addr = queue->rptr_gpu_addr;
+mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
+mqd->cp_gfx_hqd_rptr_addr_hi =
+upper_32_bits(wb_gpu_addr) & 0xffff;
+
+/* set up rb_wptr_poll addr */
+wb_gpu_addr = queue->wptr_gpu_addr;
+mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
+rb_bufsz = order_base_2(queue->queue_size / 4) - 1;
+tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
+tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
+tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
+#endif
+mqd->cp_gfx_hqd_cntl = tmp;
+
+/* set up cp_doorbell_control */
+tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
+if (queue->use_doorbell) {
+tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+DOORBELL_OFFSET, queue->doorbell_index);
+tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+DOORBELL_EN, 1);
+} else {
+tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+DOORBELL_EN, 0);
+}
+mqd->cp_rb_doorbell_control = tmp;
+
+/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
+
+/* activate the queue */
+mqd->cp_gfx_hqd_active = 1;
+}
+
+static int
+amdgpu_userqueue_create_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
+{
+int r;
+int size = sizeof(struct amdgpu_usermode_queue_mqd);
+
+r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
+  

[RFC 5/7] drm/amdgpu: Create context for usermode queue

2022-12-23 Thread Shashank Sharma
The FW expects us to allocate at least one page as process
context space, and one for gang context space. This patch adds
objects for the same.

Cc: Alex Deucher 
Cc: Christian Koenig 

Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 57 +++
 .../drm/amd/include/amdgpu_usermode_queue.h   |  8 +++
 2 files changed, 65 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index b566ce4cb7f0..2a854a5e2f70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -69,6 +69,56 @@ amdgpu_userqueue_get_doorbell(struct amdgpu_device *adev,
 return 0;
 }
 
+static int
+amdgpu_userqueue_create_context(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
+{
+int r;
+struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
+struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
+/*
+ * The FW expects at least one page space allocated for
+ * process context related work, and one for gang context.
+ */
+r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+AMDGPU_GEM_DOMAIN_VRAM,
+&pctx->obj,
+&pctx->gpu_addr,
+&pctx->cpu_ptr);
+if (r) {
+DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
+return r;
+}
+
+r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+AMDGPU_GEM_DOMAIN_VRAM,
+&gctx->obj,
+&gctx->gpu_addr,
+&gctx->cpu_ptr);
+if (r) {
+DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
+amdgpu_bo_free_kernel(&pctx->obj,
+  &pctx->gpu_addr,
+  &pctx->cpu_ptr);
+return r;
+}
+
+return 0;
+}
+
+static void
+amdgpu_userqueue_free_context(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
+{
+struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
+struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
+
+amdgpu_bo_free_kernel(&pctx->obj,
+  &pctx->gpu_addr,
+  &pctx->cpu_ptr);
+amdgpu_bo_free_kernel(&gctx->obj,
+  &gctx->gpu_addr,
+  &gctx->cpu_ptr);
+}
+
 static void
 amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue)
 {
@@ -282,6 +332,12 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
 goto free_mqd;
 }
 
+r = amdgpu_userqueue_create_context(adev, queue);
+if (r < 0) {
+DRM_ERROR("Failed to create context for queue\n");
+goto free_mqd;
+}
+
 ctx->userq = queue;
 args->out.q_id = queue->queue_id;
 args->out.flags = 0;
@@ -306,6 +362,7 @@ void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
 struct amdgpu_usermode_queue *queue = ctx->userq;
 
 mutex_lock(&adev->userq.userq_mutex);
+amdgpu_userqueue_free_context(adev, queue);
 amdgpu_userqueue_destroy_mqd(queue);
 amdgpu_userqueue_remove_index(adev, queue);
 ctx->userq = NULL;
diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
index c1fe39ffaf72..8bf3c0be6937 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
@@ -26,6 +26,12 @@
 
 #define AMDGPU_MAX_USERQ 512
 
+struct amdgpu_userq_ctx {
+   struct amdgpu_bo *obj;
+   uint64_t gpu_addr;
+   void *cpu_ptr;
+};
+
 struct amdgpu_usermode_queue {
int queue_id;
int queue_type;
@@ -44,6 +50,8 @@ struct amdgpu_usermode_queue {
 
struct amdgpu_bo*mqd_obj;
struct amdgpu_vm*vm;
+   struct amdgpu_userq_ctx proc_ctx;
+   struct amdgpu_userq_ctx gang_ctx;
struct list_headlist;
 };
 
-- 
2.34.1



[RFC 7/7] drm/amdgpu: Secure semaphore for usermode queue

2022-12-23 Thread Shashank Sharma
From: Arunpravin Paneer Selvam 

This is a WIP patch, which adds a kernel implementation of a secure
semaphore for the usermode queues. The UAPI for the same is yet to
be implemented.

The idea is to create an RO page and map it to each process requesting a
user mode queue, and give them a unique offset in the page, which can be
polled (like wait_mem) for sync.
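One way the unique offsets could be handed out, matching the 'used' bitmap
added to amdgpu_userq_sec_sem below (a sketch assuming one 64-bit slot per
queue):

static int amdgpu_userqueue_sem_alloc_slot(struct amdgpu_userq_sec_sem *sem,
                                           u64 *offset)
{
        u32 bit = find_first_zero_bit(sem->used, sem->num_sem);

        if (bit >= sem->num_sem)
                return -ENOSPC; /* all slots in the RO page are taken */

        set_bit(bit, sem->used);
        *offset = bit * sizeof(u64);    /* unique offset inside the page */
        return 0;
}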

Cc: Alex Deucher 
Cc: Christian Koenig 
Cc: Shashank Sharma 

Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/amd/amdgpu/Makefile   |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   8 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |   7 +-
 .../amd/amdgpu/amdgpu_userqueue_secure_sem.c  | 245 ++
 .../drm/amd/include/amdgpu_usermode_queue.h   |  10 +
 .../amd/include/amdgpu_usermode_queue_mqd.h   |   4 +-
 6 files changed, 272 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index e2a34ee57bfb..daec7bb9ab3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -211,6 +211,7 @@ amdgpu-y += amdgpu_amdkfd.o
 
 # add usermode queue
 amdgpu-y += amdgpu_userqueue.o
+amdgpu-y += amdgpu_userqueue_secure_sem.o
 
 ifneq ($(CONFIG_HSA_AMD),)
 AMDKFD_PATH := ../amdkfd
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4b566fcfca18..7325c01efc90 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -749,9 +749,17 @@ struct amdgpu_mqd {
struct amdgpu_mqd_prop *p);
 };
 
+struct amdgpu_userq_sec_sem {
+   struct amdgpu_bo *sem_obj;
+   u64 gpu_addr;
+   u32 num_sem;
+   unsigned long used[DIV_ROUND_UP(64, BITS_PER_LONG)];
+};
+
 struct amdgpu_userq_globals {
struct ida ida;
struct mutex userq_mutex;
+   struct amdgpu_userq_sec_sem sem;
 };
 
 #define AMDGPU_RESET_MAGIC_NUM 64
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index b164e24247ca..2af634bbe3dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -261,6 +261,10 @@ amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct amdgpu_usermode_qu
 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
 mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
 
+/* Setup semaphore fence address */
+mqd->fenceaddress_lo = queue->sem_data.sem_gpu_addr & 0xFFFFFFFC;
+mqd->fenceaddress_hi = upper_32_bits(queue->sem_data.sem_gpu_addr) & 0xFFFF;
+
 /* activate the queue */
 mqd->cp_gfx_hqd_active = 1;
 }
@@ -472,10 +476,11 @@ int amdgpu_userqueue_init(struct amdgpu_device *adev)
 struct amdgpu_userq_globals *uqg = &adev->userq;
 
 mutex_init(&uqg->userq_mutex);
+amdgpu_userqueue_sec_sem_init(adev);
 return 0;
 }
 
 void amdgpu_userqueue_fini(struct amdgpu_device *adev)
 {
-
+amdgpu_userqueue_sec_sem_fini(adev);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
new file mode 100644
index ..6e6a7d62a300
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
@@ -0,0 +1,245 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_mes.h"
+#include "amdgpu_usermode_queue.h"
+#include "amdgpu_usermode_queue_mqd.h"
+
+static int amdgpu_userqueue_sem_addr_unmap(struct amdgpu_device *adev,
+  struct amdgpu_usermode_queue *q)
+{
+struct amdgpu_userq_sec_sem_data *sem_bo_data = &q->sem_data;
+struct amdgpu_bo_va *bo_va = sem_bo_data->sem_data_va;
+struct amdgpu_vm *vm = bo_va->base.v

[RFC 6/7] drm/amdgpu: Map userqueue into HW

2022-12-23 Thread Shashank Sharma
This patch adds the functions to map/unmap the usermode queue into/from
the HW, using the prepared MQD and other objects. After this mapping, the
queue will be ready to accept the workload.

Cc: Alex Deucher 
Cc: Christian Koenig 

Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 71 +++
 1 file changed, 71 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 2a854a5e2f70..b164e24247ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -50,6 +50,67 @@ amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct amdgpu_usermode
 ida_simple_remove(&uqg->ida, queue->queue_id);
 }
 
+static int amdgpu_userqueue_map(struct amdgpu_device *adev,
+struct amdgpu_usermode_queue *queue)
+{
+int r;
+struct mes_add_queue_input queue_input;
+
+memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
+
+queue_input.process_va_start = 0;
+queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
+queue_input.process_quantum = 10; /* 10ms */
+queue_input.gang_quantum = 1; /* 1ms */
+queue_input.paging = false;
+
+queue_input.gang_context_addr = queue->gang_ctx.gpu_addr;
+queue_input.process_context_addr = queue->proc_ctx.gpu_addr;
+queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+queue_input.gang_global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+
+queue_input.process_id = queue->pasid;
+queue_input.queue_type = queue->queue_type;
+queue_input.mqd_addr = queue->mqd_gpu_addr;
+queue_input.wptr_addr = queue->wptr_gpu_addr;
+queue_input.queue_size = queue->queue_size >> 2;
+queue_input.doorbell_offset = queue->doorbell_index;
+queue_input.page_table_base_addr =  queue->vm->pd_phys_addr;
+
+amdgpu_mes_lock(&adev->mes);
+r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
+amdgpu_mes_unlock(&adev->mes);
+if (r) {
+DRM_ERROR("Failed to map queue in HW, err (%d)\n", r);
+return r;
+}
+
+DRM_DEBUG_DRIVER("Queue %d mapped successfully\n", queue->queue_id);
+return 0;
+}
+
+static void amdgpu_userqueue_unmap(struct amdgpu_device *adev,
+struct amdgpu_usermode_queue *queue)
+{
+int r;
+struct mes_remove_queue_input queue_input;
+
+memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
+queue_input.doorbell_offset = queue->doorbell_index;
+queue_input.gang_context_addr = queue->gang_ctx.gpu_addr;
+
+amdgpu_mes_lock(&adev->mes);
+r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
+amdgpu_mes_unlock(&adev->mes);
+
+if (r) {
+DRM_ERROR("Failed to unmap usermode queue %d\n", queue->queue_id);
+return;
+}
+
+DRM_DEBUG_DRIVER("Usermode queue %d unmapped\n", queue->queue_id);
+}
+
 static int
 amdgpu_userqueue_get_doorbell(struct amdgpu_device *adev,
 struct amdgpu_usermode_queue *queue)
@@ -338,12 +399,21 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, struct drm_file *filp,
 goto free_mqd;
 }
 
+r = amdgpu_userqueue_map(adev, queue);
+if (r < 0) {
+DRM_ERROR("Failed to map queue\n");
+goto free_ctx;
+}
+
 ctx->userq = queue;
 args->out.q_id = queue->queue_id;
 args->out.flags = 0;
 mutex_unlock(&adev->userq.userq_mutex);
 return 0;
 
+free_ctx:
+amdgpu_userqueue_free_context(adev, queue);
+
 free_mqd:
 amdgpu_userqueue_destroy_mqd(queue);
 
@@ -362,6 +432,7 @@ void amdgpu_userqueue_destroy(struct amdgpu_device *adev, struct drm_file *filp,
 struct amdgpu_usermode_queue *queue = ctx->userq;
 
 mutex_lock(&adev->userq.userq_mutex);
+amdgpu_userqueue_unmap(adev, queue);
 amdgpu_userqueue_free_context(adev, queue);
 amdgpu_userqueue_destroy_mqd(queue);
 amdgpu_userqueue_remove_index(adev, queue);
-- 
2.34.1