drm/amdgpu: revert "Add autodump debugfs node for gpu reset v8"
author: Christian König <christian.koenig@amd.com>
Thu, 30 Sep 2021 09:22:51 +0000 (11:22 +0200)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 6 Nov 2021 13:11:29 +0000 (14:11 +0100)
commit c8365dbda056578eebe164bf110816b1a39b4b7f upstream.

This reverts commit 728e7e0cd61899208e924472b9e641dbeb0775c4.

Further discussion reveals that this feature is severely broken
and needs to be reverted ASAP.

GPU reset can never be delayed by userspace, not even for debugging;
otherwise we can run into in-kernel deadlocks.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Nirmoy Das <nirmoy.das@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index a1c5bd2859fc325b9fb216361d6b0d6ad0a89c91..d90dc5efc3340f78c5238ad1821c126d7398e7d5 100644 (file)
@@ -1073,8 +1073,6 @@ struct amdgpu_device {
        char                            product_name[32];
        char                            serial[20];
 
-       struct amdgpu_autodump          autodump;
-
        atomic_t                        throttling_logging_enabled;
        struct ratelimit_state          throttling_logging_rs;
        uint32_t                        ras_hw_enabled;
index 83db7d8fa1508109a491bcd5e4c6911cf5f4523d..a0f197eaaec0ab32289f9469a12d09288814d87a 100644 (file)
@@ -27,7 +27,6 @@
 #include <linux/pci.h>
 #include <linux/uaccess.h>
 #include <linux/pm_runtime.h>
-#include <linux/poll.h>
 
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_securedisplay.h"
 #include "amdgpu_fw_attestation.h"
 
-int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev)
-{
 #if defined(CONFIG_DEBUG_FS)
-       unsigned long timeout = 600 * HZ;
-       int ret;
-
-       wake_up_interruptible(&adev->autodump.gpu_hang);
-
-       ret = wait_for_completion_interruptible_timeout(&adev->autodump.dumping, timeout);
-       if (ret == 0) {
-               pr_err("autodump: timeout, move on to gpu recovery\n");
-               return -ETIMEDOUT;
-       }
-#endif
-       return 0;
-}
-
-#if defined(CONFIG_DEBUG_FS)
-
-static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
-{
-       struct amdgpu_device *adev = inode->i_private;
-       int ret;
-
-       file->private_data = adev;
-
-       ret = down_read_killable(&adev->reset_sem);
-       if (ret)
-               return ret;
-
-       if (adev->autodump.dumping.done) {
-               reinit_completion(&adev->autodump.dumping);
-               ret = 0;
-       } else {
-               ret = -EBUSY;
-       }
-
-       up_read(&adev->reset_sem);
-
-       return ret;
-}
-
-static int amdgpu_debugfs_autodump_release(struct inode *inode, struct file *file)
-{
-       struct amdgpu_device *adev = file->private_data;
-
-       complete_all(&adev->autodump.dumping);
-       return 0;
-}
-
-static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_table_struct *poll_table)
-{
-       struct amdgpu_device *adev = file->private_data;
-
-       poll_wait(file, &adev->autodump.gpu_hang, poll_table);
-
-       if (amdgpu_in_reset(adev))
-               return POLLIN | POLLRDNORM | POLLWRNORM;
-
-       return 0;
-}
-
-static const struct file_operations autodump_debug_fops = {
-       .owner = THIS_MODULE,
-       .open = amdgpu_debugfs_autodump_open,
-       .poll = amdgpu_debugfs_autodump_poll,
-       .release = amdgpu_debugfs_autodump_release,
-};
-
-static void amdgpu_debugfs_autodump_init(struct amdgpu_device *adev)
-{
-       init_completion(&adev->autodump.dumping);
-       complete_all(&adev->autodump.dumping);
-       init_waitqueue_head(&adev->autodump.gpu_hang);
-
-       debugfs_create_file("amdgpu_autodump", 0600,
-               adev_to_drm(adev)->primary->debugfs_root,
-               adev, &autodump_debug_fops);
-}
 
 /**
  * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
@@ -1588,7 +1509,6 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
        }
 
        amdgpu_ras_debugfs_create_all(adev);
-       amdgpu_debugfs_autodump_init(adev);
        amdgpu_rap_debugfs_init(adev);
        amdgpu_securedisplay_debugfs_init(adev);
        amdgpu_fw_attestation_debugfs_init(adev);
index 141a8474e24f24d75b340b236ecdee7ef5b178f2..8b641f40fdf66ff431b9915d921ec47cbbbfd34f 100644 (file)
 /*
  * Debugfs
  */
-struct amdgpu_autodump {
-       struct completion               dumping;
-       struct wait_queue_head          gpu_hang;
-};
 
 int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
 int amdgpu_debugfs_init(struct amdgpu_device *adev);
@@ -37,4 +33,3 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
 void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
-int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev);
index b0824d2f0e0bd083e6244d4314212a43e9d66853..08e53ff74728289f5c197eab474d03d1cc89954b 100644 (file)
@@ -4407,10 +4407,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
        if (reset_context->reset_req_dev == adev)
                job = reset_context->job;
 
-       /* no need to dump if device is not in good state during probe period */
-       if (!adev->gmc.xgmi.pending_reset)
-               amdgpu_debugfs_wait_dump(adev);
-
        if (amdgpu_sriov_vf(adev)) {
                /* stop the data exchange thread */
                amdgpu_virt_fini_data_exchange(adev);