habanalabs: add "in device creation" status
authorOmer Shpigelman <oshpigelman@habana.ai>
Mon, 16 Aug 2021 10:27:12 +0000 (13:27 +0300)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 26 Sep 2021 12:10:24 +0000 (14:10 +0200)
[ Upstream commit 71731090ab17a208a58020e4b342fdfee280458a ]

On init, the disabled state is cleared right before hw_init and that
causes the device to report on "Operational" state before the device
initialization is finished. Although the char device is not yet exposed
to the user at this stage, the sysfs entries are exposed.

This can cause errors in monitoring applications that use the sysfs
entries.

In order to avoid this, a new state "in device creation" is introduced
to ne reported when the device is not disabled but is still in init
flow.

Signed-off-by: Omer Shpigelman <oshpigelman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/misc/habanalabs/common/device.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/common/habanalabs_drv.c
drivers/misc/habanalabs/common/sysfs.c
include/uapi/misc/habanalabs.h

index ff4cbde289c0b6ac27a5c8689e49ad00c607ffbf..4e9b677460bad10b494d99f1357fb0f219675a5a 100644 (file)
@@ -23,6 +23,8 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
                status = HL_DEVICE_STATUS_NEEDS_RESET;
        else if (hdev->disabled)
                status = HL_DEVICE_STATUS_MALFUNCTION;
+       else if (!hdev->init_done)
+               status = HL_DEVICE_STATUS_IN_DEVICE_CREATION;
        else
                status = HL_DEVICE_STATUS_OPERATIONAL;
 
@@ -44,6 +46,7 @@ bool hl_device_operational(struct hl_device *hdev,
        case HL_DEVICE_STATUS_NEEDS_RESET:
                return false;
        case HL_DEVICE_STATUS_OPERATIONAL:
+       case HL_DEVICE_STATUS_IN_DEVICE_CREATION:
        default:
                return true;
        }
index 6b3cdd7e068a364ccb39455db6003b90be708100..61db72ecec0e00764bdf6ec90e61292a30667391 100644 (file)
@@ -1798,7 +1798,7 @@ struct hl_dbg_device_entry {
 
 #define HL_STR_MAX     32
 
-#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_NEEDS_RESET + 1)
+#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_LAST + 1)
 
 /* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
  * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
index 4194cda2d04c39e0d06ecbf85ae4ebd9623ede3c..536451a9a16c97a4774181b5592f3f413d0d22b4 100644 (file)
@@ -318,12 +318,16 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
                hdev->asic_prop.fw_security_enabled = false;
 
        /* Assign status description string */
-       strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
-                                       "disabled", HL_STR_MAX);
+       strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL],
+                                       "operational", HL_STR_MAX);
        strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET],
                                        "in reset", HL_STR_MAX);
+       strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
+                                       "disabled", HL_STR_MAX);
        strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET],
                                        "needs reset", HL_STR_MAX);
+       strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
+                                       "in device creation", HL_STR_MAX);
 
        hdev->major = hl_major;
        hdev->reset_on_lockup = reset_on_lockup;
index db72df282ef8dc2b51c893cae3923a82f3d45a6f..34f9f2779962a15a0c62142bc47719a1cb7e3c62 100644 (file)
@@ -9,8 +9,7 @@
 
 #include <linux/pci.h>
 
-long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
-                                                               bool curr)
+long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
 {
        struct cpucp_packet pkt;
        u32 used_pll_idx;
@@ -44,8 +43,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
        return (long) result;
 }
 
-void hl_set_frequency(struct hl_device *hdev, u32 pll_index,
-                                                               u64 freq)
+void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
 {
        struct cpucp_packet pkt;
        u32 used_pll_idx;
@@ -285,16 +283,12 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr,
                                char *buf)
 {
        struct hl_device *hdev = dev_get_drvdata(dev);
-       char *str;
+       char str[HL_STR_MAX];
 
-       if (atomic_read(&hdev->in_reset))
-               str = "In reset";
-       else if (hdev->disabled)
-               str = "Malfunction";
-       else if (hdev->needs_reset)
-               str = "Needs Reset";
-       else
-               str = "Operational";
+       strscpy(str, hdev->status[hl_device_status(hdev)], HL_STR_MAX);
+
+       /* use uppercase for backward compatibility */
+       str[0] = 'A' + (str[0] - 'a');
 
        return sprintf(buf, "%s\n", str);
 }
index a47a731e452778d586798442266190be1ab7a723..b4b681b81df816e899785db26d7e3afba3d14b3b 100644 (file)
@@ -276,7 +276,9 @@ enum hl_device_status {
        HL_DEVICE_STATUS_OPERATIONAL,
        HL_DEVICE_STATUS_IN_RESET,
        HL_DEVICE_STATUS_MALFUNCTION,
-       HL_DEVICE_STATUS_NEEDS_RESET
+       HL_DEVICE_STATUS_NEEDS_RESET,
+       HL_DEVICE_STATUS_IN_DEVICE_CREATION,
+       HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION
 };
 
 /* Opcode for management ioctl