[PATCH] Driver of Intel(R) Gaussian & Neural Accelerator
From 00f3bad3bd4e9b4a60983857b805082750de30ed Mon Sep 17 00:00:00 2001 | |
From: Kamillaova <[email protected]> | |
Date: Sat, 6 Apr 2024 21:53:42 +0300 | |
Subject: [PATCH] Driver of Intel(R) Gaussian & Neural Accelerator | |
Link: https://lore.kernel.org/dri-devel/[email protected] | |
--- | |
Documentation/gpu/drivers.rst | 1 + | |
Documentation/gpu/gna.rst | 64 +++++ | |
MAINTAINERS | 7 + | |
drivers/gpu/drm/Kconfig | 2 + | |
drivers/gpu/drm/Makefile | 1 + | |
drivers/gpu/drm/gna/Kbuild | 5 + | |
drivers/gpu/drm/gna/Kconfig | 15 + | |
drivers/gpu/drm/gna/gna_device.c | 317 +++++++++++++++++++++ | |
drivers/gpu/drm/gna/gna_device.h | 114 ++++++++ | |
drivers/gpu/drm/gna/gna_gem.h | 22 ++ | |
drivers/gpu/drm/gna/gna_hw.c | 110 ++++++++ | |
drivers/gpu/drm/gna/gna_hw.h | 107 ++++++++ | |
drivers/gpu/drm/gna/gna_ioctl.c | 208 ++++++++++++++ | |
drivers/gpu/drm/gna/gna_mem.c | 249 +++++++++++++++++ | |
drivers/gpu/drm/gna/gna_mem.h | 58 ++++ | |
drivers/gpu/drm/gna/gna_pci.c | 148 ++++++++++ | |
drivers/gpu/drm/gna/gna_pci.h | 12 + | |
drivers/gpu/drm/gna/gna_request.c | 441 ++++++++++++++++++++++++++++++ | |
drivers/gpu/drm/gna/gna_request.h | 64 +++++ | |
drivers/gpu/drm/gna/gna_score.c | 222 +++++++++++++++ | |
drivers/gpu/drm/gna/gna_score.h | 11 + | |
include/uapi/drm/gna_drm.h | 169 ++++++++++++ | |
22 files changed, 2347 insertions(+) | |
create mode 100644 Documentation/gpu/gna.rst | |
create mode 100644 drivers/gpu/drm/gna/Kbuild | |
create mode 100644 drivers/gpu/drm/gna/Kconfig | |
create mode 100644 drivers/gpu/drm/gna/gna_device.c | |
create mode 100644 drivers/gpu/drm/gna/gna_device.h | |
create mode 100644 drivers/gpu/drm/gna/gna_gem.h | |
create mode 100644 drivers/gpu/drm/gna/gna_hw.c | |
create mode 100644 drivers/gpu/drm/gna/gna_hw.h | |
create mode 100644 drivers/gpu/drm/gna/gna_ioctl.c | |
create mode 100644 drivers/gpu/drm/gna/gna_mem.c | |
create mode 100644 drivers/gpu/drm/gna/gna_mem.h | |
create mode 100644 drivers/gpu/drm/gna/gna_pci.c | |
create mode 100644 drivers/gpu/drm/gna/gna_pci.h | |
create mode 100644 drivers/gpu/drm/gna/gna_request.c | |
create mode 100644 drivers/gpu/drm/gna/gna_request.h | |
create mode 100644 drivers/gpu/drm/gna/gna_score.c | |
create mode 100644 drivers/gpu/drm/gna/gna_score.h | |
create mode 100644 include/uapi/drm/gna_drm.h | |
diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst | |
index b899cbc5c2b4..1ca0ab0f50fa 100644 | |
--- a/Documentation/gpu/drivers.rst | |
+++ b/Documentation/gpu/drivers.rst | |
@@ -6,6 +6,7 @@ GPU Driver Documentation | |
:maxdepth: 3 | |
amdgpu/index | |
+ gna | |
i915 | |
imagination/index | |
mcde | |
diff --git a/Documentation/gpu/gna.rst b/Documentation/gpu/gna.rst | |
new file mode 100644 | |
index 000000000000..7f3b7ce7e8f7 | |
--- /dev/null | |
+++ b/Documentation/gpu/gna.rst | |
@@ -0,0 +1,64 @@ | |
+.. SPDX-License-Identifier: GPL-2.0-only | |
+ | |
+===================================================== | |
+Intel(R) Gaussian & Neural Accelerator (Intel(R) GNA) | |
+===================================================== | |
+ | |
+Acronyms | |
+-------- | |
+GNA - Gaussian & Neural Accelerator | |
+GMM - Gaussian Mixture Model | |
+CNN - Convolutional Neural Network | |
+RNN - Recurrent Neural Network | |
+DNN - Deep Neural Network | |
+ | |
+Introduction | |
+------------ | |
+The Intel(R) GNA is a fixed (integrated) PCI device available on several Intel platforms/SoCs. | |
+Its feature set depends on the Intel chipset SKU. | |
+ | |
+Intel(R) GNA provides hardware-accelerated computation for GMMs and neural networks. | |
+It supports several layer types: affine, recurrent, and convolutional, among others. | |
+The hardware also provides helper layer types for copying and transposing matrices. | |
+ | |
+Linux Driver | |
+------------ | |
+The driver registers a DRM render device to expose file operations via a dev node. | |
+ | |
+The driver probes/removes a PCI device, implements file operations, handles runtime | |
+power management, and interacts with hardware through MMIO registers. | |
+ | |
+Multiple processes can independently submit requests to the driver. The requests are | |
+queued and processed in FIFO order, since the hardware can process only one request | |
+at a time. | |
+ | |
+IOCTL | |
+----- | |
+The Intel(R) GNA driver controls the device through an IOCTL interface. | |
+The following IOCTL commands, handled by the DRM framework, are supported: | |
+ | |
+GNA_GET_PARAMETER gets driver and device capabilities. | |
+ | |
+GNA_GEM_NEW acquires a new 4KB page-aligned memory region ready for DMA operations. | |
+ | |
+GNA_GEM_FREE frees the memory region back to the system. | |
+ | |
+GNA_COMPUTE submits a request to the device queue. | |
+ Memory regions acquired by GNA_GEM_NEW are part of the request. | |
+ | |
+GNA_WAIT blocks and waits on the submitted request. | |
+ | |
+GNA MMU | |
+------- | |
+The GNA MMU is configured based on the memory usage of a specific request. As the MMU | |
+can address up to 256MB, a single scoring request is limited to that amount of memory. | |
+ | |
+The GNA library can allocate any number of memory regions for GNA usage; their number and | |
+total capacity are limited only by the OS's resources. Due to the GNA MMU restriction, even | |
+when multiple memory regions are used, the sum of all memory regions used within a single | |
+inference request must be no larger than 256MB. | |
+ | |
+At least one GNA memory region must be allocated (and it can be shared by multiple | |
+models). At the other extreme, each GNA tensor (e.g., | |
+weights/biases/inputs/outputs) could use its own separate GNA memory region. | |
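[Illustration, not part of the patch] The IOCTL sequence documented above maps to a short
user-space flow, and the 256MB per-request ceiling follows from the MMU geometry added by
this patch: 64 page tables x 1024 entries per table x 4KB per mapped page = 256MB. The
sketch below is a minimal, hypothetical example: the DRM_IOCTL_GNA_* request macro names and
the header path are assumptions based on the driver's ioctl table, and only fields actually
referenced by the driver code (in.size, out.handle, out.request_id, in.timeout, ...) are
used; the authoritative definitions live in include/uapi/drm/gna_drm.h.

/* Hypothetical user-space sketch of the GNA ioctl flow (assumed macro names). */
#include <fcntl.h>
#include <stdint.h>
#include <sys/ioctl.h>

#include "gna_drm.h"            /* assumed copy of include/uapi/drm/gna_drm.h */

static int gna_infer_once(const char *render_node, uint64_t model_size)
{
        int fd = open(render_node, O_RDWR);     /* e.g. /dev/dri/renderD128 */

        if (fd < 0)
                return -1;

        /* GNA_GEM_NEW: acquire a 4KB page-aligned region ready for DMA */
        union gna_gem_new mem = { .in.size = model_size };

        if (ioctl(fd, DRM_IOCTL_GNA_GEM_NEW, &mem) < 0)
                return -1;

        /*
         * The region would be mmap()ed via mem.out.vma_fake_offset and filled
         * with model data here (omitted), keeping the total below 256MB.
         */

        /* GNA_COMPUTE: enqueue a scoring request into the FIFO queue */
        union gna_compute score = { 0 };
        /* score.in.config must reference the GEM handle(s) from GNA_GEM_NEW */

        if (ioctl(fd, DRM_IOCTL_GNA_COMPUTE, &score) < 0)
                return -1;

        /* GNA_WAIT: block until the submitted request completes */
        union gna_wait wait = {
                .in.request_id = score.out.request_id,
                .in.timeout = 60 * 1000,        /* milliseconds */
        };

        if (ioctl(fd, DRM_IOCTL_GNA_WAIT, &wait) < 0)
                return -1;

        /* GNA_GEM_FREE: return the memory region to the system */
        struct gna_gem_free rel = { .handle = mem.out.handle };

        return ioctl(fd, DRM_IOCTL_GNA_GEM_FREE, &rel);
}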
diff --git a/MAINTAINERS b/MAINTAINERS | |
index 1aabf1c15bb3..1e91987281f4 100644 | |
--- a/MAINTAINERS | |
+++ b/MAINTAINERS | |
@@ -10790,6 +10790,13 @@ S: Supported | |
F: drivers/infiniband/hw/irdma/ | |
F: include/uapi/rdma/irdma-abi.h | |
+INTEL GNA PCI DRIVER | |
+M: Maciej Kwapulinski <[email protected]> | |
+S: Maintained | |
+F: Documentation/gpu/gna.rst | |
+F: drivers/gpu/drm/gna/* | |
+F: include/uapi/drm/gna_drm.h | |
+ | |
INTEL GPIO DRIVERS | |
M: Andy Shevchenko <[email protected]> | |
L: [email protected] | |
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig | |
index c7edba18a6f0..15a1c7f1b310 100644 | |
--- a/drivers/gpu/drm/Kconfig | |
+++ b/drivers/gpu/drm/Kconfig | |
@@ -400,6 +400,8 @@ source "drivers/gpu/drm/sprd/Kconfig" | |
source "drivers/gpu/drm/imagination/Kconfig" | |
+source "drivers/gpu/drm/gna/Kconfig" | |
+ | |
config DRM_HYPERV | |
tristate "DRM Support for Hyper-V synthetic video device" | |
depends on DRM && PCI && MMU && HYPERV | |
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile | |
index 104b42df2e95..44e633e9471d 100644 | |
--- a/drivers/gpu/drm/Makefile | |
+++ b/drivers/gpu/drm/Makefile | |
@@ -187,5 +187,6 @@ obj-y += gud/ | |
obj-$(CONFIG_DRM_HYPERV) += hyperv/ | |
obj-y += solomon/ | |
obj-$(CONFIG_DRM_SPRD) += sprd/ | |
+obj-$(CONFIG_DRM_GNA) += gna/ | |
obj-$(CONFIG_DRM_LOONGSON) += loongson/ | |
obj-$(CONFIG_DRM_POWERVR) += imagination/ | |
diff --git a/drivers/gpu/drm/gna/Kbuild b/drivers/gpu/drm/gna/Kbuild | |
new file mode 100644 | |
index 000000000000..d799c9530f79 | |
--- /dev/null | |
+++ b/drivers/gpu/drm/gna/Kbuild | |
@@ -0,0 +1,5 @@ | |
+# SPDX-License-Identifier: GPL-2.0-only | |
+ | |
+gna-y := gna_device.o gna_hw.o gna_ioctl.o gna_mem.o gna_pci.o gna_request.o gna_score.o | |
+ | |
+obj-$(CONFIG_DRM_GNA) += gna.o | |
diff --git a/drivers/gpu/drm/gna/Kconfig b/drivers/gpu/drm/gna/Kconfig | |
new file mode 100644 | |
index 000000000000..6c32716bf43a | |
--- /dev/null | |
+++ b/drivers/gpu/drm/gna/Kconfig | |
@@ -0,0 +1,15 @@ | |
+# | |
+# Intel(R) Gaussian & Neural Accelerator (Intel(R) GNA) | |
+# | |
+ | |
+config DRM_GNA | |
+ tristate "Intel(R) Gaussian & Neural Accelerator (Intel(R) GNA)" | |
+ depends on X86 && PCI | |
+ depends on DRM | |
+ select DRM_GEM_SHMEM_HELPER | |
+ help | |
+ This option enables the Intel(R) Gaussian & Neural Accelerator | |
+ (Intel(R) GNA) driver. | |
+ The user space interface is defined in include/uapi/drm/gna_drm.h, | |
+ and the driver functionality is documented in | |
+ Documentation/gpu/gna.rst. | |
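[Usage note, not part of the patch] With the Kconfig entry above, the driver can be built
as a module once the option is enabled in the kernel configuration; a minimal fragment,
assuming an x86 kernel that already has PCI and DRM support enabled, would be:

    CONFIG_DRM_GNA=m

The "depends on X86 && PCI" and "depends on DRM" lines control where the option is visible,
and "select DRM_GEM_SHMEM_HELPER" pulls in the shmem GEM helpers that the driver's memory
management (drm_gem_shmem_*) is built on.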
diff --git a/drivers/gpu/drm/gna/gna_device.c b/drivers/gpu/drm/gna/gna_device.c | |
new file mode 100644 | |
index 000000000000..4ce08bf313c3 | |
--- /dev/null | |
+++ b/drivers/gpu/drm/gna/gna_device.c | |
@@ -0,0 +1,317 @@ | |
+// SPDX-License-Identifier: GPL-2.0-only | |
+// Copyright(c) 2017-2022 Intel Corporation | |
+ | |
+#include <drm/drm_drv.h> | |
+#include <drm/drm_file.h> | |
+#include <drm/drm_gem.h> | |
+#include <drm/drm_ioctl.h> | |
+#include <drm/drm_managed.h> | |
+ | |
+#include <linux/device.h> | |
+#include <linux/dma-mapping.h> | |
+#include <linux/interrupt.h> | |
+#include <linux/module.h> | |
+#include <linux/slab.h> | |
+#include <linux/workqueue.h> | |
+ | |
+#include <uapi/drm/gna_drm.h> | |
+ | |
+#include "gna_device.h" | |
+#include "gna_gem.h" | |
+#include "gna_request.h" | |
+ | |
+#define GNA_DDI_VERSION_CURRENT GNA_DDI_VERSION_3 | |
+ | |
+DEFINE_DRM_GEM_FOPS(gna_drm_fops); | |
+ | |
+static const struct drm_ioctl_desc gna_drm_ioctls[] = { | |
+ DRM_IOCTL_DEF_DRV(GNA_GET_PARAMETER, gna_getparam_ioctl, DRM_RENDER_ALLOW), | |
+ DRM_IOCTL_DEF_DRV(GNA_GEM_NEW, gna_gem_new_ioctl, DRM_RENDER_ALLOW), | |
+ DRM_IOCTL_DEF_DRV(GNA_GEM_FREE, gna_gem_free_ioctl, DRM_RENDER_ALLOW), | |
+ DRM_IOCTL_DEF_DRV(GNA_COMPUTE, gna_score_ioctl, DRM_RENDER_ALLOW), | |
+ DRM_IOCTL_DEF_DRV(GNA_WAIT, gna_wait_ioctl, DRM_RENDER_ALLOW), | |
+}; | |
+ | |
+static int __maybe_unused gna_runtime_suspend(struct device *dev) | |
+{ | |
+ struct drm_device *drm_dev = dev_get_drvdata(dev); | |
+ struct gna_device *gna_priv = to_gna_device(drm_dev); | |
+ u32 val = gna_reg_read(gna_priv, GNA_MMIO_D0I3C); | |
+ | |
+ dev_dbg(dev, "%s D0I3, reg %.8x\n", __func__, val); | |
+ | |
+ return 0; | |
+} | |
+ | |
+static int __maybe_unused gna_runtime_resume(struct device *dev) | |
+{ | |
+ struct drm_device *drm_dev = dev_get_drvdata(dev); | |
+ struct gna_device *gna_priv = to_gna_device(drm_dev); | |
+ u32 val = gna_reg_read(gna_priv, GNA_MMIO_D0I3C); | |
+ | |
+ dev_dbg(dev, "%s D0I3, reg %.8x\n", __func__, val); | |
+ | |
+ return 0; | |
+} | |
+ | |
+const struct dev_pm_ops __maybe_unused gna_pm = { | |
+ SET_RUNTIME_PM_OPS(gna_runtime_suspend, gna_runtime_resume, NULL) | |
+}; | |
+ | |
+static int gna_open(struct drm_device *dev, struct drm_file *file) | |
+{ | |
+ struct gna_device *gna_priv; | |
+ | |
+ gna_priv = to_gna_device(dev); | |
+ | |
+ file->driver_priv = gna_priv; | |
+ | |
+ return 0; | |
+} | |
+ | |
+static void gna_delete_file_requests(struct drm_file *file, struct gna_device *gna_priv) | |
+{ | |
+ struct gna_request *req, *temp_req; | |
+ struct list_head *reqs_list; | |
+ | |
+ mutex_lock(&gna_priv->reqlist_lock); | |
+ | |
+ reqs_list = &gna_priv->request_list; | |
+ if (!list_empty(reqs_list)) { | |
+ list_for_each_entry_safe(req, temp_req, reqs_list, node) { | |
+ if (req->drm_f == file) { | |
+ bool is_pending; | |
+ | |
+ list_del_init(&req->node); | |
+ is_pending = cancel_work_sync(&req->work); | |
+ if (is_pending) | |
+ atomic_dec(&gna_priv->enqueued_requests); | |
+ kref_put(&req->refcount, gna_request_release); | |
+ break; | |
+ } | |
+ } | |
+ } | |
+ | |
+ mutex_unlock(&gna_priv->reqlist_lock); | |
+} | |
+ | |
+static void gna_close(struct drm_device *dev, struct drm_file *file) | |
+{ | |
+ struct gna_device *gna_priv = (struct gna_device *)file->driver_priv; | |
+ | |
+ gna_delete_file_requests(file, gna_priv); | |
+} | |
+ | |
+static void gna_drm_dev_fini(struct drm_device *dev, void *ptr) | |
+{ | |
+ drm_dev_unregister(dev); | |
+} | |
+ | |
+static int gna_drm_dev_init(struct drm_device *dev) | |
+{ | |
+ int err; | |
+ | |
+ err = drm_dev_register(dev, 0); | |
+ if (err) | |
+ return err; | |
+ | |
+ return drmm_add_action_or_reset(dev, gna_drm_dev_fini, NULL); | |
+} | |
+ | |
+static void gna_pm_init(struct device *dev) | |
+{ | |
+ pm_runtime_set_autosuspend_delay(dev, 2000); | |
+ pm_runtime_use_autosuspend(dev); | |
+ pm_runtime_mark_last_busy(dev); | |
+ pm_runtime_allow(dev); | |
+ pm_runtime_put_noidle(dev); | |
+} | |
+ | |
+static void gna_pm_fini(struct drm_device *drm, void *data) | |
+{ | |
+ struct device *dev = data; | |
+ | |
+ pm_runtime_get_noresume(dev); | |
+} | |
+ | |
+static irqreturn_t gna_interrupt(int irq, void *priv) | |
+{ | |
+ struct gna_device *gna_priv; | |
+ | |
+ gna_priv = (struct gna_device *)priv; | |
+ gna_priv->dev_busy = false; | |
+ wake_up(&gna_priv->dev_busy_waitq); | |
+ return IRQ_HANDLED; | |
+} | |
+ | |
+static void gna_workqueue_fini(struct drm_device *drm, void *data) | |
+{ | |
+ struct workqueue_struct *request_wq = data; | |
+ | |
+ destroy_workqueue(request_wq); | |
+} | |
+ | |
+static int gna_workqueue_init(struct gna_device *gna_priv) | |
+{ | |
+ const char *name = gna_name(gna_priv); | |
+ | |
+ gna_priv->request_wq = create_singlethread_workqueue(name); | |
+ if (!gna_priv->request_wq) | |
+ return -ENOMEM; | |
+ | |
+ return drmm_add_action_or_reset(&gna_priv->drm, gna_workqueue_fini, gna_priv->request_wq); | |
+} | |
+ | |
+static struct drm_gem_object *gna_create_gem_object(struct drm_device *dev, | |
+ size_t size) | |
+{ | |
+ struct drm_gem_shmem_object *dshmem; | |
+ struct gna_gem_object *shmem; | |
+ | |
+ shmem = kzalloc(sizeof(*shmem), GFP_KERNEL); | |
+ if (!shmem) | |
+ return NULL; | |
+ | |
+ dshmem = &shmem->base; | |
+ | |
+ return &dshmem->base; | |
+} | |
+ | |
+static const struct drm_driver gna_drm_driver = { | |
+ .driver_features = DRIVER_GEM | DRIVER_RENDER, | |
+ .open = gna_open, | |
+ .postclose = gna_close, | |
+ | |
+ .gem_create_object = gna_create_gem_object, | |
+ | |
+ .ioctls = gna_drm_ioctls, | |
+ .num_ioctls = ARRAY_SIZE(gna_drm_ioctls), | |
+ .fops = &gna_drm_fops, | |
+ | |
+ .name = DRIVER_NAME, | |
+ .desc = DRIVER_DESC, | |
+ .date = DRIVER_DATE, | |
+ .major = DRIVER_MAJOR, | |
+ .minor = DRIVER_MINOR, | |
+ .patchlevel = DRIVER_PATCHLEVEL, | |
+}; | |
+ | |
+int gna_probe(struct device *parent, struct gna_dev_info *dev_info, void __iomem *iobase, int irq) | |
+{ | |
+ struct gna_device *gna_priv; | |
+ struct drm_device *drm_dev; | |
+ u32 bld_reg; | |
+ int err; | |
+ | |
+ gna_priv = devm_drm_dev_alloc(parent, &gna_drm_driver, struct gna_device, drm); | |
+ if (IS_ERR(gna_priv)) | |
+ return PTR_ERR(gna_priv); | |
+ | |
+ drm_dev = &gna_priv->drm; | |
+ gna_priv->recovery_timeout_jiffies = msecs_to_jiffies(60*1000); | |
+ gna_priv->iobase = iobase; | |
+ gna_priv->info = *dev_info; | |
+ | |
+ atomic_set(&gna_priv->enqueued_requests, 0); | |
+ | |
+ if (!(sizeof(dma_addr_t) > 4) || | |
+ dma_set_mask(parent, DMA_BIT_MASK(64))) { | |
+ err = dma_set_mask(parent, DMA_BIT_MASK(32)); | |
+ if (err) | |
+ return err; | |
+ } | |
+ | |
+ bld_reg = gna_reg_read(gna_priv, GNA_MMIO_IBUFFS); | |
+ gna_priv->hw_info.in_buf_s = bld_reg & GENMASK(7, 0); | |
+ | |
+ err = gna_mmu_init(gna_priv); | |
+ if (err) | |
+ return err; | |
+ | |
+ dev_dbg(parent, "maximum memory size %llu num pd %d\n", | |
+ gna_priv->info.max_hw_mem, gna_priv->info.num_pagetables); | |
+ dev_dbg(parent, "desc rsvd size %d mmu vamax size %d\n", | |
+ gna_priv->info.desc_info.rsvd_size, | |
+ gna_priv->info.desc_info.mmu_info.vamax_size); | |
+ | |
+ mutex_init(&gna_priv->mmu_lock); | |
+ | |
+ atomic_set(&gna_priv->request_count, 0); | |
+ | |
+ mutex_init(&gna_priv->reqlist_lock); | |
+ INIT_LIST_HEAD(&gna_priv->request_list); | |
+ | |
+ init_waitqueue_head(&gna_priv->dev_busy_waitq); | |
+ | |
+ err = gna_workqueue_init(gna_priv); | |
+ if (err) | |
+ return err; | |
+ | |
+ err = devm_request_irq(parent, irq, gna_interrupt, | |
+ IRQF_SHARED, gna_name(gna_priv), gna_priv); | |
+ if (err) | |
+ return err; | |
+ | |
+ dev_set_drvdata(parent, drm_dev); | |
+ | |
+ err = gna_drm_dev_init(drm_dev); | |
+ if (err) | |
+ return err; | |
+ | |
+ gna_pm_init(parent); | |
+ err = drmm_add_action(drm_dev, gna_pm_fini, parent); | |
+ if (err) | |
+ return err; | |
+ | |
+ return 0; | |
+} | |
+ | |
+static u32 gna_device_type_by_hwid(u32 hwid) | |
+{ | |
+ switch (hwid) { | |
+ case GNA_DEV_HWID_CNL: | |
+ return GNA_DEV_TYPE_0_9; | |
+ case GNA_DEV_HWID_GLK: | |
+ case GNA_DEV_HWID_EHL: | |
+ case GNA_DEV_HWID_ICL: | |
+ return GNA_DEV_TYPE_1_0; | |
+ case GNA_DEV_HWID_JSL: | |
+ case GNA_DEV_HWID_TGL: | |
+ case GNA_DEV_HWID_RKL: | |
+ return GNA_DEV_TYPE_2_0; | |
+ case GNA_DEV_HWID_ADL: | |
+ case GNA_DEV_HWID_RPL: | |
+ return GNA_DEV_TYPE_3_0; | |
+ case GNA_DEV_HWID_MTL: | |
+ return GNA_DEV_TYPE_3_5; | |
+ default: | |
+ return 0; | |
+ } | |
+} | |
+ | |
+int gna_getparam(struct gna_device *gna_priv, union gna_parameter *param) | |
+{ | |
+ switch (param->in.id) { | |
+ case GNA_PARAM_RECOVERY_TIMEOUT: | |
+ param->out.value = jiffies_to_msecs(gna_priv->recovery_timeout_jiffies) / 1000; | |
+ break; | |
+ case GNA_PARAM_INPUT_BUFFER_S: | |
+ param->out.value = gna_priv->hw_info.in_buf_s; | |
+ break; | |
+ case GNA_PARAM_DEVICE_TYPE: | |
+ param->out.value = gna_device_type_by_hwid(gna_priv->info.hwid); | |
+ break; | |
+ case GNA_PARAM_DDI_VERSION: | |
+ param->out.value = GNA_DDI_VERSION_CURRENT; | |
+ break; | |
+ default: | |
+ dev_dbg(gna_dev(gna_priv), "unknown parameter id: %llu\n", param->in.id); | |
+ return -EINVAL; | |
+ } | |
+ | |
+ return 0; | |
+} | |
+ | |
+MODULE_AUTHOR("Intel Corporation"); | |
+MODULE_DESCRIPTION("Intel(R) Gaussian & Neural Accelerator (Intel(R) GNA) Driver"); | |
+MODULE_LICENSE("GPL"); | |
diff --git a/drivers/gpu/drm/gna/gna_device.h b/drivers/gpu/drm/gna/gna_device.h | |
new file mode 100644 | |
index 000000000000..6eae0f2f44df | |
--- /dev/null | |
+++ b/drivers/gpu/drm/gna/gna_device.h | |
@@ -0,0 +1,114 @@ | |
+/* SPDX-License-Identifier: GPL-2.0-only */ | |
+/* Copyright(c) 2017-2022 Intel Corporation */ | |
+ | |
+#ifndef __GNA_DEVICE_H__ | |
+#define __GNA_DEVICE_H__ | |
+ | |
+#include <drm/drm_device.h> | |
+#include <drm/drm_gem_shmem_helper.h> | |
+ | |
+#include <linux/atomic.h> | |
+#include <linux/io.h> | |
+#include <linux/list.h> | |
+#include <linux/mutex.h> | |
+#include <linux/pm_runtime.h> | |
+#include <linux/types.h> | |
+ | |
+#include "gna_gem.h" | |
+#include "gna_hw.h" | |
+#include "gna_mem.h" | |
+ | |
+#define DRIVER_NAME "gna" | |
+#define DRIVER_DESC "Intel(R) Gaussian & Neural Accelerator (Intel(R) GNA)" | |
+#define DRIVER_DATE "20211201" | |
+ | |
+#define DRIVER_MAJOR 1 | |
+#define DRIVER_MINOR 0 | |
+#define DRIVER_PATCHLEVEL 0 | |
+ | |
+struct workqueue_struct; | |
+union gna_parameter; | |
+struct drm_file; | |
+struct device; | |
+ | |
+struct gna_device { | |
+ struct drm_device drm; | |
+ | |
+ int recovery_timeout_jiffies; | |
+ | |
+ /* hardware status set by interrupt handler */ | |
+ u32 hw_status; | |
+ | |
+ /* device related resources */ | |
+ void __iomem *iobase; | |
+ struct gna_dev_info info; | |
+ struct gna_hw_info hw_info; | |
+ | |
+ struct gna_mmu_object mmu; | |
+ struct mutex mmu_lock; | |
+ | |
+ /* if true, then gna device is processing */ | |
+ bool dev_busy; | |
+ struct wait_queue_head dev_busy_waitq; | |
+ | |
+ struct list_head request_list; | |
+ /* protects request_list */ | |
+ struct mutex reqlist_lock; | |
+ struct workqueue_struct *request_wq; | |
+ atomic_t request_count; | |
+ | |
+ /* requests that are in queue to be run +1 for currently processed one */ | |
+ atomic_t enqueued_requests; | |
+}; | |
+ | |
+int gna_probe(struct device *parent, struct gna_dev_info *dev_info, void __iomem *iobase, int irq); | |
+int gna_getparam(struct gna_device *gna_priv, union gna_parameter *param); | |
+ | |
+int gna_getparam_ioctl(struct drm_device *dev, void *data, | |
+ struct drm_file *file); | |
+ | |
+int gna_gem_new_ioctl(struct drm_device *dev, void *data, | |
+ struct drm_file *file); | |
+ | |
+int gna_gem_free_ioctl(struct drm_device *dev, void *data, | |
+ struct drm_file *file); | |
+ | |
+int gna_score_ioctl(struct drm_device *dev, void *data, | |
+ struct drm_file *file); | |
+ | |
+int gna_wait_ioctl(struct drm_device *dev, void *data, | |
+ struct drm_file *file); | |
+ | |
+extern const struct dev_pm_ops __maybe_unused gna_pm; | |
+ | |
+static inline u32 gna_reg_read(struct gna_device *gna_priv, u32 reg) | |
+{ | |
+ return readl(gna_priv->iobase + reg); | |
+} | |
+ | |
+static inline void gna_reg_write(struct gna_device *gna_priv, u32 reg, u32 val) | |
+{ | |
+ writel(val, gna_priv->iobase + reg); | |
+} | |
+ | |
+static inline const char *gna_name(struct gna_device *gna_priv) | |
+{ | |
+ return gna_priv->drm.unique; | |
+} | |
+ | |
+static inline struct device *gna_dev(struct gna_device *gna_priv) | |
+{ | |
+ return gna_priv->drm.dev; | |
+} | |
+ | |
+static inline struct gna_device *to_gna_device(struct drm_device *dev) | |
+{ | |
+ return container_of(dev, struct gna_device, drm); | |
+} | |
+ | |
+static inline struct gna_gem_object *to_gna_gem_obj(struct drm_gem_shmem_object *drm_gem_shmem) | |
+{ | |
+ return container_of(drm_gem_shmem, struct gna_gem_object, base); | |
+} | |
+ | |
+#endif /* __GNA_DEVICE_H__ */ | |
diff --git a/drivers/gpu/drm/gna/gna_gem.h b/drivers/gpu/drm/gna/gna_gem.h | |
new file mode 100644 | |
index 000000000000..92372fc93718 | |
--- /dev/null | |
+++ b/drivers/gpu/drm/gna/gna_gem.h | |
@@ -0,0 +1,22 @@ | |
+/* SPDX-License-Identifier: GPL-2.0-only */ | |
+/* Copyright(c) 2017-2022 Intel Corporation */ | |
+ | |
+#ifndef __GNA_GEM_H__ | |
+#define __GNA_GEM_H__ | |
+ | |
+#include <drm/drm_gem_shmem_helper.h> | |
+ | |
+#include <linux/wait.h> | |
+#include <linux/workqueue.h> | |
+ | |
+struct gna_gem_object { | |
+ struct drm_gem_shmem_object base; | |
+ | |
+ uint32_t handle; | |
+ | |
+ struct work_struct work; | |
+ | |
+ struct wait_queue_head waitq; | |
+}; | |
+ | |
+#endif /* __GNA_GEM_H__ */ | |
diff --git a/drivers/gpu/drm/gna/gna_hw.c b/drivers/gpu/drm/gna/gna_hw.c | |
new file mode 100644 | |
index 000000000000..dff7c6b3edea | |
--- /dev/null | |
+++ b/drivers/gpu/drm/gna/gna_hw.c | |
@@ -0,0 +1,110 @@ | |
+// SPDX-License-Identifier: GPL-2.0-only | |
+// Copyright(c) 2017-2022 Intel Corporation | |
+ | |
+#include <linux/bitfield.h> | |
+#include <linux/iopoll.h> | |
+ | |
+#include <uapi/drm/gna_drm.h> | |
+ | |
+#include "gna_device.h" | |
+#include "gna_hw.h" | |
+ | |
+int gna_parse_hw_status(struct gna_device *gna_priv, u32 hw_status) | |
+{ | |
+ if (hw_status & GNA_ERROR) { | |
+ dev_dbg(gna_dev(gna_priv), "GNA completed with errors: %#x\n", hw_status); | |
+ return -EIO; | |
+ } | |
+ | |
+ if (hw_status & GNA_STS_SCORE_COMPLETED) { | |
+ dev_dbg(gna_dev(gna_priv), "GNA completed successfully: %#x\n", hw_status); | |
+ return 0; | |
+ } | |
+ | |
+ dev_dbg(gna_dev(gna_priv), "GNA not completed, status: %#x\n", hw_status); | |
+ return -ENODATA; | |
+} | |
+ | |
+void gna_print_error_status(struct gna_device *gna_priv, u32 hw_status) | |
+{ | |
+ if (hw_status & GNA_STS_PARAM_OOR) | |
+ dev_dbg(gna_dev(gna_priv), "GNA error: Param Out Range Error\n"); | |
+ | |
+ if (hw_status & GNA_STS_VA_OOR) | |
+ dev_dbg(gna_dev(gna_priv), "GNA error: VA Out of Range Error\n"); | |
+ | |
+ if (hw_status & GNA_STS_PCI_MMU_ERR) | |
+ dev_dbg(gna_dev(gna_priv), "GNA error: PCI MMU Error\n"); | |
+ | |
+ if (hw_status & GNA_STS_PCI_DMA_ERR) | |
+ dev_dbg(gna_dev(gna_priv), "GNA error: PCI MMU Error\n"); | |
+ | |
+ if (hw_status & GNA_STS_PCI_UNEXCOMPL_ERR) | |
+ dev_dbg(gna_dev(gna_priv), "GNA error: PCI Unexpected Completion Error\n"); | |
+ | |
+ if (hw_status & GNA_STS_SATURATE) | |
+ dev_dbg(gna_dev(gna_priv), "GNA error: Saturation Reached !\n"); | |
+} | |
+ | |
+bool gna_hw_perf_enabled(struct gna_device *gna_priv) | |
+{ | |
+ u32 ctrl = gna_reg_read(gna_priv, GNA_MMIO_CTRL); | |
+ | |
+ return !!FIELD_GET(GNA_CTRL_COMP_STATS_EN, ctrl); | |
+} | |
+ | |
+void gna_start_scoring(struct gna_device *gna_priv, | |
+ struct gna_compute_cfg *compute_cfg) | |
+{ | |
+ u32 ctrl = gna_reg_read(gna_priv, GNA_MMIO_CTRL); | |
+ | |
+ ctrl |= GNA_CTRL_START_ACCEL | GNA_CTRL_COMP_INT_EN | GNA_CTRL_ERR_INT_EN; | |
+ | |
+ ctrl &= ~GNA_CTRL_COMP_STATS_EN; | |
+ ctrl |= FIELD_PREP(GNA_CTRL_COMP_STATS_EN, | |
+ compute_cfg->hw_perf_encoding & FIELD_MAX(GNA_CTRL_COMP_STATS_EN)); | |
+ | |
+ ctrl &= ~GNA_CTRL_ACTIVE_LIST_EN; | |
+ ctrl |= FIELD_PREP(GNA_CTRL_ACTIVE_LIST_EN, | |
+ compute_cfg->active_list_on & FIELD_MAX(GNA_CTRL_ACTIVE_LIST_EN)); | |
+ | |
+ ctrl &= ~GNA_CTRL_OP_MODE; | |
+ ctrl |= FIELD_PREP(GNA_CTRL_OP_MODE, | |
+ compute_cfg->gna_mode & FIELD_MAX(GNA_CTRL_OP_MODE)); | |
+ | |
+ gna_reg_write(gna_priv, GNA_MMIO_CTRL, ctrl); | |
+} | |
+ | |
+static void gna_clear_saturation(struct gna_device *gna_priv) | |
+{ | |
+ u32 val; | |
+ | |
+ val = gna_reg_read(gna_priv, GNA_MMIO_STS); | |
+ if (val & GNA_STS_SATURATE) { | |
+ dev_dbg(gna_dev(gna_priv), "status (saturation): %#x\n", val); | |
+ | |
+ val = val & GNA_STS_SATURATE; | |
+ gna_reg_write(gna_priv, GNA_MMIO_STS, val); | |
+ } | |
+} | |
+ | |
+int gna_abort_hw(struct gna_device *gna_priv) | |
+{ | |
+ u32 val; | |
+ | |
+ /* saturation bit in the GNA status register needs | |
+ * to be explicitly cleared. | |
+ */ | |
+ gna_clear_saturation(gna_priv); | |
+ | |
+ val = gna_reg_read(gna_priv, GNA_MMIO_STS); | |
+ dev_dbg(gna_dev(gna_priv), "status (before abort): %#x\n", val); | |
+ | |
+ val = gna_reg_read(gna_priv, GNA_MMIO_CTRL); | |
+ val |= GNA_CTRL_ABORT_CLR_ACCEL; | |
+ gna_reg_write(gna_priv, GNA_MMIO_CTRL, val); | |
+ | |
+ return readl_poll_timeout(gna_priv->iobase + GNA_MMIO_STS, val, | |
+ !(val & 0x1), | |
+ 0, 1000); | |
+} | |
diff --git a/drivers/gpu/drm/gna/gna_hw.h b/drivers/gpu/drm/gna/gna_hw.h | |
new file mode 100644 | |
index 000000000000..97338e1be3b6 | |
--- /dev/null | |
+++ b/drivers/gpu/drm/gna/gna_hw.h | |
@@ -0,0 +1,107 @@ | |
+/* SPDX-License-Identifier: GPL-2.0-only */ | |
+/* Copyright(c) 2017-2022 Intel Corporation */ | |
+ | |
+#ifndef __GNA_HW_H__ | |
+#define __GNA_HW_H__ | |
+ | |
+#include <linux/bits.h> | |
+#include <linux/mm_types.h> | |
+ | |
+struct gna_compute_cfg; | |
+struct gna_device; | |
+ | |
+#define GNA_FEATURES \ | |
+ .max_hw_mem = 256 * 1024 * 1024, \ | |
+ .num_pagetables = 64, \ | |
+ .num_page_entries = PAGE_SIZE / sizeof(u32), \ | |
+ /* desc_info all in bytes */ \ | |
+ .desc_info = { \ | |
+ .rsvd_size = 256, \ | |
+ .cfg_size = 256, \ | |
+ .desc_size = 784, \ | |
+ .mmu_info = { \ | |
+ .vamax_size = 4, \ | |
+ .rsvd_size = 12, \ | |
+ .pd_size = 4 * 64, \ | |
+ }, \ | |
+ } | |
+ | |
+#define GNA_GEN1_FEATURES \ | |
+ GNA_FEATURES, \ | |
+ .max_layer_count = 1024 | |
+ | |
+#define GNA_GEN2_FEATURES \ | |
+ GNA_FEATURES, \ | |
+ .max_layer_count = 4096 | |
+ | |
+#define GNA_DEV_HWID_CNL 0x5A11 | |
+#define GNA_DEV_HWID_EHL 0x4511 | |
+#define GNA_DEV_HWID_GLK 0x3190 | |
+#define GNA_DEV_HWID_ICL 0x8A11 | |
+#define GNA_DEV_HWID_JSL 0x4E11 | |
+#define GNA_DEV_HWID_TGL 0x9A11 | |
+#define GNA_DEV_HWID_RKL 0x4C11 | |
+#define GNA_DEV_HWID_ADL 0x464F | |
+#define GNA_DEV_HWID_RPL 0xA74F | |
+#define GNA_DEV_HWID_MTL 0x7E4C | |
+ | |
+/* GNA MMIO registers */ | |
+#define GNA_MMIO_STS 0x80 | |
+#define GNA_MMIO_CTRL 0x84 | |
+#define GNA_MMIO_PTC 0x8C | |
+#define GNA_MMIO_PSC 0x90 | |
+#define GNA_MMIO_D0I3C 0xA8 | |
+#define GNA_MMIO_DESBASE 0xB0 | |
+#define GNA_MMIO_IBUFFS 0xB4 | |
+ | |
+#define GNA_PT_ENTRY_SIZE 4 | |
+/* there are up to 1024 32-bit pointers in one page in Page Table (L1) */ | |
+#define GNA_PT_LENGTH (PAGE_SIZE / GNA_PT_ENTRY_SIZE) | |
+ | |
+#define GNA_PGDIRN_LEN 64 | |
+#define GNA_PGDIR_ENTRIES 1024 /* 32-bit page addresses */ | |
+#define GNA_PGDIR_INVALID 1 | |
+ | |
+#define GNA_CTRL_START_ACCEL BIT(0) | |
+#define GNA_CTRL_ACTIVE_LIST_EN BIT(1) | |
+#define GNA_CTRL_ABORT_CLR_ACCEL BIT(2) | |
+#define GNA_CTRL_OP_MODE GENMASK(6, 5) | |
+#define GNA_CTRL_COMP_INT_EN BIT(8) | |
+#define GNA_CTRL_ERR_INT_EN BIT(10) | |
+#define GNA_CTRL_COMP_STATS_EN GENMASK(15, 12) | |
+ | |
+struct gna_mmu_info { | |
+ u32 vamax_size; | |
+ u32 rsvd_size; | |
+ u32 pd_size; | |
+}; | |
+ | |
+struct gna_desc_info { | |
+ u32 rsvd_size; | |
+ u32 cfg_size; | |
+ u32 desc_size; | |
+ struct gna_mmu_info mmu_info; | |
+}; | |
+ | |
+struct gna_hw_info { | |
+ u8 in_buf_s; | |
+}; | |
+ | |
+struct gna_dev_info { | |
+ u32 hwid; | |
+ u32 num_pagetables; | |
+ u32 num_page_entries; | |
+ u32 max_layer_count; | |
+ u64 max_hw_mem; | |
+ | |
+ struct gna_desc_info desc_info; | |
+}; | |
+ | |
+int gna_abort_hw(struct gna_device *gna_priv); | |
+bool gna_hw_perf_enabled(struct gna_device *gna_priv); | |
+int gna_parse_hw_status(struct gna_device *gna_priv, u32 hw_status); | |
+void gna_print_error_status(struct gna_device *gna_priv, u32 hw_status); | |
+void gna_start_scoring(struct gna_device *gna_priv, | |
+ struct gna_compute_cfg *compute_cfg); | |
+ | |
+#endif // __GNA_HW_H__ | |
diff --git a/drivers/gpu/drm/gna/gna_ioctl.c b/drivers/gpu/drm/gna/gna_ioctl.c | |
new file mode 100644 | |
index 000000000000..f3c805e946ee | |
--- /dev/null | |
+++ b/drivers/gpu/drm/gna/gna_ioctl.c | |
@@ -0,0 +1,208 @@ | |
+// SPDX-License-Identifier: GPL-2.0-only | |
+// Copyright(c) 2017-2022 Intel Corporation | |
+ | |
+#include <drm/drm_device.h> | |
+#include <drm/drm_gem_shmem_helper.h> | |
+#include <drm/drm_file.h> | |
+ | |
+#include <linux/jiffies.h> | |
+#include <linux/kref.h> | |
+#include <linux/list.h> | |
+#include <linux/mutex.h> | |
+#include <linux/wait.h> | |
+#include <linux/workqueue.h> | |
+ | |
+#include <uapi/drm/gna_drm.h> | |
+ | |
+#include "gna_device.h" | |
+#include "gna_gem.h" | |
+#include "gna_request.h" | |
+ | |
+int gna_score_ioctl(struct drm_device *dev, void *data, | |
+ struct drm_file *file) | |
+{ | |
+ union gna_compute *score_args = data; | |
+ u64 request_id; | |
+ int ret; | |
+ | |
+ ret = gna_validate_score_config(&score_args->in.config, to_gna_device(dev)); | |
+ if (ret) | |
+ return ret; | |
+ | |
+ ret = gna_enqueue_request(&score_args->in.config, file, &request_id); | |
+ if (ret) | |
+ return ret; | |
+ | |
+ score_args->out.request_id = request_id; | |
+ | |
+ return 0; | |
+} | |
+ | |
+static struct gna_request *gna_find_request_by_id(u64 req_id, struct gna_device *gna_priv) | |
+{ | |
+ struct gna_request *req, *found_req; | |
+ struct list_head *reqs_list; | |
+ | |
+ mutex_lock(&gna_priv->reqlist_lock); | |
+ | |
+ reqs_list = &gna_priv->request_list; | |
+ found_req = NULL; | |
+ if (!list_empty(reqs_list)) { | |
+ list_for_each_entry(req, reqs_list, node) { | |
+ if (req_id == req->request_id) { | |
+ found_req = req; | |
+ kref_get(&found_req->refcount); | |
+ break; | |
+ } | |
+ } | |
+ } | |
+ | |
+ mutex_unlock(&gna_priv->reqlist_lock); | |
+ | |
+ return found_req; | |
+} | |
+ | |
+int gna_wait_ioctl(struct drm_device *dev, void *data, | |
+ struct drm_file *file) | |
+{ | |
+ struct gna_device *gna_priv = to_gna_device(dev); | |
+ union gna_wait *wait_data = data; | |
+ struct gna_request *score_request; | |
+ u64 request_id; | |
+ u32 timeout; | |
+ int ret = 0; | |
+ | |
+ request_id = wait_data->in.request_id; | |
+ timeout = wait_data->in.timeout; | |
+ | |
+ score_request = gna_find_request_by_id(request_id, gna_priv); | |
+ | |
+ if (!score_request) { | |
+ dev_dbg(gna_dev(gna_priv), "could not find request, id: %llu\n", request_id); | |
+ return -EINVAL; | |
+ } | |
+ | |
+ if (score_request->drm_f != file) { | |
+ dev_dbg(gna_dev(gna_priv), "illegal file_priv: %p != %p\n", score_request->drm_f, file); | |
+ ret = -EINVAL; | |
+ goto out; | |
+ } | |
+ | |
+ ret = wait_event_interruptible_timeout(score_request->waitq, score_request->state == DONE, | |
+ msecs_to_jiffies(timeout)); | |
+ if (ret == 0 || ret == -ERESTARTSYS) { | |
+ dev_dbg(gna_dev(gna_priv), "request timed out, id: %llu\n", request_id); | |
+ ret = -EBUSY; | |
+ goto out; | |
+ } | |
+ | |
+ wait_data->out.hw_perf = score_request->hw_perf; | |
+ wait_data->out.drv_perf = score_request->drv_perf; | |
+ wait_data->out.hw_status = score_request->hw_status; | |
+ | |
+ ret = score_request->status; | |
+ | |
+ dev_dbg(gna_dev(gna_priv), "request status: %d, hw status: %#x\n", | |
+ score_request->status, score_request->hw_status); | |
+ | |
+ cancel_work_sync(&score_request->work); | |
+ mutex_lock(&gna_priv->reqlist_lock); | |
+ if (!list_empty(&score_request->node)) { | |
+ list_del_init(&score_request->node); | |
+ kref_put(&score_request->refcount, gna_request_release); // due to gna_priv->request_list removal! | |
+ } | |
+ mutex_unlock(&gna_priv->reqlist_lock); | |
+ | |
+out: | |
+ kref_put(&score_request->refcount, gna_request_release); | |
+ return ret; | |
+} | |
+ | |
+int gna_gem_free_ioctl(struct drm_device *dev, void *data, | |
+ struct drm_file *file) | |
+{ | |
+ struct gna_device *gna_priv = to_gna_device(dev); | |
+ struct gna_gem_free *args = data; | |
+ struct gna_gem_object *gnagemo; | |
+ struct drm_gem_object *drmgemo; | |
+ int ret; | |
+ | |
+ drmgemo = drm_gem_object_lookup(file, args->handle); | |
+ if (!drmgemo) | |
+ return -ENOENT; | |
+ | |
+ gnagemo = to_gna_gem_obj(to_drm_gem_shmem_obj(drmgemo)); | |
+ | |
+ queue_work(gna_priv->request_wq, &gnagemo->work); | |
+ if (wait_event_interruptible(gnagemo->waitq, true)) { | |
+ ret = -ERESTARTSYS; | |
+ goto out; | |
+ } | |
+ | |
+ cancel_work_sync(&gnagemo->work); | |
+ | |
+ ret = drm_gem_handle_delete(file, args->handle); | |
+ | |
+out: | |
+ drm_gem_object_put(drmgemo); | |
+ return ret; | |
+} | |
+ | |
+int gna_getparam_ioctl(struct drm_device *dev, void *data, | |
+ struct drm_file *file) | |
+{ | |
+ struct gna_device *gna_priv = to_gna_device(dev); | |
+ union gna_parameter *param = data; | |
+ | |
+ return gna_getparam(gna_priv, param); | |
+} | |
+ | |
+static struct drm_gem_shmem_object * | |
+drm_gem_shmem_create_with_handle(struct drm_file *file_priv, | |
+ struct drm_device *dev, size_t size, | |
+ uint32_t *handle) | |
+{ | |
+ struct drm_gem_shmem_object *shmem; | |
+ int ret; | |
+ | |
+ shmem = drm_gem_shmem_create(dev, size); | |
+ if (IS_ERR(shmem)) | |
+ return shmem; | |
+ | |
+ /* | |
+ * Allocate an id of idr table where the obj is registered | |
+ * and handle has the id what user can see. | |
+ */ | |
+ ret = drm_gem_handle_create(file_priv, &shmem->base, handle); | |
+ /* drop reference from allocate - handle holds it now. */ | |
+ drm_gem_object_put(&shmem->base); | |
+ if (ret) | |
+ return ERR_PTR(ret); | |
+ | |
+ return shmem; | |
+} | |
+ | |
+int gna_gem_new_ioctl(struct drm_device *dev, void *data, | |
+ struct drm_file *file) | |
+{ | |
+ struct drm_gem_shmem_object *drmgemshm; | |
+ struct gna_gem_object *gnagemo; | |
+ union gna_gem_new *args = data; | |
+ | |
+ drmgemshm = drm_gem_shmem_create_with_handle(file, dev, args->in.size, | |
+ &args->out.handle); | |
+ | |
+ if (IS_ERR(drmgemshm)) | |
+ return PTR_ERR(drmgemshm); | |
+ | |
+ args->out.size_granted = drmgemshm->base.size; | |
+ args->out.vma_fake_offset = drm_vma_node_offset_addr(&drmgemshm->base.vma_node); | |
+ | |
+ gnagemo = to_gna_gem_obj(drmgemshm); | |
+ gnagemo->handle = args->out.handle; | |
+ | |
+ INIT_WORK(&gnagemo->work, gna_gem_obj_release_work); | |
+ init_waitqueue_head(&gnagemo->waitq); | |
+ | |
+ return 0; | |
+} | |
diff --git a/drivers/gpu/drm/gna/gna_mem.c b/drivers/gpu/drm/gna/gna_mem.c | |
new file mode 100644 | |
index 000000000000..bf57302fbc02 | |
--- /dev/null | |
+++ b/drivers/gpu/drm/gna/gna_mem.c | |
@@ -0,0 +1,249 @@ | |
+// SPDX-License-Identifier: GPL-2.0-only | |
+// Copyright(c) 2017-2022 Intel Corporation | |
+ | |
+#include <drm/drm_gem.h> | |
+#include <drm/drm_gem_shmem_helper.h> | |
+#include <drm/drm_managed.h> | |
+ | |
+#include <linux/atomic.h> | |
+#include <linux/device.h> | |
+#include <linux/dma-mapping.h> | |
+#include <linux/kref.h> | |
+#include <linux/list.h> | |
+#include <linux/math.h> | |
+#include <linux/mm.h> | |
+#include <linux/mutex.h> | |
+#include <linux/scatterlist.h> | |
+#include <linux/slab.h> | |
+#include <linux/string.h> | |
+#include <linux/wait.h> | |
+#include <linux/workqueue.h> | |
+ | |
+#include "gna_device.h" | |
+#include "gna_gem.h" | |
+#include "gna_mem.h" | |
+#include "gna_request.h" | |
+ | |
+static void gna_mmu_set(struct gna_device *gna_priv) | |
+{ | |
+ struct gna_mmu_object *mmu; | |
+ dma_addr_t pagetable_dma; | |
+ u32 *pgdirn; | |
+ int i; | |
+ | |
+ mmu = &gna_priv->mmu; | |
+ | |
+ pgdirn = mmu->hwdesc->mmu.pagedir_n; | |
+ | |
+ for (i = 0; i < mmu->num_pagetables; i++) { | |
+ pagetable_dma = mmu->pagetables_dma[i]; | |
+ pgdirn[i] = pagetable_dma >> PAGE_SHIFT; | |
+ } | |
+ | |
+ for (; i < GNA_PGDIRN_LEN; i++) | |
+ pgdirn[i] = GNA_PGDIR_INVALID; | |
+} | |
+ | |
+/* descriptor and page tables allocation */ | |
+int gna_mmu_init(struct gna_device *gna_priv) | |
+{ | |
+ struct device *parent = gna_dev(gna_priv); | |
+ struct gna_mmu_object *mmu; | |
+ int desc_size; | |
+ int i; | |
+ | |
+ if (gna_priv->info.num_pagetables > GNA_PGDIRN_LEN) { | |
+ dev_dbg(gna_dev(gna_priv), "number of pagetables requested too large: %u\n", gna_priv->info.num_pagetables); | |
+ return -EINVAL; | |
+ } | |
+ | |
+ mmu = &gna_priv->mmu; | |
+ | |
+ desc_size = round_up(gna_priv->info.desc_info.desc_size, PAGE_SIZE); | |
+ | |
+ mmu->hwdesc = dmam_alloc_coherent(parent, desc_size, &mmu->hwdesc_dma, | |
+ GFP_KERNEL); | |
+ if (!mmu->hwdesc) | |
+ return -ENOMEM; | |
+ | |
+ mmu->num_pagetables = gna_priv->info.num_pagetables; | |
+ | |
+ mmu->pagetables_dma = drmm_kmalloc_array(&gna_priv->drm, mmu->num_pagetables, sizeof(*mmu->pagetables_dma), | |
+ GFP_KERNEL); | |
+ if (!mmu->pagetables_dma) | |
+ return -ENOMEM; | |
+ | |
+ mmu->pagetables = drmm_kmalloc_array(&gna_priv->drm, mmu->num_pagetables, sizeof(*mmu->pagetables), GFP_KERNEL); | |
+ | |
+ if (!mmu->pagetables) | |
+ return -ENOMEM; | |
+ | |
+ for (i = 0; i < mmu->num_pagetables; i++) { | |
+ mmu->pagetables[i] = dmam_alloc_coherent(parent, PAGE_SIZE, | |
+ &mmu->pagetables_dma[i], GFP_KERNEL); | |
+ if (!mmu->pagetables[i]) | |
+ return -ENOMEM; | |
+ } | |
+ | |
+ gna_mmu_set(gna_priv); | |
+ | |
+ return 0; | |
+} | |
+ | |
+static struct scatterlist *gna_iterate_sgl(u64 sg_elems, struct scatterlist *sgl, dma_addr_t *sg_page, | |
+ int *sg_page_len, int *sg_pages) | |
+{ | |
+ while (sg_elems-- > 0) { | |
+ (*sg_page) += PAGE_SIZE; | |
+ (*sg_pages)++; | |
+ if (*sg_pages == *sg_page_len) { | |
+ sgl = sg_next(sgl); | |
+ if (!sgl) | |
+ break; | |
+ | |
+ *sg_page = sg_dma_address(sgl); | |
+ *sg_page_len = | |
+ round_up(sg_dma_len(sgl), PAGE_SIZE) | |
+ >> PAGE_SHIFT; | |
+ *sg_pages = 0; | |
+ } | |
+ } | |
+ | |
+ return sgl; | |
+} | |
+ | |
+ | |
+void gna_mmu_add(struct gna_device *gna_priv, struct drm_gem_shmem_object *drmshmemo) | |
+{ | |
+ struct gna_mmu_object *mmu; | |
+ struct scatterlist *sgl; | |
+ dma_addr_t sg_page; | |
+ int sg_page_len; | |
+ u32 *pagetable; | |
+ u32 mmu_page; | |
+ int sg_pages; | |
+ int i; | |
+ int j; | |
+ | |
+ mmu = &gna_priv->mmu; | |
+ mutex_lock(&gna_priv->mmu_lock); | |
+ | |
+ j = mmu->filled_pages; | |
+ sgl = drmshmemo->sgt->sgl; | |
+ | |
+ if (!sgl) { | |
+ dev_warn(gna_dev(gna_priv), "empty scatter list in memory object\n"); | |
+ goto warn_empty_sgl; | |
+ } | |
+ sg_page = sg_dma_address(sgl); | |
+ sg_page_len = round_up(sg_dma_len(sgl), PAGE_SIZE) >> PAGE_SHIFT; | |
+ sg_pages = 0; | |
+ | |
+ for (i = mmu->filled_pts; i < mmu->num_pagetables; i++) { | |
+ if (!sgl) | |
+ break; | |
+ | |
+ pagetable = mmu->pagetables[i]; | |
+ | |
+ for (j = mmu->filled_pages; j < GNA_PT_LENGTH; j++) { | |
+ mmu_page = sg_page >> PAGE_SHIFT; | |
+ pagetable[j] = mmu_page; | |
+ | |
+ mmu->filled_pages++; | |
+ | |
+ sgl = gna_iterate_sgl(1, sgl, &sg_page, &sg_page_len, | |
+ &sg_pages); | |
+ if (!sgl) | |
+ break; | |
+ } | |
+ | |
+ if (j == GNA_PT_LENGTH) { | |
+ mmu->filled_pages = 0; | |
+ mmu->filled_pts++; | |
+ } | |
+ } | |
+ | |
+ mmu->hwdesc->mmu.vamaxaddr = | |
+ (mmu->filled_pts * PAGE_SIZE * GNA_PGDIR_ENTRIES) + | |
+ (mmu->filled_pages * PAGE_SIZE) - 1; | |
+ dev_dbg(gna_dev(gna_priv), "vamaxaddr: %u\n", mmu->hwdesc->mmu.vamaxaddr); | |
+ | |
+warn_empty_sgl: | |
+ mutex_unlock(&gna_priv->mmu_lock); | |
+} | |
+ | |
+void gna_mmu_clear(struct gna_device *gna_priv) | |
+{ | |
+ struct gna_mmu_object *mmu; | |
+ int i; | |
+ | |
+ mmu = &gna_priv->mmu; | |
+ mutex_lock(&gna_priv->mmu_lock); | |
+ | |
+ for (i = 0; i < mmu->filled_pts; i++) | |
+ memset(mmu->pagetables[i], 0, PAGE_SIZE); | |
+ | |
+ if (mmu->filled_pages > 0) | |
+ memset(mmu->pagetables[mmu->filled_pts], 0, mmu->filled_pages * GNA_PT_ENTRY_SIZE); | |
+ | |
+ mmu->filled_pts = 0; | |
+ mmu->filled_pages = 0; | |
+ mmu->hwdesc->mmu.vamaxaddr = 0; | |
+ | |
+ mutex_unlock(&gna_priv->mmu_lock); | |
+} | |
+ | |
+bool gna_gem_object_put_pages_sgt(struct gna_gem_object *gnagemo) | |
+{ | |
+ struct drm_gem_shmem_object *shmem = &gnagemo->base; | |
+ struct drm_gem_object *drmgemo = &shmem->base; | |
+ | |
+ if (!dma_resv_trylock(shmem->base.resv)) | |
+ return false; | |
+ dma_unmap_sgtable(drmgemo->dev->dev, shmem->sgt, DMA_BIDIRECTIONAL, 0); | |
+ sg_free_table(shmem->sgt); | |
+ kfree(shmem->sgt); | |
+ shmem->sgt = NULL; | |
+ dma_resv_unlock(shmem->base.resv); | |
+ | |
+ drm_gem_shmem_put_pages(shmem); | |
+ | |
+ return true; | |
+} | |
+ | |
+static void gna_delete_score_requests(u32 handle, struct gna_device *gna_priv) | |
+{ | |
+ struct gna_request *req, *temp_req; | |
+ struct list_head *reqs_list; | |
+ int i; | |
+ | |
+ mutex_lock(&gna_priv->reqlist_lock); | |
+ | |
+ reqs_list = &gna_priv->request_list; | |
+ if (!list_empty(reqs_list)) { | |
+ list_for_each_entry_safe(req, temp_req, reqs_list, node) { | |
+ for (i = 0; i < req->buffer_count; ++i) { | |
+ if (req->buffer_list[i].gna.handle == handle) { | |
+ list_del_init(&req->node); | |
+ cancel_work_sync(&req->work); | |
+ atomic_dec(&gna_priv->enqueued_requests); | |
+ kref_put(&req->refcount, gna_request_release); | |
+ break; | |
+ } | |
+ } | |
+ } | |
+ } | |
+ | |
+ mutex_unlock(&gna_priv->reqlist_lock); | |
+} | |
+ | |
+void gna_gem_obj_release_work(struct work_struct *work) | |
+{ | |
+ struct gna_gem_object *gnagemo; | |
+ | |
+ gnagemo = container_of(work, struct gna_gem_object, work); | |
+ | |
+ gna_delete_score_requests(gnagemo->handle, to_gna_device(gnagemo->base.base.dev)); | |
+ | |
+ wake_up_interruptible(&gnagemo->waitq); | |
+} | |
diff --git a/drivers/gpu/drm/gna/gna_mem.h b/drivers/gpu/drm/gna/gna_mem.h | |
new file mode 100644 | |
index 000000000000..92193f9d608d | |
--- /dev/null | |
+++ b/drivers/gpu/drm/gna/gna_mem.h | |
@@ -0,0 +1,58 @@ | |
+/* SPDX-License-Identifier: GPL-2.0-only */ | |
+/* Copyright(c) 2017-2022 Intel Corporation */ | |
+ | |
+#ifndef __GNA_MEM_H__ | |
+#define __GNA_MEM_H__ | |
+ | |
+#include <linux/types.h> | |
+ | |
+#include "gna_hw.h" | |
+ | |
+struct gna_gem_object; | |
+struct work_struct; | |
+struct gna_device; | |
+ | |
+struct gna_xnn_descriptor { | |
+ u32 labase; | |
+ u16 lacount; | |
+ u16 _rsvd; | |
+}; | |
+ | |
+struct gna_mmu { | |
+ u32 vamaxaddr; | |
+ u8 __res_204[12]; | |
+ u32 pagedir_n[GNA_PGDIRN_LEN]; | |
+}; | |
+ | |
+struct gna_hw_descriptor { | |
+ u8 __res_0000[256]; | |
+ struct gna_xnn_descriptor xnn_config; | |
+ u8 __unused[248]; | |
+ struct gna_mmu mmu; | |
+}; | |
+ | |
+struct gna_mmu_object { | |
+ struct gna_hw_descriptor *hwdesc; | |
+ | |
+ dma_addr_t hwdesc_dma; | |
+ | |
+ u32 **pagetables; | |
+ dma_addr_t *pagetables_dma; | |
+ | |
+ u32 num_pagetables; | |
+ | |
+ u32 filled_pts; | |
+ u32 filled_pages; | |
+}; | |
+ | |
+int gna_mmu_init(struct gna_device *gna_priv); | |
+ | |
+void gna_mmu_add(struct gna_device *gna_priv, struct drm_gem_shmem_object *drmshmemo); | |
+ | |
+void gna_mmu_clear(struct gna_device *gna_priv); | |
+ | |
+bool gna_gem_object_put_pages_sgt(struct gna_gem_object *gna_obj); | |
+ | |
+void gna_gem_obj_release_work(struct work_struct *work); | |
+ | |
+#endif // __GNA_MEM_H__ | |
diff --git a/drivers/gpu/drm/gna/gna_pci.c b/drivers/gpu/drm/gna/gna_pci.c | |
new file mode 100644 | |
index 000000000000..eaae42142bfb | |
--- /dev/null | |
+++ b/drivers/gpu/drm/gna/gna_pci.c | |
@@ -0,0 +1,148 @@ | |
+// SPDX-License-Identifier: GPL-2.0-only | |
+// Copyright(c) 2017-2022 Intel Corporation | |
+ | |
+#include <linux/device.h> | |
+#include <linux/module.h> | |
+#include <linux/pci.h> | |
+ | |
+#include "gna_device.h" | |
+#include "gna_hw.h" | |
+#include "gna_pci.h" | |
+ | |
+static const struct gna_dev_info cnl_dev_info = { | |
+ .hwid = GNA_DEV_HWID_CNL, | |
+ GNA_GEN1_FEATURES | |
+}; | |
+ | |
+static const struct gna_dev_info glk_dev_info = { | |
+ .hwid = GNA_DEV_HWID_GLK, | |
+ GNA_GEN1_FEATURES | |
+}; | |
+ | |
+static const struct gna_dev_info ehl_dev_info = { | |
+ .hwid = GNA_DEV_HWID_EHL, | |
+ GNA_GEN1_FEATURES | |
+}; | |
+ | |
+static const struct gna_dev_info icl_dev_info = { | |
+ .hwid = GNA_DEV_HWID_ICL, | |
+ GNA_GEN1_FEATURES | |
+}; | |
+ | |
+static const struct gna_dev_info jsl_dev_info = { | |
+ .hwid = GNA_DEV_HWID_JSL, | |
+ GNA_GEN2_FEATURES | |
+}; | |
+ | |
+static const struct gna_dev_info tgl_dev_info = { | |
+ .hwid = GNA_DEV_HWID_TGL, | |
+ GNA_GEN2_FEATURES | |
+}; | |
+ | |
+static const struct gna_dev_info rkl_dev_info = { | |
+ .hwid = GNA_DEV_HWID_RKL, | |
+ GNA_GEN2_FEATURES | |
+}; | |
+ | |
+static const struct gna_dev_info adl_dev_info = { | |
+ .hwid = GNA_DEV_HWID_ADL, | |
+ GNA_GEN2_FEATURES | |
+}; | |
+ | |
+static const struct gna_dev_info rpl_dev_info = { | |
+ .hwid = GNA_DEV_HWID_RPL, | |
+ GNA_GEN2_FEATURES | |
+}; | |
+ | |
+static const struct gna_dev_info mtl_dev_info = { | |
+ .hwid = GNA_DEV_HWID_MTL, | |
+ GNA_GEN2_FEATURES | |
+}; | |
+ | |
+#define INTEL_GNA_DEVICE(hwid, info) \ | |
+ { PCI_VDEVICE(INTEL, hwid), (kernel_ulong_t)(info) } | |
+ | |
+static const struct pci_device_id gna_pci_ids[] = { | |
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_CNL, &cnl_dev_info), | |
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_EHL, &ehl_dev_info), | |
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_GLK, &glk_dev_info), | |
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_ICL, &icl_dev_info), | |
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_JSL, &jsl_dev_info), | |
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_TGL, &tgl_dev_info), | |
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_RKL, &rkl_dev_info), | |
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_ADL, &adl_dev_info), | |
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_RPL, &rpl_dev_info), | |
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_MTL, &mtl_dev_info), | |
+ { } | |
+}; | |
+ | |
+static void gna_irq_vectors_fini(void *data) | |
+{ | |
+ struct pci_dev *pcidev = data; | |
+ | |
+ pci_free_irq_vectors(pcidev); | |
+} | |
+ | |
+static int gna_irq_vectors_init(struct pci_dev *pcidev) | |
+{ | |
+ int ret; | |
+ | |
+ ret = pci_alloc_irq_vectors(pcidev, 1, 1, PCI_IRQ_ALL_TYPES); | |
+ if (ret < 0) | |
+ return ret; | |
+ | |
+ ret = devm_add_action(&pcidev->dev, gna_irq_vectors_fini, pcidev); | |
+ if (ret) | |
+ gna_irq_vectors_fini(pcidev); | |
+ | |
+ return ret; | |
+} | |
+ | |
+int gna_pci_probe(struct pci_dev *pcidev, const struct pci_device_id *pci_id) | |
+{ | |
+ struct gna_dev_info *dev_info; | |
+ void __iomem *iobase; | |
+ int irq; | |
+ int err; | |
+ | |
+ err = pcim_enable_device(pcidev); | |
+ if (err) | |
+ return err; | |
+ | |
+ err = pcim_iomap_regions(pcidev, BIT(0), pci_name(pcidev)); | |
+ if (err) | |
+ return err; | |
+ | |
+ iobase = pcim_iomap_table(pcidev)[0]; | |
+ | |
+ pci_set_master(pcidev); | |
+ | |
+ err = gna_irq_vectors_init(pcidev); | |
+ if (err < 0) | |
+ return err; | |
+ | |
+ irq = pci_irq_vector(pcidev, 0); | |
+ if (irq < 0) | |
+ return irq; | |
+ | |
+ dev_info = (struct gna_dev_info *)pci_id->driver_data; | |
+ | |
+ err = gna_probe(&pcidev->dev, dev_info, iobase, irq); | |
+ if (err) | |
+ return err; | |
+ | |
+ return 0; | |
+} | |
+ | |
+static struct pci_driver gna_pci_driver = { | |
+ .name = DRIVER_NAME, | |
+ .id_table = gna_pci_ids, | |
+ .probe = gna_pci_probe, | |
+ .driver = { | |
+ .pm = &gna_pm, | |
+ }, | |
+}; | |
+ | |
+module_pci_driver(gna_pci_driver); | |
+ | |
+MODULE_DEVICE_TABLE(pci, gna_pci_ids); | |
diff --git a/drivers/gpu/drm/gna/gna_pci.h b/drivers/gpu/drm/gna/gna_pci.h | |
new file mode 100644 | |
index 000000000000..b651fa2e6ea1 | |
--- /dev/null | |
+++ b/drivers/gpu/drm/gna/gna_pci.h | |
@@ -0,0 +1,12 @@ | |
+/* SPDX-License-Identifier: GPL-2.0-only */ | |
+/* Copyright(c) 2017-2022 Intel Corporation */ | |
+ | |
+#ifndef __GNA_PCI_H__ | |
+#define __GNA_PCI_H__ | |
+ | |
+struct pci_device_id; | |
+struct pci_dev; | |
+ | |
+int gna_pci_probe(struct pci_dev *dev, const struct pci_device_id *id); | |
+ | |
+#endif /* __GNA_PCI_H__ */ | |
diff --git a/drivers/gpu/drm/gna/gna_request.c b/drivers/gpu/drm/gna/gna_request.c | |
new file mode 100644 | |
index 000000000000..1d9572bedcc2 | |
--- /dev/null | |
+++ b/drivers/gpu/drm/gna/gna_request.c | |
@@ -0,0 +1,441 @@ | |
+// SPDX-License-Identifier: GPL-2.0-only | |
+// Copyright(c) 2017-2022 Intel Corporation | |
+ | |
+#include <drm/drm_file.h> | |
+#include <drm/drm_gem.h> | |
+#include <drm/drm_gem_shmem_helper.h> | |
+ | |
+#include <linux/atomic.h> | |
+#include <linux/device.h> | |
+#include <linux/kernel.h> | |
+#include <linux/list.h> | |
+#include <linux/math.h> | |
+#include <linux/mutex.h> | |
+#include <linux/pm_runtime.h> | |
+#include <linux/slab.h> | |
+#include <linux/timekeeping.h> | |
+#include <linux/uaccess.h> | |
+ | |
+#include "gna_device.h" | |
+#include "gna_hw.h" | |
+#include "gna_mem.h" | |
+#include "gna_request.h" | |
+#include "gna_score.h" | |
+ | |
+int gna_validate_score_config(struct gna_compute_cfg *compute_cfg, | |
+ struct gna_device *gna_priv) | |
+{ | |
+ size_t buffers_size; | |
+ | |
+ if (compute_cfg->gna_mode > GNA_MODE_XNN) { | |
+ dev_dbg(gna_dev(gna_priv), "invalid mode: %d\n", compute_cfg->gna_mode); | |
+ return -EINVAL; | |
+ } | |
+ | |
+ if (compute_cfg->layer_count > gna_priv->info.max_layer_count) { | |
+ dev_dbg(gna_dev(gna_priv), "max layer count exceeded: %u > %u\n", | |
+ compute_cfg->layer_count, gna_priv->info.max_layer_count); | |
+ return -EINVAL; | |
+ } | |
+ | |
+ if (compute_cfg->buffer_count == 0) { | |
+ dev_dbg(gna_dev(gna_priv), "no buffers\n"); | |
+ return -EINVAL; | |
+ } | |
+ | |
+ buffers_size = sizeof(struct gna_buffer) * compute_cfg->buffer_count; | |
+ if (!access_ok(u64_to_user_ptr(compute_cfg->buffers_ptr), buffers_size)) | |
+ return -EACCES; | |
+ | |
+ return 0; | |
+} | |
+ | |
+static void gna_request_update_status(struct gna_request *score_request) | |
+{ | |
+ struct gna_device *gna_priv = to_gna_device(score_request->drm_f->minor->dev); | |
+ /* The gna_priv's hw_status should be updated first */ | |
+ u32 hw_status = gna_priv->hw_status; | |
+ u32 stall_cycles; | |
+ u32 total_cycles; | |
+ | |
+ /* Technically, the time stamp can be a bit later than | |
+ * when the hw actually completed scoring. Here we just | |
+ * do our best in a deferred work, unless we want to | |
+ * tax isr for a more accurate record. | |
+ */ | |
+ score_request->drv_perf.hw_completed = ktime_get_ns(); | |
+ | |
+ score_request->hw_status = hw_status; | |
+ | |
+ score_request->status = gna_parse_hw_status(gna_priv, hw_status); | |
+ | |
+ if (gna_hw_perf_enabled(gna_priv)) { | |
+ if (hw_status & GNA_STS_STATISTICS_VALID) { | |
+ total_cycles = gna_reg_read(gna_priv, GNA_MMIO_PTC); | |
+ stall_cycles = gna_reg_read(gna_priv, GNA_MMIO_PSC); | |
+ score_request->hw_perf.total = total_cycles; | |
+ score_request->hw_perf.stall = stall_cycles; | |
+ } else | |
+ dev_warn(gna_dev(gna_priv), "GNA statistics missing\n"); | |
+ } | |
+ if (unlikely(hw_status & GNA_ERROR)) | |
+ gna_print_error_status(gna_priv, hw_status); | |
+} | |
+ | |
+static void gna_request_make_zombie(struct gna_request *score_request) | |
+{ | |
+ int i; | |
+ | |
+ for (i = 0; i < score_request->buffer_count; i++) { | |
+ kvfree((void *)(uintptr_t)score_request->buffer_list[i].gna.patches_ptr); | |
+ drm_gem_object_put(&score_request->buffer_list[i].gem->base.base); | |
+ } | |
+ kvfree(score_request->buffer_list); | |
+ score_request->buffer_list = NULL; | |
+ score_request->buffer_count = 0; | |
+} | |
+ | |
+static void gna_request_process(struct work_struct *work) | |
+{ | |
+ struct gna_buffer_with_object *buffer; | |
+ struct gna_request *score_request; | |
+ struct gna_device *gna_priv; | |
+ unsigned long hw_timeout; | |
+ int ret; | |
+ u64 i; | |
+ | |
+ score_request = container_of(work, struct gna_request, work); | |
+ gna_priv = to_gna_device(score_request->drm_f->minor->dev); | |
+ | |
+ score_request->state = ACTIVE; | |
+ | |
+ score_request->drv_perf.pre_processing = ktime_get_ns(); | |
+ | |
+ ret = pm_runtime_get_sync(gna_dev(gna_priv)); | |
+ if (ret < 0 && ret != -EACCES) { | |
+ dev_warn(gna_dev(gna_priv), "pm_runtime_get_sync() failed: %d\n", ret); | |
+ score_request->status = -ENODEV; | |
+ pm_runtime_put_noidle(gna_dev(gna_priv)); | |
+ goto tail; | |
+ } | |
+ | |
+ /* Set busy flag before kicking off HW. The isr will clear it and wake up us. There is | |
+ * no difference if isr is missed in a timeout situation of the last request. We just | |
+ * always set it busy and let the wait_event_timeout check the reset. | |
+ * wq: X -> true | |
+ * isr: X -> false | |
+ */ | |
+ gna_priv->dev_busy = true; | |
+ | |
+ ret = gna_score(score_request); | |
+ if (ret) { | |
+ if (pm_runtime_put(gna_dev(gna_priv)) < 0) | |
+ dev_warn(gna_dev(gna_priv), "pm_runtime_put() failed: %d\n", ret); | |
+ score_request->status = ret; | |
+ goto tail; | |
+ } | |
+ | |
+ score_request->drv_perf.processing = ktime_get_ns(); | |
+ | |
+ hw_timeout = gna_priv->recovery_timeout_jiffies; | |
+ | |
+ hw_timeout = wait_event_timeout(gna_priv->dev_busy_waitq, | |
+ !gna_priv->dev_busy, hw_timeout); | |
+ | |
+ if (!hw_timeout) | |
+ dev_warn(gna_dev(gna_priv), "hardware timeout occurred\n"); | |
+ | |
+ gna_priv->hw_status = gna_reg_read(gna_priv, GNA_MMIO_STS); | |
+ | |
+ gna_request_update_status(score_request); | |
+ | |
+ ret = gna_abort_hw(gna_priv); | |
+ if (ret < 0 && score_request->status == 0) | |
+ score_request->status = ret; // -ETIMEDOUT | |
+ | |
+ ret = pm_runtime_put(gna_dev(gna_priv)); | |
+ if (ret < 0) | |
+ dev_warn(gna_dev(gna_priv), "pm_runtime_put() failed: %d\n", ret); | |
+ | |
+ gna_mmu_clear(gna_priv); | |
+ | |
+ for (i = 0, buffer = score_request->buffer_list; i < score_request->buffer_count; i++, buffer++) | |
+ gna_gem_object_put_pages_sgt(buffer->gem); | |
+ | |
+tail: | |
+ score_request->drv_perf.completion = ktime_get_ns(); | |
+ score_request->state = DONE; | |
+ gna_request_make_zombie(score_request); | |
+ | |
+ atomic_dec(&gna_priv->enqueued_requests); | |
+ wake_up_interruptible_all(&score_request->waitq); | |
+} | |
+ | |
+static struct gna_request *gna_request_create(struct drm_file *file, | |
+ struct gna_compute_cfg *compute_cfg) | |
+{ | |
+ | |
+ struct gna_device *gna_priv = file->driver_priv; | |
+ struct gna_request *score_request; | |
+ | |
+ if (IS_ERR(gna_priv)) | |
+ return NULL; | |
+ | |
+ score_request = kzalloc(sizeof(*score_request), GFP_KERNEL); | |
+ if (!score_request) | |
+ return NULL; | |
+ kref_init(&score_request->refcount); | |
+ | |
+ dev_dbg(gna_dev(gna_priv), "labase: %d, lacount: %d\n", | |
+ compute_cfg->layer_base, compute_cfg->layer_count); | |
+ | |
+ score_request->request_id = atomic_inc_return(&gna_priv->request_count); | |
+ score_request->compute_cfg = *compute_cfg; | |
+ score_request->drm_f = file; | |
+ score_request->state = NEW; | |
+ init_waitqueue_head(&score_request->waitq); | |
+ INIT_WORK(&score_request->work, gna_request_process); | |
+ INIT_LIST_HEAD(&score_request->node); | |
+ | |
+ return score_request; | |
+} | |
+ | |
+/* | |
+ * returns true if [inner_offset, inner_size) is embraced by [0, outer_size). False otherwise. | |
+ */ | |
+static bool gna_validate_ranges(u64 outer_size, u64 inner_offset, u64 inner_size) | |
+{ | |
+ return inner_offset < outer_size && | |
+ inner_size <= (outer_size - inner_offset); | |
+} | |
+ | |
+static int gna_validate_patches(struct gna_device *gna_priv, __u64 buffer_size, | |
+ struct gna_memory_patch *patches, u64 count) | |
+{ | |
+ u64 idx; | |
+ | |
+ for (idx = 0; idx < count; ++idx) { | |
+ if (patches[idx].size > 8) { | |
+ dev_dbg(gna_dev(gna_priv), "invalid patch size: %llu\n", patches[idx].size); | |
+ return -EINVAL; | |
+ } | |
+ | |
+ if (!gna_validate_ranges(buffer_size, patches[idx].offset, patches[idx].size)) { | |
+ dev_dbg(gna_dev(gna_priv), | |
+ "patch out of bounds. buffer size: %llu, patch offset/size:%llu/%llu\n", | |
+ buffer_size, patches[idx].offset, patches[idx].size); | |
+ return -EINVAL; | |
+ } | |
+ } | |
+ | |
+ return 0; | |
+} | |
+ | |
+static int gna_buffer_fill_patches(struct gna_buffer *buffer, struct gna_device *gna_priv) | |
+{ | |
+ __u64 patches_user = buffer->patches_ptr; | |
+ struct gna_memory_patch *patches; | |
+ /* At this point the buffer struct lives in kernel space and so does its copied | |
+ * patches_ptr field, but the field's value is still a user-space address. This | |
+ * function sets patches_ptr to either a kernel-space address or zero before | |
+ * it returns. | |
+ */ | |
+ u64 patch_count; | |
+ int ret; | |
+ | |
+ buffer->patches_ptr = 0; | |
+ patch_count = buffer->patch_count; | |
+ if (!patch_count) | |
+ return 0; | |
+ | |
+ patches = kvmalloc_array(patch_count, sizeof(struct gna_memory_patch), GFP_KERNEL); | |
+ if (!patches) | |
+ return -ENOMEM; | |
+ | |
+ if (copy_from_user(patches, u64_to_user_ptr(patches_user), | |
+ sizeof(struct gna_memory_patch) * patch_count)) { | |
+ ret = -EFAULT; | |
+ goto err_fill_patches; | |
+ } | |
+ | |
+ ret = gna_validate_patches(gna_priv, buffer->size, patches, patch_count); | |
+ if (ret) { | |
+ dev_dbg(gna_dev(gna_priv), "buffer %p: patches' validation failed\n", buffer); | |
+ goto err_fill_patches; | |
+ } | |
+ | |
+ buffer->patches_ptr = (uintptr_t)patches; | |
+ | |
+ return 0; | |
+ | |
+err_fill_patches: | |
+ kvfree(patches); | |
+ return ret; | |
+} | |
+ | |
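+/* | |
+ * Copy the user's gna_buffer array, resolve each handle to its GEM object and | |
+ * pull in the per-buffer patch lists, validating offsets and sizes on the way. | |
+ */ | |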
+static int gna_request_fill_buffers(struct gna_request *score_request, | |
+ struct gna_compute_cfg *compute_cfg) | |
+{ | |
+ struct gna_buffer_with_object *buffer_list; | |
+ struct gna_buffer_with_object *buffer; | |
+ struct gna_buffer __user *cfg_buffers; | |
+ struct drm_gem_object *drmgemo; | |
+ struct gna_device *gna_priv; | |
+ u64 buffers_total_size = 0; | |
+ size_t gem_obj_size; | |
+ u64 buffer_count; | |
+ u32 handle; | |
+ u64 i, j; | |
+ int ret; | |
+ | |
+ | |
+ gna_priv = to_gna_device(score_request->drm_f->minor->dev); | |
+ | |
+ buffer_count = compute_cfg->buffer_count; | |
+ buffer_list = kvmalloc_array(buffer_count, sizeof(*buffer_list), GFP_KERNEL); | |
+ if (!buffer_list) | |
+ return -ENOMEM; | |
+ | |
+ cfg_buffers = u64_to_user_ptr(compute_cfg->buffers_ptr); | |
+ for (i = 0; i < buffer_count; ++i) { | |
+ if (copy_from_user(&buffer_list[i].gna, cfg_buffers + i, | |
+ sizeof(buffer_list[i].gna))) { | |
+ ret = -EFAULT; | |
+ goto err_free_buffers; | |
+ } | |
+ buffer_list[i].gem = NULL; | |
+ } | |
+ | |
+ for (i = 0; i < buffer_count; i++) { | |
+ buffer = &buffer_list[i]; | |
+ handle = buffer->gna.handle; | |
+ | |
+ if (buffer->gna.offset != 0) { | |
+ dev_dbg(gna_dev(gna_priv), "buffer->offset = %llu for handle %u in score config\n", | |
+ buffer->gna.offset, buffer->gna.handle); | |
+ ret = -EINVAL; | |
+ goto err_zero_patch_user_ptr; | |
+ } | |
+ | |
+ for (j = 0; j < i; j++) { | |
+ if (buffer_list[j].gna.handle == handle) { | |
+ dev_dbg(gna_dev(gna_priv), | |
+ "doubled memory id in score config; id:%u\n", handle); | |
+ ret = -EINVAL; | |
+ goto err_zero_patch_user_ptr; | |
+ } | |
+ } | |
+ | |
+ buffers_total_size += | |
+ round_up(buffer->gna.size, PAGE_SIZE); | |
+ if (buffers_total_size > gna_priv->info.max_hw_mem) { | |
+ dev_dbg(gna_dev(gna_priv), "total size of buffers too big at buffer %p\n", buffer); | |
+ ret = -EINVAL; | |
+ goto err_zero_patch_user_ptr; | |
+ } | |
+ | |
+ drmgemo = drm_gem_object_lookup(score_request->drm_f, handle); | |
+ | |
+ if (!drmgemo) { | |
+ dev_dbg(gna_dev(gna_priv), "memory object %u not found\n", handle); | |
+ ret = -EINVAL; | |
+ goto err_zero_patch_user_ptr; | |
+ } | |
+ | |
+ // We are still in syscall context, but a prior request may already be enqueued. | |
+ // A request may slip into the queue while some gna_gem_object is being deleted; | |
+ // this is a corner case and does little harm. | |
+ buffer->gem = to_gna_gem_obj(to_drm_gem_shmem_obj(drmgemo)); | |
+ | |
+ gem_obj_size = drmgemo->size; | |
+ | |
+ if (!gna_validate_ranges(gem_obj_size, 0, buffer->gna.size)) { | |
+ dev_dbg(gna_dev(gna_priv), | |
+ "buffer out of bounds. mo size: %zu, buffer size:%llu\n", | |
+ gem_obj_size, buffer->gna.size); | |
+ ret = -EINVAL; | |
+ goto err_zero_patch_user_ptr; | |
+ } | |
+ | |
+ ret = gna_buffer_fill_patches(&buffer->gna, gna_priv); | |
+ if (ret) | |
+ goto err_free_patches; | |
+ } | |
+ | |
+ score_request->buffer_list = buffer_list; | |
+ score_request->buffer_count = buffer_count; | |
+ | |
+ return 0; | |
+ | |
+err_zero_patch_user_ptr: | |
+ /* patches_ptr may still hold an address in userspace. | |
+ * Don't pass it to kvfree(). | |
+ */ | |
+ buffer->gna.patches_ptr = 0; | |
+ | |
+err_free_patches: | |
+ /* patches_ptr of each processed buffer should be either | |
+ * null or pointing to an allocated memory block in the | |
+ * kernel at this point. | |
+ */ | |
+ for (j = 0; j <= i; j++) { | |
+ kvfree((void *)(uintptr_t)buffer_list[j].gna.patches_ptr); | |
+ drm_gem_object_put(&buffer_list[j].gem->base.base); | |
+ } | |
+ | |
+err_free_buffers: | |
+ kvfree(buffer_list); | |
+ return ret; | |
+} | |
+ | |
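+/* | |
+ * Build a request from the user's compute config and queue it on the device | |
+ * workqueue. QoS requests get -EBUSY unless they are the only request pending. | |
+ */ | |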
+int gna_enqueue_request(struct gna_compute_cfg *compute_cfg, | |
+ struct drm_file *file, u64 *request_id) | |
+{ | |
+ bool is_qos = !!(compute_cfg->flags & GNA_FLAG_SCORE_QOS); | |
+ struct gna_device *gna_priv = file->driver_priv; | |
+ struct gna_request *score_request; | |
+ u64 pos_in_queue; | |
+ int ret; | |
+ | |
+ pos_in_queue = atomic_inc_return(&gna_priv->enqueued_requests); | |
+ if (is_qos && pos_in_queue != 1) { | |
+ ret = -EBUSY; | |
+ goto ERR_UNQUEUE_REQUEST; | |
+ } | |
+ | |
+ score_request = gna_request_create(file, compute_cfg); | |
+ if (!score_request) { | |
+ ret = -ENOMEM; | |
+ goto ERR_UNQUEUE_REQUEST; | |
+ } | |
+ | |
+ ret = gna_request_fill_buffers(score_request, compute_cfg); | |
+ if (ret) { | |
+ kref_put(&score_request->refcount, gna_request_release); | |
+ goto ERR_UNQUEUE_REQUEST; | |
+ } | |
+ | |
+ kref_get(&score_request->refcount); | |
+ mutex_lock(&gna_priv->reqlist_lock); | |
+ list_add_tail(&score_request->node, &gna_priv->request_list); | |
+ mutex_unlock(&gna_priv->reqlist_lock); | |
+ | |
+ queue_work(gna_priv->request_wq, &score_request->work); | |
+ kref_put(&score_request->refcount, gna_request_release); | |
+ | |
+ *request_id = score_request->request_id; | |
+ | |
+ return 0; | |
+ | |
+ERR_UNQUEUE_REQUEST: | |
+ atomic_dec(&gna_priv->enqueued_requests); | |
+ return ret; | |
+} | |
+ | |
+void gna_request_release(struct kref *ref) | |
+{ | |
+ struct gna_request *score_request = | |
+ container_of(ref, struct gna_request, refcount); | |
+ gna_request_make_zombie(score_request); | |
+ wake_up_interruptible_all(&score_request->waitq); | |
+ kfree(score_request); | |
+} | |
diff --git a/drivers/gpu/drm/gna/gna_request.h b/drivers/gpu/drm/gna/gna_request.h | |
new file mode 100644 | |
index 000000000000..d056e70fb369 | |
--- /dev/null | |
+++ b/drivers/gpu/drm/gna/gna_request.h | |
@@ -0,0 +1,64 @@ | |
+/* SPDX-License-Identifier: GPL-2.0-only */ | |
+/* Copyright(c) 2017-2022 Intel Corporation */ | |
+ | |
+#ifndef __GNA_REQUEST_H__ | |
+#define __GNA_REQUEST_H__ | |
+ | |
+#include <linux/kref.h> | |
+#include <linux/types.h> | |
+#include <linux/wait.h> | |
+#include <linux/workqueue.h> | |
+ | |
+#include <uapi/drm/gna_drm.h> | |
+ | |
+struct gna_device; | |
+struct gna_gem_object; | |
+struct drm_file; | |
+ | |
+enum gna_request_state { | |
+ NEW, | |
+ ACTIVE, | |
+ DONE, | |
+}; | |
+ | |
+struct gna_buffer_with_object { | |
+ struct gna_buffer gna; | |
+ struct gna_gem_object *gem; | |
+}; | |
+ | |
+struct gna_request { | |
+ u64 request_id; | |
+ | |
+ struct kref refcount; | |
+ | |
+ struct drm_file *drm_f; | |
+ | |
+ u32 hw_status; | |
+ | |
+ enum gna_request_state state; | |
+ | |
+ int status; | |
+ | |
+ struct gna_hw_perf hw_perf; | |
+ struct gna_drv_perf drv_perf; | |
+ | |
+ struct list_head node; | |
+ | |
+ struct gna_compute_cfg compute_cfg; | |
+ | |
+ struct gna_buffer_with_object *buffer_list; | |
+ u64 buffer_count; | |
+ | |
+ struct work_struct work; | |
+ struct wait_queue_head waitq; | |
+}; | |
+ | |
+int gna_validate_score_config(struct gna_compute_cfg *compute_cfg, | |
+ struct gna_device *gna_priv); | |
+ | |
+int gna_enqueue_request(struct gna_compute_cfg *compute_cfg, | |
+ struct drm_file *file, u64 *request_id); | |
+ | |
+void gna_request_release(struct kref *ref); | |
+ | |
+#endif // __GNA_REQUEST_H__ | |
diff --git a/drivers/gpu/drm/gna/gna_score.c b/drivers/gpu/drm/gna/gna_score.c | |
new file mode 100644 | |
index 000000000000..529270657a83 | |
--- /dev/null | |
+++ b/drivers/gpu/drm/gna/gna_score.c | |
@@ -0,0 +1,222 @@ | |
+// SPDX-License-Identifier: GPL-2.0-only | |
+// Copyright(c) 2017-2022 Intel Corporation | |
+ | |
+#include <drm/drm_gem.h> | |
+#include <drm/drm_gem_shmem_helper.h> | |
+ | |
+#include <linux/dma-buf.h> | |
+#include <linux/kernel.h> | |
+#include <linux/math.h> | |
+#include <linux/mm.h> | |
+#include <linux/scatterlist.h> | |
+#include <linux/string.h> | |
+#include <linux/types.h> | |
+ | |
+#include <uapi/drm/gna_drm.h> | |
+ | |
+#include "../drm_internal.h" | |
+ | |
+#include "gna_device.h" | |
+#include "gna_gem.h" | |
+#include "gna_hw.h" | |
+#include "gna_mem.h" | |
+#include "gna_request.h" | |
+#include "gna_score.h" | |
+ | |
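+/* Write a single 1/2/4/8-byte patch value at the given offset of the vmapped buffer. */ | |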
+static int gna_do_patch_memory(struct gna_device *gna_priv, | |
+ struct gna_memory_patch *patch, void *vaddr) | |
+{ | |
+ size_t size; | |
+ void *dest; | |
+ u64 value; | |
+ | |
+ value = patch->value; | |
+ size = patch->size; | |
+ dest = (u8 *)vaddr + patch->offset; | |
+ | |
+ switch (size) { | |
+ case 0: | |
+ return -EFAULT; | |
+ case sizeof(u8): | |
+ *((u8 *)dest) = (u8)value; | |
+ break; | |
+ case sizeof(u16): | |
+ *((u16 *)dest) = (u16)value; | |
+ break; | |
+ case sizeof(u32): | |
+ *((u32 *)dest) = (u32)value; | |
+ break; | |
+ case sizeof(u64): | |
+ *((u64 *)dest) = (u64)value; | |
+ break; | |
+ default: | |
+ return -EINVAL; | |
+ } | |
+ | |
+ return 0; | |
+} | |
+ | |
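+/* | |
+ * Pin the buffer's pages, apply its patches through a temporary vmap and add | |
+ * the buffer to the device MMU. | |
+ */ | |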
+static int gna_patch_memory(struct gna_device *gna_priv, struct gna_buffer_with_object *buffer) | |
+{ | |
+ struct drm_gem_shmem_object *drmshmemo = &buffer->gem->base; | |
+ struct gna_gem_object *gnagemo = buffer->gem; | |
+ struct gna_buffer *gnab = &buffer->gna; | |
+ struct gna_memory_patch *patch; | |
+ struct iosys_map vmap; | |
+ struct sg_table *sgt; | |
+ int ret = 0; | |
+ u32 i; | |
+ | |
+ dev_dbg(gna_dev(gna_priv), "handle: %u, patch_count: %llu\n", | |
+ gnab->handle, gnab->patch_count); | |
+ | |
+ sgt = drm_gem_shmem_get_pages_sgt(drmshmemo); | |
+ | |
+ if (IS_ERR(sgt)) { | |
+ ret = PTR_ERR(sgt); | |
+ goto err; | |
+ } | |
+ | |
+ if (gnab->patch_count) { | |
+ ret = drm_gem_vmap(&drmshmemo->base, &vmap); | |
+ | |
+ if (ret) | |
+ goto err_pages_sgt; | |
+ | |
+ patch = (struct gna_memory_patch *)(uintptr_t)gnab->patches_ptr; | |
+ for (i = 0; i < gnab->patch_count; i++, patch++) { | |
+ ret = gna_do_patch_memory(gna_priv, patch, vmap.vaddr); | |
+ if (ret) | |
+ break; | |
+ } | |
+ | |
+ kvfree((void *)(uintptr_t)gnab->patches_ptr); | |
+ gnab->patches_ptr = 0; | |
+ drm_gem_vunmap(&drmshmemo->base, &vmap); | |
+ if (ret) // ret from gna_do_patch_memory | |
+ goto err_pages_sgt; | |
+ } | |
+ | |
+ gna_mmu_add(gna_priv, drmshmemo); | |
+ | |
+ return 0; | |
+ | |
+err_pages_sgt: | |
+ gna_gem_object_put_pages_sgt(gnagemo); | |
+err: | |
+ return ret; | |
+} | |
+ | |
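+/* | |
+ * Buffers are laid out back to back (page aligned) in the device MMU space; find | |
+ * the buffer covering @mmu_offset and return its base offset via @memory_offset. | |
+ */ | |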
+static struct gna_buffer_with_object *gna_find_buffer(struct gna_buffer_with_object *buffer_list, | |
+ u32 buffer_count, u32 mmu_offset, u32 *memory_offset) | |
+{ | |
+ struct gna_buffer_with_object *buffer; | |
+ u32 memory_size; | |
+ u32 offset; | |
+ u32 i; | |
+ | |
+ offset = 0; | |
+ for (i = 0; i < buffer_count; i++) { | |
+ buffer = buffer_list + i; | |
+ memory_size = round_up(buffer->gna.size, PAGE_SIZE); | |
+ if (mmu_offset < offset + memory_size) { | |
+ *memory_offset = offset; | |
+ return buffer; | |
+ } | |
+ offset += memory_size; | |
+ } | |
+ | |
+ return NULL; | |
+} | |
+ | |
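+/* | |
+ * In GMM mode the descriptor is supplied inside a user buffer: locate it by its | |
+ * MMU offset and copy it into the hardware descriptor. | |
+ */ | |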
+static int gna_copy_gmm_config(struct gna_device *gna_priv, | |
+ struct gna_buffer_with_object *buffer_list, | |
+ u32 buffer_count, u32 mmu_offset) | |
+{ | |
+ struct gna_buffer_with_object *buffer; | |
+ struct gna_hw_descriptor *hwdesc; | |
+ struct drm_gem_object *drmgemo; | |
+ struct gna_mmu_object *mmu; | |
+ struct iosys_map vmap; | |
+ u32 memory_offset; | |
+ u8 *gmm_desc; | |
+ int ret = 0; | |
+ | |
+ mmu = &gna_priv->mmu; | |
+ hwdesc = mmu->hwdesc; | |
+ | |
+ buffer = gna_find_buffer(buffer_list, buffer_count, mmu_offset, &memory_offset); | |
+ if (!buffer) | |
+ return -EINVAL; | |
+ | |
+ drmgemo = &buffer->gem->base.base; | |
+ | |
+ ret = drm_gem_vmap(drmgemo, &vmap); | |
+ | |
+ if (ret) { | |
+ dev_dbg(gna_dev(gna_priv), "vmap failed: %d\n", ret); | |
+ return ret; | |
+ } | |
+ | |
+ gmm_desc = (u8 *)vmap.vaddr + (mmu_offset - memory_offset); | |
+ memcpy(&hwdesc->xnn_config, gmm_desc, sizeof(struct gna_xnn_descriptor)); | |
+ drm_gem_vunmap(drmgemo, &vmap); | |
+ | |
+ return 0; | |
+} | |
+ | |
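+/* | |
+ * Patch and map all request buffers, set up the hardware descriptor and kick | |
+ * off scoring. | |
+ */ | |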
+int gna_score(struct gna_request *score_request) | |
+{ | |
+ struct gna_buffer_with_object *buffer; | |
+ struct gna_xnn_descriptor *xnn_config; | |
+ struct gna_compute_cfg *compute_cfg; | |
+ struct gna_device *gna_priv; | |
+ struct gna_mmu_object *mmu; | |
+ u64 buffer_count; | |
+ u32 desc_base; | |
+ int ret; | |
+ u64 i; | |
+ | |
+ ret = 0; | |
+ | |
+ gna_priv = to_gna_device(score_request->drm_f->minor->dev); | |
+ | |
+ mmu = &gna_priv->mmu; | |
+ xnn_config = &mmu->hwdesc->xnn_config; | |
+ compute_cfg = &score_request->compute_cfg; | |
+ | |
+ buffer_count = score_request->buffer_count; | |
+ | |
+ for (i = 0, buffer = score_request->buffer_list; i < buffer_count; i++, buffer++) { | |
+ ret = gna_patch_memory(gna_priv, buffer); | |
+ if (ret) | |
+ goto err; | |
+ } | |
+ | |
+ switch (compute_cfg->gna_mode) { | |
+ case GNA_MODE_XNN: | |
+ dev_dbg(gna_dev(gna_priv), "xNN mode; labase: %d, lacount: %d\n", | |
+ compute_cfg->layer_base, compute_cfg->layer_count); | |
+ xnn_config->labase = compute_cfg->layer_base; | |
+ xnn_config->lacount = compute_cfg->layer_count; | |
+ break; | |
+ case GNA_MODE_GMM: | |
+ dev_dbg(gna_dev(gna_priv), "GMM mode; offset: %d\n", compute_cfg->layer_base); | |
+ ret = gna_copy_gmm_config(gna_priv, score_request->buffer_list, | |
+ buffer_count, compute_cfg->layer_base); | |
+ if (ret) | |
+ goto err; | |
+ break; | |
+ default: | |
+ ret = -EINVAL; | |
+ goto err; | |
+ } | |
+ | |
+ desc_base = (u32)(mmu->hwdesc_dma >> PAGE_SHIFT); | |
+ gna_reg_write(gna_priv, GNA_MMIO_DESBASE, desc_base); | |
+ | |
+ gna_start_scoring(gna_priv, compute_cfg); | |
+ | |
+err: | |
+ return ret; | |
+} | |
diff --git a/drivers/gpu/drm/gna/gna_score.h b/drivers/gpu/drm/gna/gna_score.h | |
new file mode 100644 | |
index 000000000000..5b154d3623e0 | |
--- /dev/null | |
+++ b/drivers/gpu/drm/gna/gna_score.h | |
@@ -0,0 +1,11 @@ | |
+/* SPDX-License-Identifier: GPL-2.0-only */ | |
+/* Copyright(c) 2017-2022 Intel Corporation */ | |
+ | |
+#ifndef __GNA_SCORE_H__ | |
+#define __GNA_SCORE_H__ | |
+ | |
+struct gna_request; | |
+ | |
+int gna_score(struct gna_request *score_request); | |
+ | |
+#endif // __GNA_SCORE_H__ | |
diff --git a/include/uapi/drm/gna_drm.h b/include/uapi/drm/gna_drm.h | |
new file mode 100644 | |
index 000000000000..677343d88987 | |
--- /dev/null | |
+++ b/include/uapi/drm/gna_drm.h | |
@@ -0,0 +1,169 @@ | |
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ | |
+/* Copyright(c) 2017-2022 Intel Corporation */ | |
+ | |
+#ifndef _GNA_DRM_H_ | |
+#define _GNA_DRM_H_ | |
+ | |
+#include <linux/const.h> | |
+#include <linux/types.h> | |
+ | |
+#include "drm.h" | |
+ | |
+#define GNA_DDI_VERSION_3 3 | |
+ | |
+/* Operation modes */ | |
+#define GNA_MODE_GMM 0 | |
+#define GNA_MODE_XNN 1 | |
+ | |
+#define GNA_PARAM_RECOVERY_TIMEOUT 1 | |
+#define GNA_PARAM_DEVICE_TYPE 2 | |
+#define GNA_PARAM_INPUT_BUFFER_S 3 | |
+#define GNA_PARAM_DDI_VERSION 4 | |
+ | |
+#define GNA_STS_SCORE_COMPLETED _BITUL(0) | |
+#define GNA_STS_STATISTICS_VALID _BITUL(3) | |
+#define GNA_STS_PCI_MMU_ERR _BITUL(4) | |
+#define GNA_STS_PCI_DMA_ERR _BITUL(5) | |
+#define GNA_STS_PCI_UNEXCOMPL_ERR _BITUL(6) | |
+#define GNA_STS_VA_OOR _BITUL(7) | |
+#define GNA_STS_PARAM_OOR _BITUL(8) | |
+#define GNA_STS_SATURATE _BITUL(17) | |
+ | |
+#define GNA_ERROR \ | |
+ (GNA_STS_PCI_DMA_ERR |\ | |
+ GNA_STS_PCI_MMU_ERR |\ | |
+ GNA_STS_PCI_UNEXCOMPL_ERR |\ | |
+ GNA_STS_PARAM_OOR |\ | |
+ GNA_STS_VA_OOR) | |
+ | |
+#define GNA_DEV_TYPE_0_9 0x09 | |
+#define GNA_DEV_TYPE_1_0 0x10 | |
+#define GNA_DEV_TYPE_2_0 0x20 | |
+#define GNA_DEV_TYPE_3_0 0x30 | |
+#define GNA_DEV_TYPE_3_5 0x35 | |
+ | |
+#define GNA_FLAG_SCORE_QOS _BITUL(0) | |
+ | |
+/* | |
+ * Describes a region of target memory to be overwritten before GNA processing starts. | |
+ */ | |
+struct gna_memory_patch { | |
+ /* offset from targeted memory */ | |
+ __u64 offset; | |
+ | |
+ __u64 size; | |
+ __u64 value; | |
+}; | |
+ | |
+struct gna_buffer { | |
+ __u32 handle; | |
+ __u32 pad; | |
+ | |
+ __u64 offset; | |
+ __u64 size; | |
+ | |
+ __u64 patch_count; | |
+ __u64 patches_ptr; | |
+}; | |
+ | |
+/* | |
+ * Driver performance timestamps in nanoseconds. | |
+ * Values are relative to system boot time and do not advance during suspend. | |
+ */ | |
+struct gna_drv_perf { | |
+ __u64 pre_processing; /* driver starts pre-processing */ | |
+ __u64 processing; /* hw starts processing */ | |
+ __u64 hw_completed; /* hw finishes processing */ | |
+ __u64 completion; /* driver finishes post-processing */ | |
+}; | |
+ | |
+struct gna_hw_perf { | |
+ __u64 total; | |
+ __u64 stall; | |
+}; | |
+ | |
+struct gna_compute_cfg { | |
+ __u32 layer_base; | |
+ __u32 layer_count; | |
+ | |
+ /* List of GNA memory buffers */ | |
+ __u64 buffers_ptr; | |
+ __u64 buffer_count; | |
+ | |
+ __u8 active_list_on; | |
+ __u8 gna_mode; | |
+ __u8 hw_perf_encoding; | |
+ __u8 flags; | |
+ | |
+ __u8 pad[4]; | |
+}; | |
+ | |
+typedef __u64 gna_param_id; | |
+ | |
+union gna_parameter { | |
+ struct { | |
+ gna_param_id id; | |
+ } in; | |
+ | |
+ struct { | |
+ __u64 value; | |
+ } out; | |
+}; | |
+ | |
+union gna_compute { | |
+ struct { | |
+ struct gna_compute_cfg config; | |
+ } in; | |
+ | |
+ struct { | |
+ __u64 request_id; | |
+ } out; | |
+}; | |
+ | |
+union gna_wait { | |
+ struct { | |
+ __u64 request_id; | |
+ __u32 timeout; | |
+ __u32 pad; | |
+ } in; | |
+ | |
+ struct { | |
+ __u32 hw_status; | |
+ __u32 pad; | |
+ struct gna_drv_perf drv_perf; | |
+ struct gna_hw_perf hw_perf; | |
+ } out; | |
+}; | |
+ | |
+struct gna_mem_id { | |
+ __u32 handle; | |
+ __u32 pad; | |
+ __u64 vma_fake_offset; | |
+ __u64 size_granted; | |
+}; | |
+ | |
+union gna_gem_new { | |
+ struct { | |
+ __u64 size; | |
+ } in; | |
+ | |
+ struct gna_mem_id out; | |
+}; | |
+ | |
+struct gna_gem_free { | |
+ __u32 handle; | |
+}; | |
+ | |
+#define DRM_GNA_GET_PARAMETER 0x00 | |
+#define DRM_GNA_GEM_NEW 0x01 | |
+#define DRM_GNA_GEM_FREE 0x02 | |
+#define DRM_GNA_COMPUTE 0x03 | |
+#define DRM_GNA_WAIT 0x04 | |
+ | |
+#define DRM_IOCTL_GNA_GET_PARAMETER DRM_IOWR(DRM_COMMAND_BASE + DRM_GNA_GET_PARAMETER, union gna_parameter) | |
+#define DRM_IOCTL_GNA_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_GNA_GEM_NEW, union gna_gem_new) | |
+#define DRM_IOCTL_GNA_GEM_FREE DRM_IOWR(DRM_COMMAND_BASE + DRM_GNA_GEM_FREE, struct gna_gem_free) | |
+#define DRM_IOCTL_GNA_COMPUTE DRM_IOWR(DRM_COMMAND_BASE + DRM_GNA_COMPUTE, union gna_compute) | |
+#define DRM_IOCTL_GNA_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_GNA_WAIT, union gna_wait) | |
+ | |
+#endif /* _GNA_DRM_H_ */ | |
-- | |
2.44.0 | |
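
The uapi above maps onto a simple submit/wait flow. Below is a minimal, hypothetical user-space sketch (not part of the patch) showing how the DRM_IOCTL_GNA_* calls could be exercised. The render-node path, the include path for the header and the layer_base/layer_count values are assumptions for illustration only; error handling is kept to a bare minimum and no model data is actually filled in.

/* Hypothetical example: assumes the driver is bound, exposes a render node, and
 * the patched uapi header (plus libdrm's drm.h) is on the include path.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include "gna_drm.h"	/* include/uapi/drm/gna_drm.h from this patch */

int main(void)
{
	union gna_parameter param = { .in.id = GNA_PARAM_DEVICE_TYPE };
	union gna_gem_new gem = { .in.size = 4096 };
	struct gna_gem_free gem_free = { 0 };
	union gna_compute compute;
	union gna_wait wait;
	struct gna_buffer buffer;
	int fd, ret;

	fd = open("/dev/dri/renderD128", O_RDWR); /* assumed GNA render node */
	if (fd < 0)
		return 1;

	/* Query the device type reported by the driver. */
	ret = ioctl(fd, DRM_IOCTL_GNA_GET_PARAMETER, &param);
	if (ret)
		goto out;
	printf("device type: 0x%llx\n", (unsigned long long)param.out.value);

	/* Allocate one shmem-backed GEM object for model/IO data. */
	ret = ioctl(fd, DRM_IOCTL_GNA_GEM_NEW, &gem);
	if (ret)
		goto out;

	/* Describe that object as the single buffer of an xNN compute request. */
	memset(&buffer, 0, sizeof(buffer));
	buffer.handle = gem.out.handle;
	buffer.size = gem.out.size_granted;

	memset(&compute, 0, sizeof(compute));
	compute.in.config.gna_mode = GNA_MODE_XNN;
	compute.in.config.layer_base = 0;	/* assumed model layout */
	compute.in.config.layer_count = 1;	/* assumed model layout */
	compute.in.config.buffers_ptr = (__u64)(uintptr_t)&buffer;
	compute.in.config.buffer_count = 1;

	ret = ioctl(fd, DRM_IOCTL_GNA_COMPUTE, &compute);
	if (ret)
		goto out_free;

	/* Block until the request finishes or the 1000 ms timeout expires. */
	memset(&wait, 0, sizeof(wait));
	wait.in.request_id = compute.out.request_id;
	wait.in.timeout = 1000;
	ret = ioctl(fd, DRM_IOCTL_GNA_WAIT, &wait);
	if (!ret)
		printf("hw status: 0x%x\n", wait.out.hw_status);

out_free:
	gem_free.handle = gem.out.handle;
	ioctl(fd, DRM_IOCTL_GNA_GEM_FREE, &gem_free);
out:
	close(fd);
	return ret ? 1 : 0;
}

Filling the GEM object with real model data (for example by mmap()ing the vma_fake_offset returned in gna_mem_id) and building per-buffer patch lists are outside the scope of this sketch.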