[PATCH 1/8] dt-bindings: tegra186-hsp: Add shared interrupts

2018-05-08 Thread Mikko Perttunen
Non-doorbell interrupts are routed through "shared interrupts". These
interrupts can be mapped to various internal interrupt lines. Add
interrupt properties for shared interrupts to the tegra186-hsp device
tree bindings.

Signed-off-by: Mikko Perttunen 
---
 Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.txt 
b/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.txt
index b99d25fc2f26..9edcdf82d719 100644
--- a/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.txt
+++ b/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.txt
@@ -21,6 +21,8 @@ Required properties:
 Contains a list of names for the interrupts described by the interrupt
 property. May contain the following entries, in any order:
 - "doorbell"
+- "sharedN", where 'N' is a number from zero up to the number of
+  external interrupts supported by the HSP instance minus one.
 Users of this binding MUST look up entries in the interrupt property
 by name, using this interrupt-names property to do so.
 - interrupts
-- 
2.16.1



[PATCH 7/8] arm64: tegra: Add nodes for tcu on Tegra194

2018-05-08 Thread Mikko Perttunen
Add nodes required for communication through the Tegra Combined UART.
This includes the AON HSP instance, addition of shared interrupts
for the TOP0 HSP instance, and finally the TCU node itself. Also
mark the HSP instances as compatible with tegra194-hsp, as the hardware
is not identical to, but is compatible with, tegra186-hsp.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/boot/dts/nvidia/tegra194.dtsi | 34 +---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
index 6d699815a84f..d7f780b06fe2 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
@@ -217,10 +217,31 @@
};
 
hsp_top0: hsp@3c0 {
-   compatible = "nvidia,tegra186-hsp";
+   compatible = "nvidia,tegra194-hsp", 
"nvidia,tegra186-hsp";
reg = <0x03c0 0xa>;
-   interrupts = ;
-   interrupt-names = "doorbell";
+   interrupts = ,
+,
+,
+,
+,
+,
+,
+,
+;
+   interrupt-names = "doorbell", "shared0", "shared1", 
"shared2",
+ "shared3", "shared4", "shared5", 
"shared6",
+ "shared7";
+   #mbox-cells = <2>;
+   };
+
+   hsp_aon: hsp@c15 {
+   compatible = "nvidia,tegra194-hsp", 
"nvidia,tegra186-hsp";
+   reg = <0x0c15 0xa>;
+   interrupts = ,
+,
+,
+;
+   interrupt-names = "shared0", "shared1", "shared2", 
"shared3";
#mbox-cells = <2>;
};
 
@@ -382,6 +403,13 @@
};
};
 
+   tcu: tcu {
+   compatible = "nvidia,tegra194-tcu";
+   mboxes = <_top0 TEGRA_HSP_MBOX_TYPE_SM 0>,
+<_aon TEGRA_HSP_MBOX_TYPE_SM 1>;
+   mbox-names = "rx", "tx";
+   };
+
timer {
compatible = "arm,armv8-timer";
interrupts = 

[PATCH 5/8] mailbox: tegra-hsp: Add support for shared mailboxes

2018-05-08 Thread Mikko Perttunen
The Tegra HSP block supports 'shared mailboxes' that are simple 32-bit
registers consisting of a FULL bit in MSB position and 31 bits of data.
The hardware can be configured to trigger interrupts when a mailbox
is empty or full. Add support for these shared mailboxes to the HSP
driver.

The initial use for the mailboxes is the Tegra Combined UART. For this
purpose, we use interrupts to receive data, and spinning to wait for
the transmit mailbox to be emptied to minimize unnecessary overhead.

Signed-off-by: Mikko Perttunen 
---
 drivers/mailbox/tegra-hsp.c | 216 +++-
 1 file changed, 193 insertions(+), 23 deletions(-)

diff --git a/drivers/mailbox/tegra-hsp.c b/drivers/mailbox/tegra-hsp.c
index 16eb970f2c9f..77bc8ed7ef15 100644
--- a/drivers/mailbox/tegra-hsp.c
+++ b/drivers/mailbox/tegra-hsp.c
@@ -21,6 +21,11 @@
 
 #include 
 
+#include "mailbox.h"
+
+#define HSP_INT0_IE0x100
+#define HSP_INT_IR 0x304
+
 #define HSP_INT_DIMENSIONING   0x380
 #define HSP_nSM_SHIFT  0
 #define HSP_nSS_SHIFT  4
@@ -34,6 +39,8 @@
 #define HSP_DB_RAW 0x8
 #define HSP_DB_PENDING 0xc
 
+#define HSP_SM_SHRD_MBOX   0x0
+
 #define HSP_DB_CCPLEX  1
 #define HSP_DB_BPMP3
 #define HSP_DB_MAX 7
@@ -68,6 +75,18 @@ struct tegra_hsp_db_map {
unsigned int index;
 };
 
+struct tegra_hsp_mailbox {
+   struct tegra_hsp_channel channel;
+   unsigned int index;
+   bool sending;
+};
+
+static inline struct tegra_hsp_mailbox *
+channel_to_mailbox(struct tegra_hsp_channel *channel)
+{
+   return container_of(channel, struct tegra_hsp_mailbox, channel);
+}
+
 struct tegra_hsp_soc {
const struct tegra_hsp_db_map *map;
 };
@@ -77,6 +96,7 @@ struct tegra_hsp {
struct mbox_controller mbox;
void __iomem *regs;
unsigned int doorbell_irq;
+   unsigned int shared_irq;
unsigned int num_sm;
unsigned int num_as;
unsigned int num_ss;
@@ -85,6 +105,7 @@ struct tegra_hsp {
spinlock_t lock;
 
struct list_head doorbells;
+   struct tegra_hsp_mailbox *mailboxes;
 };
 
 static inline struct tegra_hsp *
@@ -189,6 +210,35 @@ static irqreturn_t tegra_hsp_doorbell_irq(int irq, void 
*data)
return IRQ_HANDLED;
 }
 
+static irqreturn_t tegra_hsp_shared_irq(int irq, void *data)
+{
+   struct tegra_hsp_mailbox *mb;
+   struct tegra_hsp *hsp = data;
+   unsigned long bit, mask;
+   u32 value;
+
+   mask = tegra_hsp_readl(hsp, HSP_INT_IR);
+   /* Only interested in FULL interrupts */
+   mask &= 0xff << 8;
+
+   for_each_set_bit(bit, , 16) {
+   unsigned int mb_i = bit % 8;
+
+   mb = >mailboxes[mb_i];
+
+   if (!mb->sending) {
+   value = tegra_hsp_channel_readl(>channel,
+   HSP_SM_SHRD_MBOX);
+   value &= ~BIT(31);
+   mbox_chan_received_data(mb->channel.chan, );
+   tegra_hsp_channel_writel(>channel, value,
+HSP_SM_SHRD_MBOX);
+   }
+   }
+
+   return IRQ_HANDLED;
+}
+
 static struct tegra_hsp_channel *
 tegra_hsp_doorbell_create(struct tegra_hsp *hsp, const char *name,
  unsigned int master, unsigned int index)
@@ -277,15 +327,58 @@ static void tegra_hsp_doorbell_shutdown(struct 
tegra_hsp_doorbell *db)
spin_unlock_irqrestore(>lock, flags);
 }
 
+static int tegra_hsp_mailbox_startup(struct tegra_hsp_mailbox *mb)
+{
+   struct tegra_hsp *hsp = mb->channel.hsp;
+   u32 value;
+
+   mb->channel.chan->txdone_method = TXDONE_BY_BLOCK;
+
+   /* Route FULL interrupt to external IRQ 0 */
+   value = tegra_hsp_readl(hsp, HSP_INT0_IE);
+   value |= BIT(mb->index + 8);
+   tegra_hsp_writel(hsp, value, HSP_INT0_IE);
+
+   return 0;
+}
+
+static int tegra_hsp_mailbox_shutdown(struct tegra_hsp_mailbox *mb)
+{
+   struct tegra_hsp *hsp = mb->channel.hsp;
+   u32 value;
+
+   value = tegra_hsp_readl(hsp, HSP_INT0_IE);
+   value &= ~BIT(mb->index + 8);
+   tegra_hsp_writel(hsp, value, HSP_INT0_IE);
+
+   return 0;
+}
+
 static int tegra_hsp_send_data(struct mbox_chan *chan, void *data)
 {
struct tegra_hsp_channel *channel = chan->con_priv;
-   struct tegra_hsp_doorbell *db;
+   struct tegra_hsp_mailbox *mailbox;
+   uint32_t value;
 
switch (channel->type) {
case TEGRA_HSP_MBOX_TYPE_DB:
-   db = channel_to_doorbell(channel);
-   tegra_hsp_channel_writel(>channel, 1, HSP_DB_TRIGGER);
+   tegra_hsp_channel_writel(channel, 1, HSP_DB_TRIGGER);
+   return 0;
+   case TEGRA_HSP_MBOX_TYPE_SM:
+   mailbox = channel_to_mailbox(channel);
+  

[PATCH 3/8] mailbox: Add transmit done by blocking option

2018-05-08 Thread Mikko Perttunen
Add a new TXDONE option, TXDONE_BY_BLOCK. With this option, the
send_data function of the mailbox driver is expected to block until
the message has been sent. The new option is used with the Tegra
Combined UART driver to minimize unnecessary overhead when transmitting
data.

Signed-off-by: Mikko Perttunen 
---
 drivers/mailbox/mailbox.c | 30 +-
 drivers/mailbox/mailbox.h |  1 +
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c
index 674b35f402f5..5c76b70e673c 100644
--- a/drivers/mailbox/mailbox.c
+++ b/drivers/mailbox/mailbox.c
@@ -53,6 +53,8 @@ static int add_to_rbuf(struct mbox_chan *chan, void *mssg)
return idx;
 }
 
+static void tx_tick(struct mbox_chan *chan, int r, bool submit_next);
+
 static void msg_submit(struct mbox_chan *chan)
 {
unsigned count, idx;
@@ -60,10 +62,13 @@ static void msg_submit(struct mbox_chan *chan)
void *data;
int err = -EBUSY;
 
+next:
spin_lock_irqsave(>lock, flags);
 
-   if (!chan->msg_count || chan->active_req)
-   goto exit;
+   if (!chan->msg_count || chan->active_req) {
+   spin_unlock_irqrestore(>lock, flags);
+   return;
+   }
 
count = chan->msg_count;
idx = chan->msg_free;
@@ -82,15 +87,21 @@ static void msg_submit(struct mbox_chan *chan)
chan->active_req = data;
chan->msg_count--;
}
-exit:
+
spin_unlock_irqrestore(>lock, flags);
 
if (!err && (chan->txdone_method & TXDONE_BY_POLL))
/* kick start the timer immediately to avoid delays */
hrtimer_start(>mbox->poll_hrt, 0, HRTIMER_MODE_REL);
+
+   if (chan->txdone_method & TXDONE_BY_BLOCK) {
+   tx_tick(chan, err, false);
+   if (!err)
+   goto next;
+   }
 }
 
-static void tx_tick(struct mbox_chan *chan, int r)
+static void tx_tick(struct mbox_chan *chan, int r, bool submit_next)
 {
unsigned long flags;
void *mssg;
@@ -101,7 +112,8 @@ static void tx_tick(struct mbox_chan *chan, int r)
spin_unlock_irqrestore(>lock, flags);
 
/* Submit next message */
-   msg_submit(chan);
+   if (submit_next)
+   msg_submit(chan);
 
if (!mssg)
return;
@@ -127,7 +139,7 @@ static enum hrtimer_restart txdone_hrtimer(struct hrtimer 
*hrtimer)
if (chan->active_req && chan->cl) {
txdone = chan->mbox->ops->last_tx_done(chan);
if (txdone)
-   tx_tick(chan, 0);
+   tx_tick(chan, 0, true);
else
resched = true;
}
@@ -176,7 +188,7 @@ void mbox_chan_txdone(struct mbox_chan *chan, int r)
return;
}
 
-   tx_tick(chan, r);
+   tx_tick(chan, r, true);
 }
 EXPORT_SYMBOL_GPL(mbox_chan_txdone);
 
@@ -196,7 +208,7 @@ void mbox_client_txdone(struct mbox_chan *chan, int r)
return;
}
 
-   tx_tick(chan, r);
+   tx_tick(chan, r, true);
 }
 EXPORT_SYMBOL_GPL(mbox_client_txdone);
 
@@ -275,7 +287,7 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg)
ret = wait_for_completion_timeout(>tx_complete, wait);
if (ret == 0) {
t = -ETIME;
-   tx_tick(chan, t);
+   tx_tick(chan, t, true);
}
}
 
diff --git a/drivers/mailbox/mailbox.h b/drivers/mailbox/mailbox.h
index 456ba68513bb..ec68e2e28cd6 100644
--- a/drivers/mailbox/mailbox.h
+++ b/drivers/mailbox/mailbox.h
@@ -10,5 +10,6 @@
 #define TXDONE_BY_IRQ  BIT(0) /* controller has remote RTR irq */
 #define TXDONE_BY_POLL BIT(1) /* controller can read status of last TX */
 #define TXDONE_BY_ACK  BIT(2) /* S/W ACK recevied by Client ticks the TX */
+#define TXDONE_BY_BLOCKBIT(3) /* mailbox driver send_data blocks until 
done */
 
 #endif /* __MAILBOX_H */
-- 
2.16.1



[PATCH 2/8] dt-bindings: serial: Add bindings for nvidia,tegra194-tcu

2018-05-08 Thread Mikko Perttunen
Add bindings for the Tegra Combined UART device used to talk to the
UART console on Tegra194 systems.

Signed-off-by: Mikko Perttunen 
---
 .../bindings/serial/nvidia,tegra194-tcu.txt| 35 ++
 1 file changed, 35 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/serial/nvidia,tegra194-tcu.txt

diff --git a/Documentation/devicetree/bindings/serial/nvidia,tegra194-tcu.txt 
b/Documentation/devicetree/bindings/serial/nvidia,tegra194-tcu.txt
new file mode 100644
index ..86763bc5d74f
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/nvidia,tegra194-tcu.txt
@@ -0,0 +1,35 @@
+NVIDIA Tegra Combined UART (TCU)
+
+The TCU is a system for sharing a hardware UART instance among multiple
+systems within the Tegra SoC. It is implemented through a mailbox-
+based protocol where each "virtual UART" has a pair of mailboxes, one
+for transmitting and one for receiving, that is used to communicate
+with the hardware implementing the TCU.
+
+Required properties:
+- name : Should be tcu
+- compatible
+Array of strings
+One of:
+- "nvidia,tegra194-tcu"
+- mbox-names:
+"rx" - Mailbox for receiving data from hardware UART
+"tx" - Mailbox for transmitting data to hardware UART
+- mboxes: Mailboxes corresponding to the mbox-names. 
+
+This node is a mailbox consumer. See the following files for details of
+the mailbox subsystem, and the specifiers implemented by the relevant
+provider(s):
+
+- .../mailbox/mailbox.txt
+- .../mailbox/nvidia,tegra186-hsp.txt
+
+Example bindings:
+-
+
+tcu: tcu {
+   compatible = "nvidia,tegra194-tcu";
+   mboxes = <&hsp_top0 TEGRA_HSP_MBOX_TYPE_SM 0>,
+<&hsp_aon TEGRA_HSP_MBOX_TYPE_SM 1>;
+   mbox-names = "rx", "tx";
+};
-- 
2.16.1



[PATCH 0/8] Tegra Combined UART driver

2018-05-08 Thread Mikko Perttunen
Hi all,

on Tegra194, the primary console UART is the "Tegra Combined UART",
or TCU. This is a "virtual UART", where each consumer communicates
with a central implementation over mailboxes. The central
implementation then multiplexes the streams and arbitrates use of
a hardware serial port. This driver implements the consumer portion
to allow using the primary console.

The series is split into the following parts:
* patches 1 and 2 add the device tree bindings for mailbox and tcu
  itself.
* patch 3 adds a blocking transmission option to the mailbox
  framework.
* patches 4 and 5 add support for the "shared mailbox" primitive
  to the Tegra HSP driver.
* patch 6 adds the TCU driver itself
* patches 7 and 8 do the necessary device tree changes.

The series has been tested on the Tegra194 P2972 board.

Thanks,
Mikko

Mikko Perttunen (8):
  dt-bindings: tegra186-hsp: Add shared interrupts
  dt-bindings: serial: Add bindings for nvidia,tegra194-tcu
  mailbox: Add transmit done by blocking option
  mailbox: tegra-hsp: Refactor in preparation of mailboxes
  mailbox: tegra-hsp: Add support for shared mailboxes
  serial: Add Tegra Combined UART driver
  arm64: tegra: Add nodes for tcu on Tegra194
  arm64: tegra: Mark tcu as primary serial port on Tegra194 P2888

 .../bindings/mailbox/nvidia,tegra186-hsp.txt   |   2 +
 .../bindings/serial/nvidia,tegra194-tcu.txt|  35 +++
 arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi |   2 +-
 arch/arm64/boot/dts/nvidia/tegra194.dtsi   |  34 ++-
 drivers/mailbox/mailbox.c  |  30 +-
 drivers/mailbox/mailbox.h  |   1 +
 drivers/mailbox/tegra-hsp.c| 320 +
 drivers/tty/serial/Kconfig |   9 +
 drivers/tty/serial/Makefile|   1 +
 drivers/tty/serial/tegra-tcu.c | 302 +++
 include/uapi/linux/serial_core.h   |   3 +
 11 files changed, 671 insertions(+), 68 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/serial/nvidia,tegra194-tcu.txt
 create mode 100644 drivers/tty/serial/tegra-tcu.c

-- 
2.16.1



[PATCH 0/6] Host1x and VIC support for Tegra186

2017-08-17 Thread Mikko Perttunen
Hi everyone,

this series adds basic support for the Host1x channel engine and the
VIC 2d compositor unit on Tegra186. The first three patches do the
required device tree changes, the fourth patch updates the device tree
binding documentation, and the two remaining patches add the actual
implementation, almost all of which is in Host1x itself.

The Tegra186 Host1x is a relatively large update over previous
generations, which can be seen in the diffstat. The biggest change is
that Host1x now contains separate hypervisor and vm register
apertures to support virtualization at the hardware level. This driver,
however, currently assumes that this instance of Linux is the sole
operating system having access to the hardware.

This, combined with the increased numbers of supported channels and
syncpoints, has caused a number of register space changes that are
responsible for most of the updated code.

The series has been tested on the Jetson TX1 (T210) and TX2 (T186)
using the host1x_test test suite available at

http://github.com/cyndis/host1x_test

The series itself is available at

http://github.com/cyndis/linux, branch host1x-t186-1

Cheers,
Mikko

Mikko Perttunen (6):
  arm64: tegra: Add #power-domain-cells for BPMP
  arm64: tegra: Add host1x on Tegra186
  arm64: tegra: Add VIC on Tegra186
  dt-bindings: host1x: Fix and add Tegra186 information
  gpu: host1x: Add Tegra186 support
  drm/tegra: Add Tegra186 support for VIC

 .../display/tegra/nvidia,tegra20-host1x.txt|   9 +-
 arch/arm64/boot/dts/nvidia/tegra186.dtsi   |  31 
 drivers/gpu/drm/tegra/drm.c|   1 +
 drivers/gpu/drm/tegra/vic.c|  10 ++
 drivers/gpu/host1x/Makefile|   3 +-
 drivers/gpu/host1x/dev.c   |  60 ++-
 drivers/gpu/host1x/dev.h   |   4 +
 drivers/gpu/host1x/hw/cdma_hw.c|  49 +++---
 drivers/gpu/host1x/hw/debug_hw.c   | 137 +---
 .../gpu/host1x/hw/{debug_hw.c => debug_hw_1x01.c}  | 160 --
 drivers/gpu/host1x/hw/debug_hw_1x06.c  | 133 +++
 drivers/gpu/host1x/hw/host1x01.c   |   2 +
 drivers/gpu/host1x/hw/host1x02.c   |   2 +
 drivers/gpu/host1x/hw/host1x04.c   |   2 +
 drivers/gpu/host1x/hw/host1x05.c   |   2 +
 drivers/gpu/host1x/hw/{host1x02.c => host1x06.c}   |  12 +-
 drivers/gpu/host1x/hw/{host1x02.c => host1x06.h}   |  30 +---
 drivers/gpu/host1x/hw/host1x06_hardware.h  | 142 
 drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h |  32 
 drivers/gpu/host1x/hw/hw_host1x06_uclass.h | 181 +
 drivers/gpu/host1x/hw/hw_host1x06_vm.h |  47 ++
 drivers/gpu/host1x/hw/intr_hw.c|  29 ++--
 22 files changed, 719 insertions(+), 359 deletions(-)
 copy drivers/gpu/host1x/hw/{debug_hw.c => debug_hw_1x01.c} (53%)
 create mode 100644 drivers/gpu/host1x/hw/debug_hw_1x06.c
 copy drivers/gpu/host1x/hw/{host1x02.c => host1x06.c} (84%)
 copy drivers/gpu/host1x/hw/{host1x02.c => host1x06.h} (50%)
 create mode 100644 drivers/gpu/host1x/hw/host1x06_hardware.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x06_uclass.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x06_vm.h

-- 
2.14.1



[PATCH 6/6] drm/tegra: Add Tegra186 support for VIC

2017-08-17 Thread Mikko Perttunen
Add Tegra186 support for VIC - no changes are required except for new
firmware and compatibility string.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/drm.c |  1 +
 drivers/gpu/drm/tegra/vic.c | 10 ++
 2 files changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 3ba659a5940d..e3331a2bc082 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1281,6 +1281,7 @@ static const struct of_device_id host1x_drm_subdevs[] = {
{ .compatible = "nvidia,tegra210-sor", },
{ .compatible = "nvidia,tegra210-sor1", },
{ .compatible = "nvidia,tegra210-vic", },
+   { .compatible = "nvidia,tegra186-vic", },
{ /* sentinel */ }
 };
 
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index 2448229fa653..6697a21a250d 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -270,9 +270,16 @@ static const struct vic_config vic_t210_config = {
.firmware = NVIDIA_TEGRA_210_VIC_FIRMWARE,
 };
 
+#define NVIDIA_TEGRA_186_VIC_FIRMWARE "nvidia/tegra186/vic04_ucode.bin"
+
+static const struct vic_config vic_t186_config = {
+   .firmware = NVIDIA_TEGRA_186_VIC_FIRMWARE,
+};
+
 static const struct of_device_id vic_match[] = {
{ .compatible = "nvidia,tegra124-vic", .data = _t124_config },
{ .compatible = "nvidia,tegra210-vic", .data = _t210_config },
+   { .compatible = "nvidia,tegra186-vic", .data = _t186_config },
{ },
 };
 
@@ -405,3 +412,6 @@ MODULE_FIRMWARE(NVIDIA_TEGRA_124_VIC_FIRMWARE);
 #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC)
 MODULE_FIRMWARE(NVIDIA_TEGRA_210_VIC_FIRMWARE);
 #endif
+#if IS_ENABLED(CONFIG_ARCH_TEGRA_186_SOC)
+MODULE_FIRMWARE(NVIDIA_TEGRA_186_VIC_FIRMWARE);
+#endif
-- 
2.14.1



[PATCH 1/6] arm64: tegra: Add #power-domain-cells for BPMP

2017-08-17 Thread Mikko Perttunen
Add #power-domain-cells for the BPMP node on Tegra186 so that the power
domain provider may be used.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/boot/dts/nvidia/tegra186.dtsi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
index 0b0552c9f7dd..a964d246c0e9 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
@@ -443,6 +443,7 @@
shmem = <_bpmp_tx _bpmp_rx>;
#clock-cells = <1>;
#reset-cells = <1>;
+   #power-domain-cells = <1>;
 
bpmp_i2c: i2c {
compatible = "nvidia,tegra186-bpmp-i2c";
-- 
2.14.1



[PATCH 2/6] arm64: tegra: Add host1x on Tegra186

2017-08-17 Thread Mikko Perttunen
Add the node for Host1x on the Tegra186, without any subdevices
for now.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/boot/dts/nvidia/tegra186.dtsi | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
index a964d246c0e9..3556f89ddf1d 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
@@ -355,6 +355,24 @@
nvidia,bpmp = <>;
};
 
+   host1x@13e0 {
+   compatible = "nvidia,tegra186-host1x", "simple-bus";
+   reg = <0x0 0x13e0 0x0 0x1>,
+ <0x0 0x13e1 0x0 0x1>;
+   reg-names = "hypervisor", "vm";
+   interrupts = ,
+;
+   clocks = < TEGRA186_CLK_HOST1X>;
+   clock-names = "host1x";
+   resets = < TEGRA186_RESET_HOST1X>;
+   reset-names = "host1x";
+
+   #address-cells = <2>;
+   #size-cells = <2>;
+
+   ranges = <0x0 0x1500 0x0 0x1500 0x0 0x0100>;
+   };
+
gpu@1700 {
compatible = "nvidia,gp10b";
reg = <0x0 0x1700 0x0 0x100>,
-- 
2.14.1



[PATCH 4/6] dt-bindings: host1x: Fix and add Tegra186 information

2017-08-17 Thread Mikko Perttunen
Add note that address/size-cells should be 2 on 64-bit systems,
and add Tegra186-specific register range properties.

Signed-off-by: Mikko Perttunen 
---
 .../devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt  | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git 
a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt 
b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt
index 74e1e8add5a1..b3e785b47100 100644
--- a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt
+++ b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt
@@ -3,11 +3,16 @@ NVIDIA Tegra host1x
 Required properties:
 - compatible: "nvidia,tegra-host1x"
 - reg: Physical base address and length of the controller's registers.
+  For pre-Tegra186, one entry describing the whole register area.
+  For Tegra186, one entry for each entry in reg-names:
+"vm" - VM region assigned to Linux
+"hypervisor" - Hypervisor region (only if Linux acts as hypervisor)
 - interrupts: The interrupt outputs from the controller.
 - #address-cells: The number of cells used to represent physical base addresses
-  in the host1x address space. Should be 1.
+  in the host1x address space. Should be 1 for 32-bit and 2 for 64-bit systems.
 - #size-cells: The number of cells used to represent the size of an address
-  range in the host1x address space. Should be 1.
+  range in the host1x address space. Should be 1 for 32-bit and 2 for 64-bit
+  systems.
 - ranges: The mapping of the host1x address space to the CPU address space.
 - clocks: Must contain one entry, for the module clock.
   See ../clocks/clock-bindings.txt for details.
-- 
2.14.1



[PATCH 5/6] gpu: host1x: Add Tegra186 support

2017-08-17 Thread Mikko Perttunen
Add support for the implementation of Host1x present on the Tegra186.
The register space has been shuffled around a little bit, requiring
addition of some chip-specific code sections. Tegra186 also adds
several new features, most importantly the hypervisor, but those are
not yet supported with this commit.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/Makefile|   3 +-
 drivers/gpu/host1x/dev.c   |  60 ++-
 drivers/gpu/host1x/dev.h   |   4 +
 drivers/gpu/host1x/hw/cdma_hw.c|  49 +++---
 drivers/gpu/host1x/hw/debug_hw.c   | 137 +---
 .../gpu/host1x/hw/{debug_hw.c => debug_hw_1x01.c}  | 160 --
 drivers/gpu/host1x/hw/debug_hw_1x06.c  | 133 +++
 drivers/gpu/host1x/hw/host1x01.c   |   2 +
 drivers/gpu/host1x/hw/host1x02.c   |   2 +
 drivers/gpu/host1x/hw/host1x04.c   |   2 +
 drivers/gpu/host1x/hw/host1x05.c   |   2 +
 drivers/gpu/host1x/hw/{host1x02.c => host1x06.c}   |  12 +-
 drivers/gpu/host1x/hw/{host1x02.c => host1x06.h}   |  30 +---
 drivers/gpu/host1x/hw/host1x06_hardware.h  | 142 
 drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h |  32 
 drivers/gpu/host1x/hw/hw_host1x06_uclass.h | 181 +
 drivers/gpu/host1x/hw/hw_host1x06_vm.h |  47 ++
 drivers/gpu/host1x/hw/intr_hw.c|  29 ++--
 18 files changed, 670 insertions(+), 357 deletions(-)
 copy drivers/gpu/host1x/hw/{debug_hw.c => debug_hw_1x01.c} (53%)
 create mode 100644 drivers/gpu/host1x/hw/debug_hw_1x06.c
 copy drivers/gpu/host1x/hw/{host1x02.c => host1x06.c} (84%)
 copy drivers/gpu/host1x/hw/{host1x02.c => host1x06.h} (50%)
 create mode 100644 drivers/gpu/host1x/hw/host1x06_hardware.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x06_uclass.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x06_vm.h

diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index a1d9974cfcb5..4fb61bd57aee 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -11,6 +11,7 @@ host1x-y = \
hw/host1x01.o \
hw/host1x02.o \
hw/host1x04.o \
-   hw/host1x05.o
+   hw/host1x05.o \
+   hw/host1x06.o
 
 obj-$(CONFIG_TEGRA_HOST1X) += host1x.o
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 2c58a390123a..6a4ff2d59496 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -39,6 +39,17 @@
 #include "hw/host1x02.h"
 #include "hw/host1x04.h"
 #include "hw/host1x05.h"
+#include "hw/host1x06.h"
+
+void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r)
+{
+   writel(v, host1x->hv_regs + r);
+}
+
+u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r)
+{
+   return readl(host1x->hv_regs + r);
+}
 
 void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r)
 {
@@ -104,7 +115,19 @@ static const struct host1x_info host1x05_info = {
.dma_mask = DMA_BIT_MASK(34),
 };
 
+static const struct host1x_info host1x06_info = {
+   .nb_channels = 63,
+   .nb_pts = 576,
+   .nb_mlocks = 24,
+   .nb_bases = 16,
+   .init = host1x06_init,
+   .sync_offset = 0x0,
+   .dma_mask = DMA_BIT_MASK(34),
+   .has_hypervisor = true,
+};
+
 static const struct of_device_id host1x_of_match[] = {
+   { .compatible = "nvidia,tegra186-host1x", .data = _info, },
{ .compatible = "nvidia,tegra210-host1x", .data = _info, },
{ .compatible = "nvidia,tegra124-host1x", .data = _info, },
{ .compatible = "nvidia,tegra114-host1x", .data = _info, },
@@ -117,8 +140,9 @@ MODULE_DEVICE_TABLE(of, host1x_of_match);
 static int host1x_probe(struct platform_device *pdev)
 {
const struct of_device_id *id;
+   const struct host1x_info *info;
struct host1x *host;
-   struct resource *regs;
+   struct resource *regs, *hv_regs = NULL;
int syncpt_irq;
int err;
 
@@ -126,10 +150,28 @@ static int host1x_probe(struct platform_device *pdev)
if (!id)
return -EINVAL;
 
-   regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-   if (!regs) {
-   dev_err(>dev, "failed to get registers\n");
-   return -ENXIO;
+   info = id->data;
+
+   if (info->has_hypervisor) {
+   regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "vm");
+   if (!regs) {
+   dev_err(>dev, "failed to get vm registers\n");
+   return -ENXIO;
+   }
+
+   hv_regs = platform_get_resource_byname(pdev, IORESOUR

[PATCH 3/6] arm64: tegra: Add VIC on Tegra186

2017-08-17 Thread Mikko Perttunen
Add a node for the Video Image Compositor on the Tegra186.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/boot/dts/nvidia/tegra186.dtsi | 12 
 1 file changed, 12 insertions(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
index 3556f89ddf1d..477d733f23eb 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
@@ -371,6 +371,18 @@
#size-cells = <2>;
 
ranges = <0x0 0x1500 0x0 0x1500 0x0 0x0100>;
+
+   vic@1534 {
+   compatible = "nvidia,tegra186-vic";
+   reg = <0x0 0x1534 0x0 0x4>;
+   interrupts = ;
+   clocks = < TEGRA186_CLK_VIC>;
+   clock-names = "vic";
+   resets = < TEGRA186_RESET_VIC>;
+   reset-names = "vic";
+
+   power-domains = < TEGRA186_POWER_DOMAIN_VIC>;
+   };
};
 
gpu@1700 {
-- 
2.14.1



[PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

2017-08-18 Thread Mikko Perttunen
Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
specific channels, preventing any other channels from incrementing
them.

Enable this feature where available and assign syncpoints to channels
when submitting a job. Syncpoints are currently never unassigned from
channels since that would require extra work and is unnecessary with
the current channel allocation model.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/dev.h   | 16 
 drivers/gpu/host1x/hw/channel_hw.c |  3 +++
 drivers/gpu/host1x/hw/syncpt_hw.c  | 26 ++
 drivers/gpu/host1x/syncpt.c|  3 +++
 4 files changed, 48 insertions(+)

diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index def802c0a6bf..2432a30ff6e2 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
u32 (*load)(struct host1x_syncpt *syncpt);
int (*cpu_incr)(struct host1x_syncpt *syncpt);
int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
+   void (*assign_channel)(struct host1x_syncpt *syncpt,
+  struct host1x_channel *channel);
+   void (*set_protection)(struct host1x *host, bool enabled);
 };
 
 struct host1x_intr_ops {
@@ -186,6 +189,19 @@ static inline int host1x_hw_syncpt_patch_wait(struct 
host1x *host,
return host->syncpt_op->patch_wait(sp, patch_addr);
 }
 
+static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
+  struct host1x_syncpt *sp,
+  struct host1x_channel *ch)
+{
+   return host->syncpt_op->assign_channel(sp, ch);
+}
+
+static inline void host1x_hw_syncpt_set_protection(struct host1x *host,
+  bool enabled)
+{
+   return host->syncpt_op->set_protection(host, enabled);
+}
+
 static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
void (*syncpt_thresh_work)(struct work_struct *))
 {
diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
b/drivers/gpu/host1x/hw/channel_hw.c
index 8447a56c41ca..0161da331702 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
 
syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
 
+   /* assign syncpoint to channel */
+   host1x_hw_syncpt_assign_channel(host, sp, ch);
+
job->syncpt_end = syncval;
 
/* add a setclass for modules that require it */
diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c 
b/drivers/gpu/host1x/hw/syncpt_hw.c
index 7b0270d60742..5d117ab1699e 100644
--- a/drivers/gpu/host1x/hw/syncpt_hw.c
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -106,6 +106,30 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, 
void *patch_addr)
return 0;
 }
 
+static void syncpt_assign_channel(struct host1x_syncpt *sp,
+ struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+   struct host1x *host = sp->host;
+
+   if (!host->hv_regs)
+   return;
+
+   host1x_sync_writel(host,
+  HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
+  HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
+#endif
+}
+
+static void syncpt_set_protection(struct host1x *host, bool enabled)
+{
+#if HOST1X_HW >= 6
+   host1x_hypervisor_writel(host,
+enabled ? HOST1X_HV_SYNCPT_PROT_EN_CH_EN : 0,
+HOST1X_HV_SYNCPT_PROT_EN);
+#endif
+}
+
 static const struct host1x_syncpt_ops host1x_syncpt_ops = {
.restore = syncpt_restore,
.restore_wait_base = syncpt_restore_wait_base,
@@ -113,4 +137,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
.load = syncpt_load,
.cpu_incr = syncpt_cpu_incr,
.patch_wait = syncpt_patch_wait,
+   .assign_channel = syncpt_assign_channel,
+   .set_protection = syncpt_set_protection,
 };
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 048ac9e344ce..fe4d963b3e2a 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
for (i = 0; i < host->info->nb_pts; i++) {
syncpt[i].id = i;
syncpt[i].host = host;
+
+   host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
}
 
for (i = 0; i < host->info->nb_bases; i++)
@@ -408,6 +410,7 @@ int host1x_syncpt_init(struct host1x *host)
host->bases = bases;
 
host1x_syncpt_restore(host);
+   host1x_hw_syncpt_set_protection(host, true);
 
/* Allocate sync point to use for clearing waits for expired fences */
host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);
-- 
2.14.1



[PATCH 0/4] Miscellaneous improvements to Host1x and TegraDRM

2017-08-18 Thread Mikko Perttunen
Hi all,

here are some new features and improvements.

Patch 1 enables syncpoint protection which prevents channels from
touching syncpoints not belonging to them on Tegra186.

Patch 2 enables the gather filter which prevents userspace command
buffers from using CDMA commands usually reserved for the kernel.
A test is available at git://github.com/cyndis/host1x_test, branch
gather-filter.

Patch 3 greatly improves formatting of debug dumps spewed by host1x
in case of job timeouts. They are now actually readable by humans
without use of additional scripts.

Patch 4 is a simple aesthetic fix to the TegraDRM submit path.

Everything was tested on TX1 and TX2 and should be applied on top of the
previously posted Tegra186 support series.

Cheers,
Mikko

*** BLURB HERE ***

Mikko Perttunen (4):
  gpu: host1x: Enable Tegra186 syncpoint protection
  gpu: host1x: Enable gather filter
  gpu: host1x: Improve debug disassembly formatting
  drm/tegra: Use u64_to_user_ptr helper

 drivers/gpu/drm/tegra/drm.c |  9 +++---
 drivers/gpu/host1x/debug.c  | 14 -
 drivers/gpu/host1x/debug.h  | 14 ++---
 drivers/gpu/host1x/dev.h| 16 ++
 drivers/gpu/host1x/hw/channel_hw.c  | 25 
 drivers/gpu/host1x/hw/debug_hw.c| 46 ++---
 drivers/gpu/host1x/hw/debug_hw_1x01.c   |  8 ++---
 drivers/gpu/host1x/hw/debug_hw_1x06.c   |  9 +++---
 drivers/gpu/host1x/hw/hw_host1x04_channel.h | 12 
 drivers/gpu/host1x/hw/hw_host1x05_channel.h | 12 
 drivers/gpu/host1x/hw/syncpt_hw.c   | 26 
 drivers/gpu/host1x/syncpt.c |  3 ++
 12 files changed, 159 insertions(+), 35 deletions(-)

-- 
2.14.1



[PATCH 4/4] drm/tegra: Use u64_to_user_ptr helper

2017-08-18 Thread Mikko Perttunen
Use the u64_to_user_ptr helper macro to cast IOCTL argument u64 values
to user pointers instead of writing out the cast manually.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/drm.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index e3331a2bc082..78c98736b0a5 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -389,11 +389,10 @@ int tegra_drm_submit(struct tegra_drm_context *context,
unsigned int num_relocs = args->num_relocs;
unsigned int num_waitchks = args->num_waitchks;
struct drm_tegra_cmdbuf __user *cmdbufs =
-   (void __user *)(uintptr_t)args->cmdbufs;
-   struct drm_tegra_reloc __user *relocs =
-   (void __user *)(uintptr_t)args->relocs;
+   u64_to_user_ptr(args->cmdbufs);
+   struct drm_tegra_reloc __user *relocs = u64_to_user_ptr(args->relocs);
struct drm_tegra_waitchk __user *waitchks =
-   (void __user *)(uintptr_t)args->waitchks;
+   u64_to_user_ptr(args->waitchks);
struct drm_tegra_syncpt syncpt;
struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
struct host1x_syncpt *sp;
@@ -520,7 +519,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
}
}
 
-   if (copy_from_user(&syncpt, (void __user *)(uintptr_t)args->syncpts,
+   if (copy_from_user(&syncpt, u64_to_user_ptr(args->syncpts),
   sizeof(syncpt))) {
err = -EFAULT;
goto fail;
-- 
2.14.1



[PATCH 2/4] gpu: host1x: Enable gather filter

2017-08-18 Thread Mikko Perttunen
The gather filter is a feature present on Tegra124 and newer where the
hardware prevents GATHERed command buffers from executing commands
normally reserved for the CDMA pushbuffer which is maintained by the
kernel driver.

This commit enables the gather filter on all supporting hardware.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/hw/channel_hw.c  | 22 ++
 drivers/gpu/host1x/hw/hw_host1x04_channel.h | 12 
 drivers/gpu/host1x/hw/hw_host1x05_channel.h | 12 
 3 files changed, 46 insertions(+)

diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
b/drivers/gpu/host1x/hw/channel_hw.c
index 0161da331702..5c0dc6bb51d1 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -181,10 +181,32 @@ static int channel_submit(struct host1x_job *job)
return err;
 }
 
+static void enable_gather_filter(struct host1x *host,
+struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+   u32 val;
+
+   if (!host->hv_regs)
+   return;
+
+   val = host1x_hypervisor_readl(
+   host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+   val |= BIT(ch->id % 32);
+   host1x_hypervisor_writel(
+   host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+#elif HOST1X_HW >= 4
+   host1x_ch_writel(ch,
+HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1),
+HOST1X_CHANNEL_CHANNELCTRL);
+#endif
+}
+
 static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
   unsigned int index)
 {
ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE;
+   enable_gather_filter(dev, ch);
return 0;
 }
 
diff --git a/drivers/gpu/host1x/hw/hw_host1x04_channel.h 
b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
index 95e6f96142b9..2e8b635aa660 100644
--- a/drivers/gpu/host1x/hw/hw_host1x04_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
@@ -117,5 +117,17 @@ static inline u32 host1x_channel_dmactrl_dmainitget(void)
 }
 #define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
host1x_channel_dmactrl_dmainitget()
+static inline u32 host1x_channel_channelctrl_r(void)
+{
+   return 0x98;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL \
+   host1x_channel_channelctrl_r()
+static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
+{
+   return (v & 0x1) << 2;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
+   host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)
 
 #endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_channel.h 
b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
index fce6e2c1ff4c..abbbc2641ce6 100644
--- a/drivers/gpu/host1x/hw/hw_host1x05_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
@@ -117,5 +117,17 @@ static inline u32 host1x_channel_dmactrl_dmainitget(void)
 }
 #define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
host1x_channel_dmactrl_dmainitget()
+static inline u32 host1x_channel_channelctrl_r(void)
+{
+   return 0x98;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL \
+   host1x_channel_channelctrl_r()
+static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
+{
+   return (v & 0x1) << 2;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
+   host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)
 
 #endif
-- 
2.14.1



[PATCH 3/4] gpu: host1x: Improve debug disassembly formatting

2017-08-18 Thread Mikko Perttunen
The host1x driver prints out "disassembly" dumps of the command FIFO
and gather contents on submission timeouts. However, the output has
been quite difficult to read with unnecessary newlines and occasional
missing parentheses.

Fix these problems by using pr_cont to remove unnecessary newlines
and by fixing other small issues.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/debug.c| 14 ++-
 drivers/gpu/host1x/debug.h| 14 ---
 drivers/gpu/host1x/hw/debug_hw.c  | 46 ++-
 drivers/gpu/host1x/hw/debug_hw_1x01.c |  8 +++---
 drivers/gpu/host1x/hw/debug_hw_1x06.c |  9 ---
 5 files changed, 61 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index 2aae0e63214c..dc77ec452ffc 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -40,7 +40,19 @@ void host1x_debug_output(struct output *o, const char *fmt, 
...)
len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
va_end(args);
 
-   o->fn(o->ctx, o->buf, len);
+   o->fn(o->ctx, o->buf, len, false);
+}
+
+void host1x_debug_cont(struct output *o, const char *fmt, ...)
+{
+   va_list args;
+   int len;
+
+   va_start(args, fmt);
+   len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
+   va_end(args);
+
+   o->fn(o->ctx, o->buf, len, true);
 }
 
 static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
diff --git a/drivers/gpu/host1x/debug.h b/drivers/gpu/host1x/debug.h
index 4595b2e0799f..990cce47e737 100644
--- a/drivers/gpu/host1x/debug.h
+++ b/drivers/gpu/host1x/debug.h
@@ -24,22 +24,28 @@
 struct host1x;
 
 struct output {
-   void (*fn)(void *ctx, const char *str, size_t len);
+   void (*fn)(void *ctx, const char *str, size_t len, bool cont);
void *ctx;
char buf[256];
 };
 
-static inline void write_to_seqfile(void *ctx, const char *str, size_t len)
+static inline void write_to_seqfile(void *ctx, const char *str, size_t len,
+   bool cont)
 {
seq_write((struct seq_file *)ctx, str, len);
 }
 
-static inline void write_to_printk(void *ctx, const char *str, size_t len)
+static inline void write_to_printk(void *ctx, const char *str, size_t len,
+  bool cont)
 {
-   pr_info("%s", str);
+   if (cont)
+   pr_cont("%s", str);
+   else
+   pr_info("%s", str);
 }
 
 void __printf(2, 3) host1x_debug_output(struct output *o, const char *fmt, 
...);
+void __printf(2, 3) host1x_debug_cont(struct output *o, const char *fmt, ...);
 
 extern unsigned int host1x_debug_trace_cmdbuf;
 
diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index 770d92e62d69..1e67667e308c 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -40,48 +40,59 @@ enum {
 
 static unsigned int show_channel_command(struct output *o, u32 val)
 {
-   unsigned int mask, subop;
+   unsigned int mask, subop, num;
 
switch (val >> 28) {
case HOST1X_OPCODE_SETCLASS:
mask = val & 0x3f;
if (mask) {
-   host1x_debug_output(o, "SETCL(class=%03x, offset=%03x, 
mask=%02x, [",
+   host1x_debug_cont(o, "SETCL(class=%03x, offset=%03x, 
mask=%02x, [",
val >> 6 & 0x3ff,
val >> 16 & 0xfff, mask);
return hweight8(mask);
}
 
-   host1x_debug_output(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff);
+   host1x_debug_cont(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff);
return 0;
 
case HOST1X_OPCODE_INCR:
-   host1x_debug_output(o, "INCR(offset=%03x, [",
+   num = val & 0xffff;
+   host1x_debug_cont(o, "INCR(offset=%03x, [",
val >> 16 & 0xfff);
-   return val & 0xffff;
+   if (!num)
+   host1x_debug_cont(o, "])\n");
+
+   return num;
 
case HOST1X_OPCODE_NONINCR:
-   host1x_debug_output(o, "NONINCR(offset=%03x, [",
+   num = val & 0xffff;
+   host1x_debug_cont(o, "NONINCR(offset=%03x, [",
val >> 16 & 0xfff);
-   return val & 0xffff;
+   if (!num)
+   host1x_debug_cont(o, "])\n");
+
+   return num;
 
case HOST1X_OPCODE_MASK:
mask = val & 0xffff;
-   host1x_debug_output(o, "MASK(offset=

[PATCH v2 2/5] dt-bindings: Add bindings for nvidia,tegra186-bpmp-thermal

2017-07-24 Thread Mikko Perttunen
In Tegra186, the BPMP (Boot and Power Management Processor) implements
an interface that is used to read system temperatures, including CPU
cluster and GPU temperatures. This binding describes the thermal sensor
that is exposed by BPMP.

Signed-off-by: Mikko Perttunen 
Acked-by: Rob Herring 
---
 .../thermal/nvidia,tegra186-bpmp-thermal.txt   | 32 ++
 .../dt-bindings/thermal/tegra186-bpmp-thermal.h| 14 ++
 2 files changed, 46 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/thermal/nvidia,tegra186-bpmp-thermal.txt
 create mode 100644 include/dt-bindings/thermal/tegra186-bpmp-thermal.h

diff --git 
a/Documentation/devicetree/bindings/thermal/nvidia,tegra186-bpmp-thermal.txt 
b/Documentation/devicetree/bindings/thermal/nvidia,tegra186-bpmp-thermal.txt
new file mode 100644
index 000000000000..276387dd6815
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/nvidia,tegra186-bpmp-thermal.txt
@@ -0,0 +1,32 @@
+NVIDIA Tegra186 BPMP thermal sensor
+
+In Tegra186, the BPMP (Boot and Power Management Processor) implements an
+interface that is used to read system temperatures, including CPU cluster
+and GPU temperatures. This binding describes the thermal sensor that is
+exposed by BPMP.
+
+The BPMP thermal node must be located directly inside the main BPMP node. See
+../firmware/nvidia,tegra186-bpmp.txt for details of the BPMP binding.
+
+This node represents a thermal sensor. See thermal.txt for details of the
+core thermal binding.
+
+Required properties:
+- compatible:
+Array of strings.
+One of:
+- "nvidia,tegra186-bpmp-thermal".
+- #thermal-sensor-cells: Cell for sensor index.
+Single-cell integer.
+Must be <1>.
+
+Example:
+
+bpmp {
+   ...
+
+   bpmp_thermal: thermal {
+   compatible = "nvidia,tegra186-bpmp-thermal";
+   #thermal-sensor-cells = <1>;
+   };
+};
diff --git a/include/dt-bindings/thermal/tegra186-bpmp-thermal.h 
b/include/dt-bindings/thermal/tegra186-bpmp-thermal.h
new file mode 100644
index 000000000000..a96b8fa31aab
--- /dev/null
+++ b/include/dt-bindings/thermal/tegra186-bpmp-thermal.h
@@ -0,0 +1,14 @@
+/*
+ * This header provides constants for binding nvidia,tegra186-bpmp-thermal.
+ */
+
+#ifndef _DT_BINDINGS_THERMAL_TEGRA186_BPMP_THERMAL_H
+#define _DT_BINDINGS_THERMAL_TEGRA186_BPMP_THERMAL_H
+
+#define TEGRA186_BPMP_THERMAL_ZONE_CPU 2
+#define TEGRA186_BPMP_THERMAL_ZONE_GPU 3
+#define TEGRA186_BPMP_THERMAL_ZONE_AUX 4
+#define TEGRA186_BPMP_THERMAL_ZONE_PLLX 5
+#define TEGRA186_BPMP_THERMAL_ZONE_AO 6
+
+#endif
-- 
2.13.3



[PATCH v2 3/5] firmware: tegra: Expose tegra_bpmp_mrq_return

2017-07-24 Thread Mikko Perttunen
Expose and export the tegra_bpmp_mrq_return function for use by drivers
outside the core BPMP driver. This function is used to reply to
messages originating from the BPMP, which the thermal driver needs to
do.

Signed-off-by: Mikko Perttunen 
---
 drivers/firmware/tegra/bpmp.c | 5 +++--
 include/soc/tegra/bpmp.h  | 2 ++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/firmware/tegra/bpmp.c b/drivers/firmware/tegra/bpmp.c
index b25179517cc5..53996a33a863 100644
--- a/drivers/firmware/tegra/bpmp.c
+++ b/drivers/firmware/tegra/bpmp.c
@@ -387,8 +387,8 @@ static struct tegra_bpmp_mrq *tegra_bpmp_find_mrq(struct 
tegra_bpmp *bpmp,
return NULL;
 }
 
-static void tegra_bpmp_mrq_return(struct tegra_bpmp_channel *channel,
- int code, const void *data, size_t size)
+void tegra_bpmp_mrq_return(struct tegra_bpmp_channel *channel, int code,
+  const void *data, size_t size)
 {
unsigned long flags = channel->ib->flags;
struct tegra_bpmp *bpmp = channel->bpmp;
@@ -426,6 +426,7 @@ static void tegra_bpmp_mrq_return(struct tegra_bpmp_channel 
*channel,
mbox_client_txdone(bpmp->mbox.channel, 0);
}
 }
+EXPORT_SYMBOL_GPL(tegra_bpmp_mrq_return);
 
 static void tegra_bpmp_handle_mrq(struct tegra_bpmp *bpmp,
  unsigned int mrq,
diff --git a/include/soc/tegra/bpmp.h b/include/soc/tegra/bpmp.h
index 9ba65222bd3f..16d36dfe9fcd 100644
--- a/include/soc/tegra/bpmp.h
+++ b/include/soc/tegra/bpmp.h
@@ -117,6 +117,8 @@ int tegra_bpmp_transfer_atomic(struct tegra_bpmp *bpmp,
   struct tegra_bpmp_message *msg);
 int tegra_bpmp_transfer(struct tegra_bpmp *bpmp,
struct tegra_bpmp_message *msg);
+void tegra_bpmp_mrq_return(struct tegra_bpmp_channel *channel, int code,
+  const void *data, size_t size);
 
 int tegra_bpmp_request_mrq(struct tegra_bpmp *bpmp, unsigned int mrq,
   tegra_bpmp_mrq_handler_t handler, void *data);
-- 
2.13.3



[PATCH v2 5/5] thermal: Add Tegra BPMP thermal sensor driver

2017-07-24 Thread Mikko Perttunen
On Tegra186, the BPMP (Boot and Power Management Processor) exposes an
interface to thermal sensors on the system-on-chip. This driver
implements access to the interface. It supports reading the
temperature, setting trip points and receiving notification of a
tripped trip point.

Signed-off-by: Mikko Perttunen 
---
v2:
- don't allocate space for disabled zones
- allow compilation with COMPILE_TEST

 drivers/thermal/Makefile |   2 +-
 drivers/thermal/tegra/Kconfig|   7 +
 drivers/thermal/tegra/Makefile   |   3 +-
 drivers/thermal/tegra/bpmp-thermal.c | 263 +++
 4 files changed, 273 insertions(+), 2 deletions(-)
 create mode 100644 drivers/thermal/tegra/bpmp-thermal.c

diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 094d7039981c..c03dccdba7b8 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -54,7 +54,7 @@ obj-$(CONFIG_INTEL_BXT_PMIC_THERMAL) += 
intel_bxt_pmic_thermal.o
 obj-$(CONFIG_INTEL_PCH_THERMAL)+= intel_pch_thermal.o
 obj-$(CONFIG_ST_THERMAL)   += st/
 obj-$(CONFIG_QCOM_TSENS)   += qcom/
-obj-$(CONFIG_TEGRA_SOCTHERM)   += tegra/
+obj-y  += tegra/
 obj-$(CONFIG_HISI_THERMAL) += hisi_thermal.o
 obj-$(CONFIG_MTK_THERMAL)  += mtk_thermal.o
 obj-$(CONFIG_GENERIC_ADC_THERMAL)  += thermal-generic-adc.o
diff --git a/drivers/thermal/tegra/Kconfig b/drivers/thermal/tegra/Kconfig
index cec586ec7e4b..f8740f7852e3 100644
--- a/drivers/thermal/tegra/Kconfig
+++ b/drivers/thermal/tegra/Kconfig
@@ -10,4 +10,11 @@ config TEGRA_SOCTHERM
  zones to manage temperatures. This option is also required for the
  emergency thermal reset (thermtrip) feature to function.
 
+config TEGRA_BPMP_THERMAL
+   tristate "Tegra BPMP thermal sensing"
+   depends on TEGRA_BPMP || COMPILE_TEST
+   help
+Enable this option for support for sensing system temperature of NVIDIA
+Tegra systems-on-chip with the BPMP coprocessor (Tegra186).
+
 endmenu
diff --git a/drivers/thermal/tegra/Makefile b/drivers/thermal/tegra/Makefile
index 1ce1af2cf0f5..757abcd1feaf 100644
--- a/drivers/thermal/tegra/Makefile
+++ b/drivers/thermal/tegra/Makefile
@@ -1,4 +1,5 @@
-obj-$(CONFIG_TEGRA_SOCTHERM)   += tegra-soctherm.o
+obj-$(CONFIG_TEGRA_SOCTHERM)   += tegra-soctherm.o
+obj-$(CONFIG_TEGRA_BPMP_THERMAL)   += bpmp-thermal.o
 
 tegra-soctherm-y   := soctherm.o soctherm-fuse.o
 tegra-soctherm-$(CONFIG_ARCH_TEGRA_124_SOC)+= tegra124-soctherm.o
diff --git a/drivers/thermal/tegra/bpmp-thermal.c 
b/drivers/thermal/tegra/bpmp-thermal.c
new file mode 100644
index 000000000000..b0980dbca3b3
--- /dev/null
+++ b/drivers/thermal/tegra/bpmp-thermal.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2015-2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Author:
+ * Mikko Perttunen 
+ * Aapo Vienamo
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+struct tegra_bpmp_thermal_zone {
+   struct tegra_bpmp_thermal *tegra;
+   struct thermal_zone_device *tzd;
+   struct work_struct tz_device_update_work;
+   unsigned int idx;
+};
+
+struct tegra_bpmp_thermal {
+   struct device *dev;
+   struct tegra_bpmp *bpmp;
+   unsigned int num_zones;
+   struct tegra_bpmp_thermal_zone **zones;
+};
+
+static int tegra_bpmp_thermal_get_temp(void *data, int *out_temp)
+{
+   struct tegra_bpmp_thermal_zone *zone = data;
+   struct mrq_thermal_host_to_bpmp_request req;
+   union mrq_thermal_bpmp_to_host_response reply;
+   struct tegra_bpmp_message msg;
+   int err;
+
+   memset(&req, 0, sizeof(req));
+   req.type = CMD_THERMAL_GET_TEMP;
+   req.get_temp.zone = zone->idx;
+
+   memset(&msg, 0, sizeof(msg));
+   msg.mrq = MRQ_THERMAL;
+   msg.tx.data = &req;
+   msg.tx.size = sizeof(req);
+   msg.rx.data = &reply;
+   msg.rx.size = sizeof(reply);
+
+   err = tegra_bpmp_transfer(zone->tegra->bpmp, &msg);
+   if (err)
+   return err;
+
+   *out_temp = reply.get_temp.temp;
+
+   return 0;
+}
+
+static int tegra_bpmp_thermal_set_trips(void *data, int low, int high)
+{
+   struct tegra_bpmp_thermal_zone *zone = data;
+   struct mrq_thermal_host_to_bpmp_request req;
+   struct tegra_bpmp_message msg;
+
+   memset(&req, 0, sizeof(req));
+   req.type = CMD_THERMAL_SET_TRIP;
+   req.set_trip.zone = zone->idx;
+   req.s

[PATCH v2 4/5] firmware: tegra: Add stubs when BPMP not enabled

2017-07-24 Thread Mikko Perttunen
Add static inline stubs to bpmp.h when CONFIG_TEGRA_BPMP is not enabled.
This allows building BPMP-related drivers with COMPILE_TEST.

Signed-off-by: Mikko Perttunen 
---
v2:
- added patch

 include/soc/tegra/bpmp.h | 42 +++---
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/include/soc/tegra/bpmp.h b/include/soc/tegra/bpmp.h
index 16d36dfe9fcd..4f9adb7790cc 100644
--- a/include/soc/tegra/bpmp.h
+++ b/include/soc/tegra/bpmp.h
@@ -96,9 +96,6 @@ struct tegra_bpmp {
struct genpd_onecell_data genpd;
 };
 
-struct tegra_bpmp *tegra_bpmp_get(struct device *dev);
-void tegra_bpmp_put(struct tegra_bpmp *bpmp);
-
 struct tegra_bpmp_message {
unsigned int mrq;
 
@@ -113,6 +110,9 @@ struct tegra_bpmp_message {
} rx;
 };
 
+#if IS_ENABLED(CONFIG_TEGRA_BPMP)
+struct tegra_bpmp *tegra_bpmp_get(struct device *dev);
+void tegra_bpmp_put(struct tegra_bpmp *bpmp);
 int tegra_bpmp_transfer_atomic(struct tegra_bpmp *bpmp,
   struct tegra_bpmp_message *msg);
 int tegra_bpmp_transfer(struct tegra_bpmp *bpmp,
@@ -124,6 +124,42 @@ int tegra_bpmp_request_mrq(struct tegra_bpmp *bpmp, 
unsigned int mrq,
   tegra_bpmp_mrq_handler_t handler, void *data);
 void tegra_bpmp_free_mrq(struct tegra_bpmp *bpmp, unsigned int mrq,
 void *data);
+#else
+static inline struct tegra_bpmp *tegra_bpmp_get(struct device *dev)
+{
+   return ERR_PTR(-ENOTSUPP);
+}
+static inline void tegra_bpmp_put(struct tegra_bpmp *bpmp)
+{
+}
+static inline int tegra_bpmp_transfer_atomic(struct tegra_bpmp *bpmp,
+struct tegra_bpmp_message *msg)
+{
+   return -ENOTSUPP;
+}
+static inline int tegra_bpmp_transfer(struct tegra_bpmp *bpmp,
+ struct tegra_bpmp_message *msg)
+{
+   return -ENOTSUPP;
+}
+static inline void tegra_bpmp_mrq_return(struct tegra_bpmp_channel *channel,
+int code, const void *data,
+size_t size)
+{
+}
+
+static inline int tegra_bpmp_request_mrq(struct tegra_bpmp *bpmp,
+unsigned int mrq,
+tegra_bpmp_mrq_handler_t handler,
+void *data)
+{
+   return -ENOTSUPP;
+}
+static inline void tegra_bpmp_free_mrq(struct tegra_bpmp *bpmp,
+  unsigned int mrq, void *data)
+{
+}
+#endif
 
 #if IS_ENABLED(CONFIG_CLK_TEGRA_BPMP)
 int tegra_bpmp_init_clocks(struct tegra_bpmp *bpmp);
-- 
2.13.3



[PATCH v2 1/5] arm64: tegra: Add BPMP thermal sensor to Tegra186

2017-07-24 Thread Mikko Perttunen
This adds the thermal sensor device provided by the BPMP, and the
relevant thermal zones, to the Tegra186 device tree.

Signed-off-by: Mikko Perttunen 
---
v2:
- added trips and cooling-maps nodes
 arch/arm64/boot/dts/nvidia/tegra186.dtsi | 103 +++
 1 file changed, 103 insertions(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
index 0b0552c9f7dd..6adcf25e13d1 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
@@ -4,6 +4,7 @@
 #include 
 #include 
 #include 
+#include <dt-bindings/thermal/tegra186-bpmp-thermal.h>
 
 / {
compatible = "nvidia,tegra186";
@@ -451,6 +452,108 @@
#size-cells = <0>;
status = "disabled";
};
+
+   bpmp_thermal: thermal {
+   compatible = "nvidia,tegra186-bpmp-thermal";
+   #thermal-sensor-cells = <1>;
+   };
+   };
+
+   thermal-zones {
+   a57 {
+   polling-delay = <0>;
+   polling-delay-passive = <1000>;
+
+   thermal-sensors =
+   <&bpmp_thermal TEGRA186_BPMP_THERMAL_ZONE_CPU>;
+
+   trips {
+   critical {
+   temperature = <101000>;
+   hysteresis = <0>;
+   type = "critical";
+   };
+   };
+
+   cooling-maps {
+   };
+   };
+
+   denver {
+   polling-delay = <0>;
+   polling-delay-passive = <1000>;
+
+   thermal-sensors =
+   <&bpmp_thermal TEGRA186_BPMP_THERMAL_ZONE_AUX>;
+
+   trips {
+   critical {
+   temperature = <101000>;
+   hysteresis = <0>;
+   type = "critical";
+   };
+   };
+
+   cooling-maps {
+   };
+   };
+
+   gpu {
+   polling-delay = <0>;
+   polling-delay-passive = <1000>;
+
+   thermal-sensors =
+   <&bpmp_thermal TEGRA186_BPMP_THERMAL_ZONE_GPU>;
+
+   trips {
+   critical {
+   temperature = <101000>;
+   hysteresis = <0>;
+   type = "critical";
+   };
+   };
+
+   cooling-maps {
+   };
+   };
+
+   pll {
+   polling-delay = <0>;
+   polling-delay-passive = <1000>;
+
+   thermal-sensors =
+   <&bpmp_thermal TEGRA186_BPMP_THERMAL_ZONE_PLLX>;
+
+   trips {
+   critical {
+   temperature = <101000>;
+   hysteresis = <0>;
+   type = "critical";
+   };
+   };
+
+   cooling-maps {
+   };
+   };
+
+   always_on {
+   polling-delay = <0>;
+   polling-delay-passive = <1000>;
+
+   thermal-sensors =
+   <&bpmp_thermal TEGRA186_BPMP_THERMAL_ZONE_AO>;
+
+   trips {
+   critical {
+   temperature = <101000>;
+   hysteresis = <0>;
+   type = "critical";
+   };
+   };
+
+   cooling-maps {
+   };
+   };
};
 
timer {
-- 
2.13.3



[PATCH v3 4/6] gpu: host1x: Disassemble more instructions

2017-09-28 Thread Mikko Perttunen
The disassembler for debug dumps was missing some newer host1x opcodes.
Add disassembly support for these.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/hw/debug_hw.c  | 59 ---
 drivers/gpu/host1x/hw/debug_hw_1x01.c |  2 +-
 drivers/gpu/host1x/hw/debug_hw_1x06.c |  3 +-
 3 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index 1e67667e308c..989476801f9d 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -30,6 +30,13 @@ enum {
HOST1X_OPCODE_IMM   = 0x04,
HOST1X_OPCODE_RESTART   = 0x05,
HOST1X_OPCODE_GATHER= 0x06,
+   HOST1X_OPCODE_SETSTRMID = 0x07,
+   HOST1X_OPCODE_SETAPPID  = 0x08,
+   HOST1X_OPCODE_SETPYLD   = 0x09,
+   HOST1X_OPCODE_INCR_W= 0x0a,
+   HOST1X_OPCODE_NONINCR_W = 0x0b,
+   HOST1X_OPCODE_GATHER_W  = 0x0c,
+   HOST1X_OPCODE_RESTART_W = 0x0d,
HOST1X_OPCODE_EXTEND= 0x0e,
 };
 
@@ -38,11 +45,16 @@ enum {
HOST1X_OPCODE_EXTEND_RELEASE_MLOCK  = 0x01,
 };
 
-static unsigned int show_channel_command(struct output *o, u32 val)
+#define INVALID_PAYLOAD 0xffffffff
+
+static unsigned int show_channel_command(struct output *o, u32 val,
+u32 *payload)
 {
-   unsigned int mask, subop, num;
+   unsigned int mask, subop, num, opcode;
+
+   opcode = val >> 28;
 
-   switch (val >> 28) {
+   switch (opcode) {
case HOST1X_OPCODE_SETCLASS:
mask = val & 0x3f;
if (mask) {
@@ -97,6 +109,44 @@ static unsigned int show_channel_command(struct output *o, 
u32 val)
val >> 14 & 0x1, val & 0x3fff);
return 1;
 
+#if HOST1X_HW >= 6
+   case HOST1X_OPCODE_SETSTRMID:
+   host1x_debug_cont(o, "SETSTRMID(offset=%06x)\n",
+ val & 0x3fffff);
+   return 0;
+
+   case HOST1X_OPCODE_SETAPPID:
+   host1x_debug_cont(o, "SETAPPID(appid=%02x)\n", val & 0xff);
+   return 0;
+
+   case HOST1X_OPCODE_SETPYLD:
+   *payload = val & 0xffff;
+   host1x_debug_cont(o, "SETPYLD(data=%04x)\n", *payload);
+   return 0;
+
+   case HOST1X_OPCODE_INCR_W:
+   case HOST1X_OPCODE_NONINCR_W:
+   host1x_debug_cont(o, "%s(offset=%06x, ",
+ opcode == HOST1X_OPCODE_INCR_W ?
+   "INCR_W" : "NONINCR_W",
+ val & 0x3fffff);
+   if (*payload == 0) {
+   host1x_debug_cont(o, "[])\n");
+   return 0;
+   } else if (*payload == INVALID_PAYLOAD) {
+   host1x_debug_cont(o, "unknown)\n");
+   return 0;
+   } else {
+   host1x_debug_cont(o, "[");
+   return *payload;
+   }
+
+   case HOST1X_OPCODE_GATHER_W:
+   host1x_debug_cont(o, "GATHER_W(count=%04x, addr=[",
+ val & 0x3fff);
+   return 2;
+#endif
+
case HOST1X_OPCODE_EXTEND:
subop = val >> 24 & 0xf;
if (subop == HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK)
@@ -122,6 +172,7 @@ static void show_gather(struct output *o, phys_addr_t 
phys_addr,
/* Map dmaget cursor to corresponding mem handle */
u32 offset = phys_addr - pin_addr;
unsigned int data_count = 0, i;
+   u32 payload = INVALID_PAYLOAD;
 
/*
 * Sometimes we're given different hardware address to the same
@@ -139,7 +190,7 @@ static void show_gather(struct output *o, phys_addr_t 
phys_addr,
 
if (!data_count) {
host1x_debug_output(o, "%08x: %08x: ", addr, val);
-   data_count = show_channel_command(o, val);
+   data_count = show_channel_command(o, val, &payload);
} else {
host1x_debug_cont(o, "%08x%s", val,
data_count > 1 ? ", " : "])\n");
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x01.c 
b/drivers/gpu/host1x/hw/debug_hw_1x01.c
index 09e1aa7bb5dd..8790d5fd5f20 100644
--- a/drivers/gpu/host1x/hw/debug_hw_1x01.c
+++ b/drivers/gpu/host1x/hw/debug_hw_1x01.c
@@ -112,7 +112,7 @@ static void host1x_debug_show_channel_fifo(struct host1x 
*host,
 
if (!data_count) {
host1x_debug_output(o, "%08x: ", val);
-   data_count = show_channel_command(o, val);
+   dat

[PATCH v3 0/6] Miscellaneous improvements to Host1x and TegraDRM

2017-09-28 Thread Mikko Perttunen
New in v3:
- Renamed *syncpt_assign_channel to *syncpt_assign_to_channel
- Disassembler ignores opcodes not supported on the particular
  chip
- Further cleanup in u64_to_user_ptr patch

New in v2:
- Changes in syncpoint protection and u64_to_user_ptr patches.
  See the patches for notes.
- Added patch to support more opcodes in the debug dump
  disassembly.
- Added patch to fix an incorrect comment.

Thanks,
Mikko

Patch v1 notes:

Hi all,

here are some new features and improvements.

Patch 1 enables syncpoint protection which prevents channels from
touching syncpoints not belonging to them on Tegra186.

Patch 2 enables the gather filter which prevents userspace command
buffers from using CDMA commands usually reserved for the kernel.
A test is available at git://github.com/cyndis/host1x_test, branch
gather-filter.

Patch 3 greatly improves formatting of debug dumps spewed by host1x
in case of job timeouts. They are now actually readable by humans
without use of additional scripts.

Patch 4 is a simple aesthetic fix to the TegraDRM submit path.

Everything was tested on TX1 and TX2 and should be applied on top of the
previously posted Tegra186 support series.

Cheers,
Mikko


Mikko Perttunen (6):
  gpu: host1x: Enable Tegra186 syncpoint protection
  gpu: host1x: Enable gather filter
  gpu: host1x: Improve debug disassembly formatting
  gpu: host1x: Disassemble more instructions
  gpu: host1x: Fix incorrect comment for channel_request
  drm/tegra: Use u64_to_user_ptr helper

 drivers/gpu/drm/tegra/drm.c |  29 
 drivers/gpu/host1x/channel.c|   3 +-
 drivers/gpu/host1x/debug.c  |  14 +++-
 drivers/gpu/host1x/debug.h  |  14 ++--
 drivers/gpu/host1x/dev.h|  15 
 drivers/gpu/host1x/hw/channel_hw.c  |  25 +++
 drivers/gpu/host1x/hw/debug_hw.c| 103 ++--
 drivers/gpu/host1x/hw/debug_hw_1x01.c   |  10 +--
 drivers/gpu/host1x/hw/debug_hw_1x06.c   |  12 ++--
 drivers/gpu/host1x/hw/hw_host1x04_channel.h |  12 
 drivers/gpu/host1x/hw/hw_host1x05_channel.h |  12 
 drivers/gpu/host1x/hw/syncpt_hw.c   |  46 +
 drivers/gpu/host1x/syncpt.c |   8 +++
 13 files changed, 252 insertions(+), 51 deletions(-)

-- 
2.14.1



[PATCH v3 6/6] drm/tegra: Use u64_to_user_ptr helper

2017-09-28 Thread Mikko Perttunen
Use the u64_to_user_ptr helper macro to cast IOCTL argument u64 values
to user pointers instead of writing out the cast manually. Also do
some other cleanup with user pointers to make them stand out more
and look cleaner.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/drm.c | 29 +++--
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 130d193192ee..943bdf88c4a2 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -386,12 +386,10 @@ int tegra_drm_submit(struct tegra_drm_context *context,
unsigned int num_cmdbufs = args->num_cmdbufs;
unsigned int num_relocs = args->num_relocs;
unsigned int num_waitchks = args->num_waitchks;
-   struct drm_tegra_cmdbuf __user *cmdbufs =
-   (void __user *)(uintptr_t)args->cmdbufs;
-   struct drm_tegra_reloc __user *relocs =
-   (void __user *)(uintptr_t)args->relocs;
-   struct drm_tegra_waitchk __user *waitchks =
-   (void __user *)(uintptr_t)args->waitchks;
+   struct drm_tegra_cmdbuf __user *user_cmdbufs;
+   struct drm_tegra_reloc __user *user_relocs;
+   struct drm_tegra_waitchk __user *user_waitchks;
+   struct drm_tegra_syncpt __user *user_syncpt;
struct drm_tegra_syncpt syncpt;
struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
struct drm_gem_object **refs;
@@ -400,6 +398,11 @@ int tegra_drm_submit(struct tegra_drm_context *context,
unsigned int num_refs;
int err;
 
+   user_cmdbufs = u64_to_user_ptr(args->cmdbufs);
+   user_relocs = u64_to_user_ptr(args->relocs);
+   user_waitchks = u64_to_user_ptr(args->waitchks);
+   user_syncpt = u64_to_user_ptr(args->syncpts);
+
/* We don't yet support other than one syncpt_incr struct per submit */
if (args->num_syncpts != 1)
return -EINVAL;
@@ -440,7 +443,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
struct tegra_bo *obj;
u64 offset;
 
-   if (copy_from_user(, cmdbufs, sizeof(cmdbuf))) {
+   if (copy_from_user(, user_cmdbufs, sizeof(cmdbuf))) {
err = -EFAULT;
goto fail;
}
@@ -476,7 +479,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 
host1x_job_add_gather(job, bo, cmdbuf.words, cmdbuf.offset);
num_cmdbufs--;
-   cmdbufs++;
+   user_cmdbufs++;
}
 
/* copy and resolve relocations from submit */
@@ -485,7 +488,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
struct tegra_bo *obj;
 
err = host1x_reloc_copy_from_user(>relocarray[num_relocs],
- [num_relocs], drm,
+ _relocs[num_relocs], drm,
  file);
if (err < 0)
goto fail;
@@ -519,9 +522,8 @@ int tegra_drm_submit(struct tegra_drm_context *context,
struct host1x_waitchk *wait = >waitchk[num_waitchks];
struct tegra_bo *obj;
 
-   err = host1x_waitchk_copy_from_user(wait,
-   [num_waitchks],
-   file);
+   err = host1x_waitchk_copy_from_user(
+   wait, _waitchks[num_waitchks], file);
if (err < 0)
goto fail;
 
@@ -539,8 +541,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
}
}
 
-   if (copy_from_user(, (void __user *)(uintptr_t)args->syncpts,
-  sizeof(syncpt))) {
+   if (copy_from_user(, user_syncpt, sizeof(syncpt))) {
err = -EFAULT;
goto fail;
}
-- 
2.14.1



[PATCH v3 3/6] gpu: host1x: Improve debug disassembly formatting

2017-09-28 Thread Mikko Perttunen
The host1x driver prints out "disassembly" dumps of the command FIFO
and gather contents on submission timeouts. However, the output has
been quite difficult to read with unnecessary newlines and occasional
missing parentheses.

Fix these problems by using pr_cont to remove unnecessary newlines
and by fixing other small issues.

Signed-off-by: Mikko Perttunen 
Reviewed-by: Dmitry Osipenko 
Tested-by: Dmitry Osipenko 
---
 drivers/gpu/host1x/debug.c| 14 ++-
 drivers/gpu/host1x/debug.h| 14 ---
 drivers/gpu/host1x/hw/debug_hw.c  | 46 ++-
 drivers/gpu/host1x/hw/debug_hw_1x01.c |  8 +++---
 drivers/gpu/host1x/hw/debug_hw_1x06.c |  9 ---
 5 files changed, 61 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index 2aae0e63214c..dc77ec452ffc 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -40,7 +40,19 @@ void host1x_debug_output(struct output *o, const char *fmt, 
...)
len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
va_end(args);
 
-   o->fn(o->ctx, o->buf, len);
+   o->fn(o->ctx, o->buf, len, false);
+}
+
+void host1x_debug_cont(struct output *o, const char *fmt, ...)
+{
+   va_list args;
+   int len;
+
+   va_start(args, fmt);
+   len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
+   va_end(args);
+
+   o->fn(o->ctx, o->buf, len, true);
 }
 
 static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
diff --git a/drivers/gpu/host1x/debug.h b/drivers/gpu/host1x/debug.h
index 4595b2e0799f..990cce47e737 100644
--- a/drivers/gpu/host1x/debug.h
+++ b/drivers/gpu/host1x/debug.h
@@ -24,22 +24,28 @@
 struct host1x;
 
 struct output {
-   void (*fn)(void *ctx, const char *str, size_t len);
+   void (*fn)(void *ctx, const char *str, size_t len, bool cont);
void *ctx;
char buf[256];
 };
 
-static inline void write_to_seqfile(void *ctx, const char *str, size_t len)
+static inline void write_to_seqfile(void *ctx, const char *str, size_t len,
+   bool cont)
 {
seq_write((struct seq_file *)ctx, str, len);
 }
 
-static inline void write_to_printk(void *ctx, const char *str, size_t len)
+static inline void write_to_printk(void *ctx, const char *str, size_t len,
+  bool cont)
 {
-   pr_info("%s", str);
+   if (cont)
+   pr_cont("%s", str);
+   else
+   pr_info("%s", str);
 }
 
 void __printf(2, 3) host1x_debug_output(struct output *o, const char *fmt, 
...);
+void __printf(2, 3) host1x_debug_cont(struct output *o, const char *fmt, ...);
 
 extern unsigned int host1x_debug_trace_cmdbuf;
 
diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index 770d92e62d69..1e67667e308c 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -40,48 +40,59 @@ enum {
 
 static unsigned int show_channel_command(struct output *o, u32 val)
 {
-   unsigned int mask, subop;
+   unsigned int mask, subop, num;
 
switch (val >> 28) {
case HOST1X_OPCODE_SETCLASS:
mask = val & 0x3f;
if (mask) {
-   host1x_debug_output(o, "SETCL(class=%03x, offset=%03x, 
mask=%02x, [",
+   host1x_debug_cont(o, "SETCL(class=%03x, offset=%03x, 
mask=%02x, [",
val >> 6 & 0x3ff,
val >> 16 & 0xfff, mask);
return hweight8(mask);
}
 
-   host1x_debug_output(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff);
+   host1x_debug_cont(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff);
return 0;
 
case HOST1X_OPCODE_INCR:
-   host1x_debug_output(o, "INCR(offset=%03x, [",
+   num = val & 0x;
+   host1x_debug_cont(o, "INCR(offset=%03x, [",
val >> 16 & 0xfff);
-   return val & 0x;
+   if (!num)
+   host1x_debug_cont(o, "])\n");
+
+   return num;
 
case HOST1X_OPCODE_NONINCR:
-   host1x_debug_output(o, "NONINCR(offset=%03x, [",
+   num = val & 0x;
+   host1x_debug_cont(o, "NONINCR(offset=%03x, [",
val >> 16 & 0xfff);
-   return val & 0x;
+   if (!num)
+   host1x_debug_cont(o, "])\n");
+
+   return num;
 
case HOST1X_OPCODE_MASK:
mask = val & 0x;
-  

[PATCH v3 1/6] gpu: host1x: Enable Tegra186 syncpoint protection

2017-09-28 Thread Mikko Perttunen
Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
specific channels, preventing any other channels from incrementing
them.

Enable this feature where available and assign syncpoints to channels
when submitting a job. Syncpoints are currently never unassigned from
channels since that would require extra work and is unnecessary with
the current channel allocation model.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/dev.h   | 15 +
 drivers/gpu/host1x/hw/channel_hw.c |  3 +++
 drivers/gpu/host1x/hw/syncpt_hw.c  | 46 ++
 drivers/gpu/host1x/syncpt.c|  8 +++
 4 files changed, 72 insertions(+)

diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index def802c0a6bf..502769726480 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
u32 (*load)(struct host1x_syncpt *syncpt);
int (*cpu_incr)(struct host1x_syncpt *syncpt);
int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
+   void (*assign_to_channel)(struct host1x_syncpt *syncpt,
+ struct host1x_channel *channel);
+   void (*enable_protection)(struct host1x *host);
 };
 
 struct host1x_intr_ops {
@@ -186,6 +189,18 @@ static inline int host1x_hw_syncpt_patch_wait(struct 
host1x *host,
return host->syncpt_op->patch_wait(sp, patch_addr);
 }
 
+static inline void host1x_hw_syncpt_assign_to_channel(
+   struct host1x *host, struct host1x_syncpt *sp,
+   struct host1x_channel *ch)
+{
+   return host->syncpt_op->assign_to_channel(sp, ch);
+}
+
+static inline void host1x_hw_syncpt_enable_protection(struct host1x *host)
+{
+   return host->syncpt_op->enable_protection(host);
+}
+
 static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
void (*syncpt_thresh_work)(struct work_struct *))
 {
diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
b/drivers/gpu/host1x/hw/channel_hw.c
index 8447a56c41ca..b929d7f1e291 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
 
syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
 
+   /* assign syncpoint to channel */
+   host1x_hw_syncpt_assign_to_channel(host, sp, ch);
+
job->syncpt_end = syncval;
 
/* add a setclass for modules that require it */
diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c 
b/drivers/gpu/host1x/hw/syncpt_hw.c
index 7b0270d60742..7dfd47d74f89 100644
--- a/drivers/gpu/host1x/hw/syncpt_hw.c
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -106,6 +106,50 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, 
void *patch_addr)
return 0;
 }
 
+/**
+ * syncpt_assign_to_channel() - Assign syncpoint to channel
+ * @sp: syncpoint
+ * @ch: channel
+ *
+ * On chips with the syncpoint protection feature (Tegra186+), assign @sp to
+ * @ch, preventing other channels from incrementing the syncpoints. If @ch is
+ * NULL, unassigns the syncpoint.
+ *
+ * On older chips, do nothing.
+ */
+static void syncpt_assign_to_channel(struct host1x_syncpt *sp,
+ struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+   struct host1x *host = sp->host;
+
+   if (!host->hv_regs)
+   return;
+
+   host1x_sync_writel(host,
+  HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
+  HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
+#endif
+}
+
+/**
+ * syncpt_enable_protection() - Enable syncpoint protection
+ * @host: host1x instance
+ *
+ * On chips with the syncpoint protection feature (Tegra186+), enable this
+ * feature. On older chips, do nothing.
+ */
+static void syncpt_enable_protection(struct host1x *host)
+{
+#if HOST1X_HW >= 6
+   if (!host->hv_regs)
+   return;
+
+   host1x_hypervisor_writel(host, HOST1X_HV_SYNCPT_PROT_EN_CH_EN,
+HOST1X_HV_SYNCPT_PROT_EN);
+#endif
+}
+
 static const struct host1x_syncpt_ops host1x_syncpt_ops = {
.restore = syncpt_restore,
.restore_wait_base = syncpt_restore_wait_base,
@@ -113,4 +157,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
.load = syncpt_load,
.cpu_incr = syncpt_cpu_incr,
.patch_wait = syncpt_patch_wait,
+   .assign_to_channel = syncpt_assign_to_channel,
+   .enable_protection = syncpt_enable_protection,
 };
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 048ac9e344ce..bce7cd6db724 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -398,6 +398,13 @@ int host1x_syncpt_init(struct host1x *host)
for (i = 0; i < host->info->nb_pts; i++) {
syncpt[i].id = i;
syncpt[i].host = host;
+
+   

[PATCH v3 2/6] gpu: host1x: Enable gather filter

2017-09-28 Thread Mikko Perttunen
The gather filter is a feature present on Tegra124 and newer where the
hardware prevents GATHERed command buffers from executing commands
normally reserved for the CDMA pushbuffer which is maintained by the
kernel driver.

This commit enables the gather filter on all supporting hardware.

Signed-off-by: Mikko Perttunen 
Reviewed-by: Dmitry Osipenko 
---
 drivers/gpu/host1x/hw/channel_hw.c  | 22 ++
 drivers/gpu/host1x/hw/hw_host1x04_channel.h | 12 
 drivers/gpu/host1x/hw/hw_host1x05_channel.h | 12 
 3 files changed, 46 insertions(+)

diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
b/drivers/gpu/host1x/hw/channel_hw.c
index b929d7f1e291..fb8132fc477b 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -181,10 +181,32 @@ static int channel_submit(struct host1x_job *job)
return err;
 }
 
+static void enable_gather_filter(struct host1x *host,
+struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+   u32 val;
+
+   if (!host->hv_regs)
+   return;
+
+   val = host1x_hypervisor_readl(
+   host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+   val |= BIT(ch->id % 32);
+   host1x_hypervisor_writel(
+   host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+#elif HOST1X_HW >= 4
+   host1x_ch_writel(ch,
+HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1),
+HOST1X_CHANNEL_CHANNELCTRL);
+#endif
+}
+
 static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
   unsigned int index)
 {
ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE;
+   enable_gather_filter(dev, ch);
return 0;
 }
 
diff --git a/drivers/gpu/host1x/hw/hw_host1x04_channel.h 
b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
index 95e6f96142b9..2e8b635aa660 100644
--- a/drivers/gpu/host1x/hw/hw_host1x04_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
@@ -117,5 +117,17 @@ static inline u32 host1x_channel_dmactrl_dmainitget(void)
 }
 #define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
host1x_channel_dmactrl_dmainitget()
+static inline u32 host1x_channel_channelctrl_r(void)
+{
+   return 0x98;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL \
+   host1x_channel_channelctrl_r()
+static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
+{
+   return (v & 0x1) << 2;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
+   host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)
 
 #endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_channel.h 
b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
index fce6e2c1ff4c..abbbc2641ce6 100644
--- a/drivers/gpu/host1x/hw/hw_host1x05_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
@@ -117,5 +117,17 @@ static inline u32 host1x_channel_dmactrl_dmainitget(void)
 }
 #define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
host1x_channel_dmactrl_dmainitget()
+static inline u32 host1x_channel_channelctrl_r(void)
+{
+   return 0x98;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL \
+   host1x_channel_channelctrl_r()
+static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
+{
+   return (v & 0x1) << 2;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
+   host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)
 
 #endif
-- 
2.14.1



[PATCH v3 5/6] gpu: host1x: Fix incorrect comment for channel_request

2017-09-28 Thread Mikko Perttunen
This function actually doesn't sleep in the version that was merged.

Signed-off-by: Mikko Perttunen 
Reviewed-by: Dmitry Osipenko 
---
 drivers/gpu/host1x/channel.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index db9b91d1384c..2fb93c27c1d9 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -128,8 +128,7 @@ static struct host1x_channel *acquire_unused_channel(struct 
host1x *host)
  * host1x_channel_request() - Allocate a channel
  * @device: Host1x unit this channel will be used to send commands to
  *
- * Allocates a new host1x channel for @device. If there are no free channels,
- * this will sleep until one becomes available. May return NULL if CDMA
+ * Allocates a new host1x channel for @device. May return NULL if CDMA
  * initialization fails.
  */
 struct host1x_channel *host1x_channel_request(struct device *dev)
-- 
2.14.1



Re: [PATCH v3 1/6] gpu: host1x: Enable Tegra186 syncpoint protection

2017-09-30 Thread Mikko Perttunen

On 09/30/2017 05:41 AM, Dmitry Osipenko wrote:

On 28.09.2017 15:50, Mikko Perttunen wrote:

..
diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
b/drivers/gpu/host1x/hw/channel_hw.c
index 8447a56c41ca..b929d7f1e291 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
  
  	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
  
+	/* assign syncpoint to channel */

+   host1x_hw_syncpt_assign_to_channel(host, sp, ch);
+


Since you've preserved the comment, what about extending it with a brief
explanation of what the 'assignment' actually does? For example, that CDMA will
stop execution on touching any syncpoint other than the assigned one.


Whoops, I actually forgot to remove that :) I think the best would be to 
remove the comment here and have a more proper description of the 
feature somewhere else.


Mikko


[PATCH 5/6] arm64: tegra: Add Tegra194 chip device tree

2018-01-07 Thread Mikko Perttunen
Add the chip-level device tree, including binding headers, for the
NVIDIA Tegra194 "Xavier" system-on-chip. Only a small subset of devices
are initially available, enough to boot to UART console.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/boot/dts/nvidia/tegra194.dtsi   | 334 +
 include/dt-bindings/clock/tegra194-clock.h |  59 +
 include/dt-bindings/gpio/tegra194-gpio.h   |  59 +
 include/dt-bindings/reset/tegra194-reset.h |  40 
 4 files changed, 492 insertions(+)
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194.dtsi
 create mode 100644 include/dt-bindings/clock/tegra194-clock.h
 create mode 100644 include/dt-bindings/gpio/tegra194-gpio.h
 create mode 100644 include/dt-bindings/reset/tegra194-reset.h

diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
new file mode 100644
index ..51eff420816d
--- /dev/null
+++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/ {
+   compatible = "nvidia,tegra194";
+   interrupt-parent = <>;
+   #address-cells = <2>;
+   #size-cells = <2>;
+
+   uarta: serial@310 {
+   compatible = "nvidia,tegra194-uart", "nvidia,tegra20-uart";
+   reg = <0x0 0x0310 0x0 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTA>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTA>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   uartb: serial@311 {
+   compatible = "nvidia,tegra194-uart", "nvidia,tegra20-uart";
+   reg = <0x0 0x0311 0x0 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTB>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTB>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   uartd: serial@313 {
+   compatible = "nvidia,tegra194-uart", "nvidia,tegra20-uart";
+   reg = <0x0 0x0313 0x0 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTD>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTD>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   uarte: serial@314 {
+   compatible = "nvidia,tegra194-uart", "nvidia,tegra20-uart";
+   reg = <0x0 0x0314 0x0 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTE>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTE>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   uartf: serial@315 {
+   compatible = "nvidia,tegra194-uart", "nvidia,tegra20-uart";
+   reg = <0x0 0x0315 0x0 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTF>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTF>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   gen1_i2c: i2c@316 {
+   compatible = "nvidia,tegra194-i2c", "nvidia,tegra114-i2c";
+   reg = <0x0 0x0316 0x0 0x1>;
+   interrupts = ;
+   #address-cells = <1>;
+   #size-cells = <0>;
+   clocks = < TEGRA194_CLK_I2C1>;
+   clock-names = "div-clk";
+   resets = < TEGRA194_RESET_I2C1>;
+   reset-names = "i2c";
+   status = "disabled";
+   };
+
+   uarth: serial@317 {
+   compatible = "nvidia,tegra194-uart", "nvidia,tegra20-uart";
+   reg = <0x0 0x0317 0x0 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTH>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTH>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   cam_i2c: i2c@31800

[PATCH 2/6] soc/tegra: Add Tegra194 SoC configuration option

2018-01-07 Thread Mikko Perttunen
Add the configuration option to enable support for the Tegra194
system-on-chip, and enable it by default in the arm64 defconfig.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/configs/defconfig |  1 +
 drivers/soc/tegra/Kconfig| 10 ++
 2 files changed, 11 insertions(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 78f669a21a9b..5a8f15baa850 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -537,6 +537,7 @@ CONFIG_ROCKCHIP_PM_DOMAINS=y
 CONFIG_ARCH_TEGRA_132_SOC=y
 CONFIG_ARCH_TEGRA_210_SOC=y
 CONFIG_ARCH_TEGRA_186_SOC=y
+CONFIG_ARCH_TEGRA_194_SOC=y
 CONFIG_EXTCON_USB_GPIO=y
 CONFIG_IIO=y
 CONFIG_EXYNOS_ADC=y
diff --git a/drivers/soc/tegra/Kconfig b/drivers/soc/tegra/Kconfig
index 89ebe22a3e27..fe4481676da6 100644
--- a/drivers/soc/tegra/Kconfig
+++ b/drivers/soc/tegra/Kconfig
@@ -104,6 +104,16 @@ config ARCH_TEGRA_186_SOC
  multi-format support, ISP for image capture processing and BPMP for
  power management.
 
+config ARCH_TEGRA_194_SOC
+   bool "NVIDIA Tegra194 SoC"
+   select MAILBOX
+   select TEGRA_BPMP
+   select TEGRA_HSP_MBOX
+   select TEGRA_IVC
+   select SOC_TEGRA_PMC
+   help
+ Enable support for the NVIDIA Tegra194 SoC.
+
 endif
 endif
 
-- 
2.1.4



[PATCH 1/6] firmware: tegra: Simplify channel management

2018-01-07 Thread Mikko Perttunen
The Tegra194 BPMP only implements 5 channels (4 to BPMP, 1 to CCPLEX),
and they are not placed contiguously in memory. The current channel
management in the BPMP driver does not support this.

Simplify and refactor the channel management such that only one atomic
transmit channel and one receive channel are supported, and channels
are not required to be placed contiguously in memory. The same
configuration also works on T186 so we end up with less code.

Signed-off-by: Mikko Perttunen 
---
 drivers/firmware/tegra/bpmp.c | 142 +++---
 include/soc/tegra/bpmp.h  |   4 +-
 2 files changed, 66 insertions(+), 80 deletions(-)

diff --git a/drivers/firmware/tegra/bpmp.c b/drivers/firmware/tegra/bpmp.c
index a7f461f2e650..81bc2dce8626 100644
--- a/drivers/firmware/tegra/bpmp.c
+++ b/drivers/firmware/tegra/bpmp.c
@@ -70,57 +70,20 @@ void tegra_bpmp_put(struct tegra_bpmp *bpmp)
 }
 EXPORT_SYMBOL_GPL(tegra_bpmp_put);
 
-static int tegra_bpmp_channel_get_index(struct tegra_bpmp_channel *channel)
-{
-   return channel - channel->bpmp->channels;
-}
-
 static int
 tegra_bpmp_channel_get_thread_index(struct tegra_bpmp_channel *channel)
 {
struct tegra_bpmp *bpmp = channel->bpmp;
-   unsigned int offset, count;
+   unsigned int count;
int index;
 
-   offset = bpmp->soc->channels.thread.offset;
count = bpmp->soc->channels.thread.count;
 
-   index = tegra_bpmp_channel_get_index(channel);
-   if (index < 0)
-   return index;
-
-   if (index < offset || index >= offset + count)
+   index = channel - channel->bpmp->threaded_channels;
+   if (index < 0 || index >= count)
return -EINVAL;
 
-   return index - offset;
-}
-
-static struct tegra_bpmp_channel *
-tegra_bpmp_channel_get_thread(struct tegra_bpmp *bpmp, unsigned int index)
-{
-   unsigned int offset = bpmp->soc->channels.thread.offset;
-   unsigned int count = bpmp->soc->channels.thread.count;
-
-   if (index >= count)
-   return NULL;
-
-   return >channels[offset + index];
-}
-
-static struct tegra_bpmp_channel *
-tegra_bpmp_channel_get_tx(struct tegra_bpmp *bpmp)
-{
-   unsigned int offset = bpmp->soc->channels.cpu_tx.offset;
-
-   return >channels[offset + smp_processor_id()];
-}
-
-static struct tegra_bpmp_channel *
-tegra_bpmp_channel_get_rx(struct tegra_bpmp *bpmp)
-{
-   unsigned int offset = bpmp->soc->channels.cpu_rx.offset;
-
-   return >channels[offset];
+   return index;
 }
 
 static bool tegra_bpmp_message_valid(const struct tegra_bpmp_message *msg)
@@ -271,11 +234,7 @@ tegra_bpmp_write_threaded(struct tegra_bpmp *bpmp, 
unsigned int mrq,
goto unlock;
}
 
-   channel = tegra_bpmp_channel_get_thread(bpmp, index);
-   if (!channel) {
-   err = -EINVAL;
-   goto unlock;
-   }
+   channel = >threaded_channels[index];
 
if (!tegra_bpmp_master_free(channel)) {
err = -EBUSY;
@@ -328,12 +287,18 @@ int tegra_bpmp_transfer_atomic(struct tegra_bpmp *bpmp,
if (!tegra_bpmp_message_valid(msg))
return -EINVAL;
 
-   channel = tegra_bpmp_channel_get_tx(bpmp);
+   channel = bpmp->tx_channel;
+
+   spin_lock(>atomic_tx_lock);
 
err = tegra_bpmp_channel_write(channel, msg->mrq, MSG_ACK,
   msg->tx.data, msg->tx.size);
-   if (err < 0)
+   if (err < 0) {
+   spin_unlock(>atomic_tx_lock);
return err;
+   }
+
+   spin_unlock(>atomic_tx_lock);
 
err = mbox_send_message(bpmp->mbox.channel, NULL);
if (err < 0)
@@ -607,7 +572,7 @@ static void tegra_bpmp_handle_rx(struct mbox_client 
*client, void *data)
unsigned int i, count;
unsigned long *busy;
 
-   channel = tegra_bpmp_channel_get_rx(bpmp);
+   channel = bpmp->rx_channel;
count = bpmp->soc->channels.thread.count;
busy = bpmp->threaded.busy;
 
@@ -619,9 +584,7 @@ static void tegra_bpmp_handle_rx(struct mbox_client 
*client, void *data)
for_each_set_bit(i, busy, count) {
struct tegra_bpmp_channel *channel;
 
-   channel = tegra_bpmp_channel_get_thread(bpmp, i);
-   if (!channel)
-   continue;
+   channel = >threaded_channels[i];
 
if (tegra_bpmp_master_acked(channel)) {
tegra_bpmp_channel_signal(channel);
@@ -698,7 +661,6 @@ static void tegra_bpmp_channel_cleanup(struct 
tegra_bpmp_channel *channel)
 
 static int tegra_bpmp_probe(struct platform_device *pdev)
 {
-   struct tegra_bpmp_channel *channel;
struct tegra_bpmp *bpmp;
unsigned int i;
char tag[32];
@@ -758,24 +720,45 @@ static int tegra_bpmp_probe(stru

[PATCH 6/6] arm64: tegra: Add device tree for the Tegra194 P2972-0000 board

2018-01-07 Thread Mikko Perttunen
Add device tree files for the Tegra194 P2972-0000 development board.
The board consists of the P2888 compute module and the P2822 baseboard.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/boot/dts/nvidia/Makefile|   1 +
 arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi | 246 +
 arch/arm64/boot/dts/nvidia/tegra194-p2972-.dts |  14 ++
 3 files changed, 261 insertions(+)
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194-p2972-.dts

diff --git a/arch/arm64/boot/dts/nvidia/Makefile 
b/arch/arm64/boot/dts/nvidia/Makefile
index 676aa2f238d1..7c13d7df484e 100644
--- a/arch/arm64/boot/dts/nvidia/Makefile
+++ b/arch/arm64/boot/dts/nvidia/Makefile
@@ -5,3 +5,4 @@ dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-p2371-2180.dtb
 dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-p2571.dtb
 dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-smaug.dtb
 dtb-$(CONFIG_ARCH_TEGRA_186_SOC) += tegra186-p2771-.dtb
+dtb-$(CONFIG_ARCH_TEGRA_194_SOC) += tegra194-p2972-.dtb
diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
new file mode 100644
index ..5b337f883d2c
--- /dev/null
+++ b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "tegra194.dtsi"
+
+#include 
+
+/ {
+   model = "NVIDIA Tegra194 P2888 Processor Module";
+   compatible = "nvidia,p2888", "nvidia,tegra194";
+
+   aliases {
+   sdhci0 = "/sdhci@346";
+   sdhci1 = "/sdhci@340";
+   serial0 = 
+   i2c0 = "/bpmp/i2c";
+   i2c1 = "/i2c@316";
+   i2c2 = "/i2c@c24";
+   i2c3 = "/i2c@318";
+   i2c4 = "/i2c@319";
+   i2c5 = "/i2c@31c";
+   i2c6 = "/i2c@c25";
+   i2c7 = "/i2c@31e";
+   };
+
+   chosen {
+   bootargs = "earlycon console=ttyS0,115200n8";
+   stdout-path = "serial0:115200n8";
+   };
+
+   serial@c28 {
+   status = "okay";
+   };
+
+   /* SDMMC1 (SD/MMC) */
+   sdhci@340 {
+/*
+   cd-gpios = < TEGRA194_MAIN_GPIO(A, 0) GPIO_ACTIVE_LOW>;
+*/
+   };
+
+   /* SDMMC4 (eMMC) */
+   sdhci@346 {
+   status = "okay";
+   bus-width = <8>;
+   non-removable;
+
+   vqmmc-supply = <_1v8ls>;
+   vmmc-supply = <_emmc_3v3>;
+   };
+
+   pmc@c36 {
+   nvidia,invert-interrupt;
+   };
+
+   bpmp {
+   i2c {
+   status = "okay";
+
+   pmic: pmic@3c {
+   compatible = "maxim,max20024";
+   reg = <0x3c>;
+
+   interrupts = ;
+   #interrupt-cells = <2>;
+   interrupt-controller;
+
+   #gpio-cells = <2>;
+   gpio-controller;
+
+   pinctrl-names = "default";
+   pinctrl-0 = <_default>;
+
+   max20024_default: pinmux {
+   gpio0 {
+   pins = "gpio0";
+   function = "gpio";
+   };
+
+   gpio1 {
+   pins = "gpio1";
+   function = "fps-out";
+   maxim,active-fps-source = 
;
+   };
+
+   gpio2 {
+   pins = "gpio2";
+   function = "fps-out";
+   maxim,active-fps-source = 
;
+   };
+
+   gpio3 {
+   pins = "gpio3";
+   function = "fps-out";
+   maxim,active-fps-source = 
;
+   };
+
+   gpio4 {
+   pins = "gpio4";
+   function = "32k-ou

[PATCH 3/6] soc/tegra: pmc: Add Tegra194 compatibility string

2018-01-07 Thread Mikko Perttunen
The Tegra194 PMC is mostly compatible with Tegra186, including in all
currently supported features. As such, add a new compatibility string
but point to the existing Tegra186 SoC data for now.

Signed-off-by: Mikko Perttunen 
---
 drivers/soc/tegra/pmc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c
index ce62a47a6647..a2df230bf51a 100644
--- a/drivers/soc/tegra/pmc.c
+++ b/drivers/soc/tegra/pmc.c
@@ -1920,6 +1920,7 @@ static const struct tegra_pmc_soc tegra186_pmc_soc = {
 };
 
 static const struct of_device_id tegra_pmc_match[] = {
+   { .compatible = "nvidia,tegra194-pmc", .data = _pmc_soc },
{ .compatible = "nvidia,tegra186-pmc", .data = _pmc_soc },
{ .compatible = "nvidia,tegra210-pmc", .data = _pmc_soc },
{ .compatible = "nvidia,tegra132-pmc", .data = _pmc_soc },
-- 
2.1.4



[PATCH 4/6] dt-bindings: tegra: Add documentation for nvidia,tegra194-pmc

2018-01-07 Thread Mikko Perttunen
The Tegra194 power management controller has one additional register
aperture to be specified in the device tree node.

Signed-off-by: Mikko Perttunen 
---
 Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git 
a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt 
b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt
index 078a58b0302f..5a3bf7c5a7a0 100644
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt
@@ -3,6 +3,7 @@ NVIDIA Tegra Power Management Controller (PMC)
 Required properties:
 - compatible: Should contain one of the following:
   - "nvidia,tegra186-pmc": for Tegra186
+  - "nvidia,tegra194-pmc": for Tegra194
 - reg: Must contain an (offset, length) pair of the register set for each
   entry in reg-names.
 - reg-names: Must include the following entries:
@@ -10,6 +11,7 @@ Required properties:
   - "wake"
   - "aotag"
   - "scratch"
+  - "misc" (Only for Tegra194)
 
 Optional properties:
 - nvidia,invert-interrupt: If present, inverts the PMU interrupt signal.
-- 
2.1.4



[PATCH 0/6] Initial support for NVIDIA Tegra194

2018-01-07 Thread Mikko Perttunen
Hello everyone,

this series adds initial support for the NVIDIA Tegra194 "Xavier"
system-on-chip. Initially UART, I2C, SDMMC, as well as the PMIC
are supported, allowing booting to a console.

The changes consist almost completely of the new device trees;
however, some fixes are required in the BPMP driver to support the
new channel layout in Tegra194.

The series has been tested on Tegra186 (Jetson TX2) and Tegra194
(P2972).

Cheers,
Mikko

Mikko Perttunen (6):
  firmware: tegra: Simplify channel management
  soc/tegra: Add Tegra194 SoC configuration option
  soc/tegra: pmc: Add Tegra194 compatibility string
  dt-bindings: tegra: Add documentation for nvidia,tegra194-pmc
  arm64: tegra: Add Tegra194 chip device tree
  arm64: tegra: Add device tree for the Tegra194 P2972-0000 board

 .../bindings/arm/tegra/nvidia,tegra186-pmc.txt |   2 +
 arch/arm64/boot/dts/nvidia/Makefile|   1 +
 arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi | 246 +++
 arch/arm64/boot/dts/nvidia/tegra194-p2972-.dts |  14 +
 arch/arm64/boot/dts/nvidia/tegra194.dtsi   | 334 +
 arch/arm64/configs/defconfig   |   1 +
 drivers/firmware/tegra/bpmp.c  | 142 -
 drivers/soc/tegra/Kconfig  |  10 +
 drivers/soc/tegra/pmc.c|   1 +
 include/dt-bindings/clock/tegra194-clock.h |  59 
 include/dt-bindings/gpio/tegra194-gpio.h   |  59 
 include/dt-bindings/reset/tegra194-reset.h |  40 +++
 include/soc/tegra/bpmp.h   |   4 +-
 13 files changed, 833 insertions(+), 80 deletions(-)
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194-p2972-.dts
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194.dtsi
 create mode 100644 include/dt-bindings/clock/tegra194-clock.h
 create mode 100644 include/dt-bindings/gpio/tegra194-gpio.h
 create mode 100644 include/dt-bindings/reset/tegra194-reset.h

-- 
2.1.4



Re: [PATCH 06/10] drm/tegra: Deliver job completion callback to client

2017-11-29 Thread Mikko Perttunen

On 16.11.2017 18:40, Dmitry Osipenko wrote:

On 05.11.2017 14:01, Mikko Perttunen wrote:

To allow client drivers to free resources when jobs have completed,
deliver job completion callbacks to them. This requires adding
reference counting to context objects, as job completion can happen
after the userspace application has closed the context. As such,
also add kref-based refcounting for contexts.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/drm.c | 27 ---
 drivers/gpu/drm/tegra/drm.h |  4 
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 2cdd054520bf..3e2a4a19412e 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -281,8 +281,11 @@ static int tegra_drm_open(struct drm_device *drm, struct 
drm_file *filp)
return 0;
 }

-static void tegra_drm_context_free(struct tegra_drm_context *context)
+static void tegra_drm_context_free(struct kref *ref)
 {
+   struct tegra_drm_context *context =
+   container_of(ref, struct tegra_drm_context, ref);
+
context->client->ops->close_channel(context);
kfree(context);
 }
@@ -379,6 +382,16 @@ static int host1x_waitchk_copy_from_user(struct 
host1x_waitchk *dest,
return 0;
 }

+static void tegra_drm_job_done(struct host1x_job *job)
+{
+   struct tegra_drm_context *context = job->callback_data;
+
+   if (context->client->ops->submit_done)
+   context->client->ops->submit_done(context);
+
+   kref_put(&context->ref, tegra_drm_context_free);
+}
+
 int tegra_drm_submit(struct tegra_drm_context *context,
 struct drm_tegra_submit *args, struct drm_device *drm,
 struct drm_file *file)
@@ -560,6 +573,9 @@ int tegra_drm_submit(struct tegra_drm_context *context,
job->syncpt_id = syncpt.id;
job->timeout = 10000;

+   job->done = tegra_drm_job_done;
+   job->callback_data = context;
+
if (args->timeout && args->timeout < 10000)
job->timeout = args->timeout;

@@ -567,8 +583,11 @@ int tegra_drm_submit(struct tegra_drm_context *context,
if (err)
goto fail;

+   kref_get(&context->ref);
+
err = host1x_job_submit(job);
if (err) {
+   kref_put(&context->ref, tegra_drm_context_free);
host1x_job_unpin(job);
goto fail;
}
@@ -717,6 +736,8 @@ static int tegra_open_channel(struct drm_device *drm, void 
*data,
if (err < 0)
kfree(context);

+   kref_init(&context->ref);
+
mutex_unlock(&fpriv->lock);
return err;
 }
@@ -738,7 +759,7 @@ static int tegra_close_channel(struct drm_device *drm, void 
*data,
}

idr_remove(&fpriv->contexts, context->id);
-   tegra_drm_context_free(context);
+   kref_put(&context->ref, tegra_drm_context_free);

 unlock:
mutex_unlock(&fpriv->lock);
@@ -1026,7 +1047,7 @@ static int tegra_drm_context_cleanup(int id, void *p, 
void *data)
 {
struct tegra_drm_context *context = p;

-   tegra_drm_context_free(context);
+   kref_put(&context->ref, tegra_drm_context_free);



Probably won't hurt to add and use 
tegra_drm_context_get()/tegra_drm_context_put().



Yeah, maybe we have enough places where this is called for it to make sense.



Re: [PATCH 10/10] gpu: host1x: Optionally block when acquiring channel

2017-11-29 Thread Mikko Perttunen

On 12.11.2017 13:23, Dmitry Osipenko wrote:

On 11.11.2017 00:15, Dmitry Osipenko wrote:

On 07.11.2017 18:29, Dmitry Osipenko wrote:

On 07.11.2017 16:11, Mikko Perttunen wrote:

On 05.11.2017 19:14, Dmitry Osipenko wrote:

On 05.11.2017 14:01, Mikko Perttunen wrote:

Add an option to host1x_channel_request to interruptibly wait for a
free channel. This allows IOCTLs that acquire a channel to block
the userspace.



Wouldn't it be more optimal to request channel and block after job's pinning,
when all patching and checks are completed? Note that right now we have locking
around submission in DRM, which I suppose should go away by making locking fine
grained.


That would be possible, but I don't think it should matter much since contention
here should not be the common case.



Or maybe it would be more optimal to just iterate over channels, like I
suggested before [0]?


Somehow I hadn't noticed this before, but this would break the invariant of
having one client/class per channel.



Yes, currently there is a weak relation between a channel and the client's
device, but it seems the channel's device is only used for printing dev_*
messages, and the device could be borrowed from the channel's job. I don't see
any real point in hardwiring a channel to a specific device or client.


Although, it won't work with syncpoint assignment to channel.


On the other hand.. it should work if one syncpoint could be assigned to
multiple channels, couldn't it?


A syncpoint can only be mapped to a single channel, so unfortunately 
this won't work.


Mikko


--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



Re: [PATCH] drm/tegra: gem: Use PTR_ERR_OR_ZERO()

2017-11-29 Thread Mikko Perttunen

On 29.11.2017 00:18, Vasyl Gomonovych wrote:

Fix ptr_ret.cocci warnings:
drivers/gpu/drm/tegra/gem.c:420:1-3: WARNING: PTR_ERR_OR_ZERO can be used

Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR

Generated by: scripts/coccinelle/api/ptr_ret.cocci

Signed-off-by: Vasyl Gomonovych 
---
 drivers/gpu/drm/tegra/gem.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index ab1e53d434e8..a40148cd0957 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -417,10 +417,8 @@ int tegra_bo_dumb_create(struct drm_file *file, struct 
drm_device *drm,

bo = tegra_bo_create_with_handle(file, drm, args->size, 0,
 &args->handle);
-   if (IS_ERR(bo))
-   return PTR_ERR(bo);

-   return 0;
+   return PTR_ERR_OR_ZERO(bo);


This is semantically less clear - the current code clearly has error and 
success paths while the changed code doesn't. Same for the other patch.


Cheers,
Mikko


 }

 static int tegra_bo_fault(struct vm_fault *vmf)



Re: [PATCH 04/10] gpu: host1x: Lock classes during job submission

2017-12-05 Thread Mikko Perttunen

On 07.11.2017 23:23, Dmitry Osipenko wrote:

On 07.11.2017 15:28, Mikko Perttunen wrote:

On 05.11.2017 18:46, Dmitry Osipenko wrote:

On 05.11.2017 14:01, Mikko Perttunen wrote:

...

+static int mlock_id_for_class(unsigned int class)
+{
+#if HOST1X_HW >= 6
+switch (class)
+{
+case HOST1X_CLASS_HOST1X:
+return 0;
+case HOST1X_CLASS_VIC:
+return 17;


What is the meaning of returned ID values that you have defined here? Why VIC
should have different ID on T186?


On T186, MLOCKs are not "generic" - the HW knows that each MLOCK corresponds to
a specific class. Therefore we must map that correctly.



Okay.




+default:
+return -EINVAL;
+}
+#else
+switch (class)
+{
+case HOST1X_CLASS_HOST1X:
+return 0;
+case HOST1X_CLASS_GR2D:
+return 1;
+case HOST1X_CLASS_GR2D_SB:
+return 2;


Note that we are allowing to switch 2d classes in the same jobs context and
currently jobs class is somewhat hardcoded to GR2D.

Even though that GR2D and GR2D_SB use different register banks, is it okay to
trigger execution of different classes simultaneously? Would syncpoint
differentiate classes on OP_DONE event?


Good point, we might need to use the same lock for these two.



I suppose that MLOCK (the module lock) implies the whole module locking,
wouldn't it make sense to just use the module ID's defined in the TRM?


Can you point out where these are defined?


See INDMODID / REGF_MODULEID fields of HOST1X_CHANNEL_INDOFF2_0 /
HOST1X_SYNC_REGF_ADDR_0 registers, bit numbers of HOST1X_SYNC_INTSTATUS_0 /
HOST1X_SYNC_INTC0MASK_0 / HOST1X_SYNC_MOD_TEARDOWN_0.


These values look like they would work on T20, but at least on T124 the 
module numbering for modules we want to lock goes above the number of 
MLOCKs so the indexing scheme would not work there..


Mikko


--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[PATCH 03/10] gpu: host1x: Add lock around channel allocation

2017-11-05 Thread Mikko Perttunen
With the new channel allocation model, multiple threads can be
allocating channels simultaneously. Therefore we need to add a lock
around the code.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/channel.c | 7 +++
 drivers/gpu/host1x/channel.h | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index 2fb93c27c1d9..9d8cad12f9d8 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -42,6 +42,8 @@ int host1x_channel_list_init(struct host1x_channel_list 
*chlist,
 
bitmap_zero(chlist->allocated_channels, num_channels);
 
+   mutex_init(&chlist->lock);
+
return 0;
 }
 
@@ -111,8 +113,11 @@ static struct host1x_channel 
*acquire_unused_channel(struct host1x *host)
unsigned int max_channels = host->info->nb_channels;
unsigned int index;
 
+   mutex_lock(&chlist->lock);
+
index = find_first_zero_bit(chlist->allocated_channels, max_channels);
if (index >= max_channels) {
+   mutex_unlock(&chlist->lock);
dev_err(host->dev, "failed to find free channel\n");
return NULL;
}
@@ -121,6 +126,8 @@ static struct host1x_channel *acquire_unused_channel(struct 
host1x *host)
 
set_bit(index, chlist->allocated_channels);
 
+   mutex_unlock(&chlist->lock);
+
return &chlist->channels[index];
 }
 
diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h
index 7068e42d42df..e68a8ae9a670 100644
--- a/drivers/gpu/host1x/channel.h
+++ b/drivers/gpu/host1x/channel.h
@@ -29,6 +29,8 @@ struct host1x_channel;
 
 struct host1x_channel_list {
struct host1x_channel *channels;
+
+   struct mutex lock;
unsigned long *allocated_channels;
 };
 
-- 
2.14.2



[PATCH 00/10] Dynamic Host1x channel allocation

2017-11-05 Thread Mikko Perttunen
Hi all,

this adds support for a new model of hardware channel allocation for
Host1x/TegraDRM. In the current model, one hardware channel is
allocated for each client device at probe time. This is simple but
does not allow for optimal use of hardware resources.

In the new model, we allocate channels dynamically when a
"userspace channel", opened using the channel open IOCTL, has pending
jobs. However, each userspace channel can only have one assigned
channel at a time, ensuring current serialization behavior is kept.
As such there is no change in programming model for the userspace.

The patch adapts VIC to use the new model - GR2D and GR3D are not
modified, as the older Tegra chips they are found on do not have
a large number of hardware channels and therefore it is not clear
if the new model is beneficial (and I don't have access to those
chips to test it out).

Tested using the host1x_test test suite, and also by running
the performance test of host1x_test in parallel.

Thanks,
Mikko

Mikko Perttunen (10):
  gpu: host1x: Parameterize channel aperture size
  gpu: host1x: Print MLOCK state in debug dumps on T186
  gpu: host1x: Add lock around channel allocation
  gpu: host1x: Lock classes during job submission
  gpu: host1x: Add job done callback
  drm/tegra: Deliver job completion callback to client
  drm/tegra: Make syncpoints be per-context
  drm/tegra: Implement dynamic channel allocation model
  drm/tegra: Boot VIC in runtime resume
  gpu: host1x: Optionally block when acquiring channel

 drivers/gpu/drm/tegra/drm.c|  82 +++--
 drivers/gpu/drm/tegra/drm.h|  12 ++-
 drivers/gpu/drm/tegra/gr2d.c   |   8 +-
 drivers/gpu/drm/tegra/gr3d.c   |   8 +-
 drivers/gpu/drm/tegra/vic.c| 120 
 drivers/gpu/host1x/cdma.c  |  45 ++---
 drivers/gpu/host1x/cdma.h  |   1 +
 drivers/gpu/host1x/channel.c   |  47 --
 drivers/gpu/host1x/channel.h   |   3 +
 drivers/gpu/host1x/hw/cdma_hw.c| 122 +
 drivers/gpu/host1x/hw/channel_hw.c |  74 +++
 drivers/gpu/host1x/hw/debug_hw_1x06.c  |  18 +++-
 drivers/gpu/host1x/hw/host1x01_hardware.h  |  10 ++
 drivers/gpu/host1x/hw/host1x02_hardware.h  |  10 ++
 drivers/gpu/host1x/hw/host1x04_hardware.h  |  10 ++
 drivers/gpu/host1x/hw/host1x05_hardware.h  |  10 ++
 drivers/gpu/host1x/hw/host1x06_hardware.h  |  10 ++
 drivers/gpu/host1x/hw/hw_host1x01_channel.h|   2 +
 drivers/gpu/host1x/hw/hw_host1x01_sync.h   |   6 ++
 drivers/gpu/host1x/hw/hw_host1x02_channel.h|   2 +
 drivers/gpu/host1x/hw/hw_host1x02_sync.h   |   6 ++
 drivers/gpu/host1x/hw/hw_host1x04_channel.h|   2 +
 drivers/gpu/host1x/hw/hw_host1x04_sync.h   |   6 ++
 drivers/gpu/host1x/hw/hw_host1x05_channel.h|   2 +
 drivers/gpu/host1x/hw/hw_host1x05_sync.h   |   6 ++
 drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h |   5 +
 drivers/gpu/host1x/hw/hw_host1x06_vm.h |   2 +
 include/linux/host1x.h |   6 +-
 28 files changed, 517 insertions(+), 118 deletions(-)

-- 
2.14.2



[PATCH 02/10] gpu: host1x: Print MLOCK state in debug dumps on T186

2017-11-05 Thread Mikko Perttunen
Add support for dumping current MLOCK state in debug dumps also
on T186, now that MLOCKs are used by the driver.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/hw/debug_hw_1x06.c | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/hw/debug_hw_1x06.c 
b/drivers/gpu/host1x/hw/debug_hw_1x06.c
index b503c740c022..659dd6042ccc 100644
--- a/drivers/gpu/host1x/hw/debug_hw_1x06.c
+++ b/drivers/gpu/host1x/hw/debug_hw_1x06.c
@@ -131,5 +131,21 @@ static void host1x_debug_show_channel_fifo(struct host1x 
*host,
 
 static void host1x_debug_show_mlocks(struct host1x *host, struct output *o)
 {
-   /* TODO */
+   unsigned int i;
+
+   if (!host->hv_regs)
+   return;
+
+   host1x_debug_output(o, " mlocks \n");
+
+   for (i = 0; i < host1x_syncpt_nb_mlocks(host); i++) {
+   u32 val = host1x_hypervisor_readl(host, HOST1X_HV_MLOCK(i));
+   if (HOST1X_HV_MLOCK_LOCKED_V(val))
+   host1x_debug_output(o, "%u: locked by channel %u\n",
+   i, HOST1X_HV_MLOCK_CH_V(val));
+   else
+   host1x_debug_output(o, "%u: unlocked\n", i);
+   }
+
+   host1x_debug_output(o, "\n");
 }
-- 
2.14.2



[PATCH 09/10] drm/tegra: Boot VIC in runtime resume

2017-11-05 Thread Mikko Perttunen
This ensures that there are no concurrency issues when multiple users
are trying to use VIC concurrently, and also simplifies the code
slightly.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/vic.c | 47 +++--
 1 file changed, 20 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index 0cacf023a890..3de20f287112 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -29,7 +29,6 @@ struct vic_config {
 
 struct vic {
struct falcon falcon;
-   bool booted;
 
void __iomem *regs;
struct tegra_drm_client client;
@@ -51,33 +50,12 @@ static void vic_writel(struct vic *vic, u32 value, unsigned 
int offset)
writel(value, vic->regs + offset);
 }
 
-static int vic_runtime_resume(struct device *dev)
-{
-   struct vic *vic = dev_get_drvdata(dev);
-
-   return clk_prepare_enable(vic->clk);
-}
-
-static int vic_runtime_suspend(struct device *dev)
-{
-   struct vic *vic = dev_get_drvdata(dev);
-
-   clk_disable_unprepare(vic->clk);
-
-   vic->booted = false;
-
-   return 0;
-}
-
 static int vic_boot(struct vic *vic)
 {
u32 fce_ucode_size, fce_bin_data_offset;
void *hdr;
int err = 0;
 
-   if (vic->booted)
-   return 0;
-
/* setup clockgating registers */
vic_writel(vic, CG_IDLE_CG_DLY_CNT(4) |
CG_IDLE_CG_EN |
@@ -108,7 +86,26 @@ static int vic_boot(struct vic *vic)
return err;
}
 
-   vic->booted = true;
+   return 0;
+}
+
+static int vic_runtime_resume(struct device *dev)
+{
+   struct vic *vic = dev_get_drvdata(dev);
+   int err;
+
+   err = clk_prepare_enable(vic->clk);
+   if (err < 0)
+   return err;
+
+   return vic_boot(vic);
+}
+
+static int vic_runtime_suspend(struct device *dev)
+{
+   struct vic *vic = dev_get_drvdata(dev);
+
+   clk_disable_unprepare(vic->clk);
 
return 0;
 }
@@ -225,10 +222,6 @@ static int vic_submit(struct tegra_drm_context *context,
if (err < 0)
return err;
 
-   err = vic_boot(vic);
-   if (err < 0)
-   goto put_vic;
-
err = tegra_drm_context_get_channel(context);
if (err < 0)
goto put_vic;
-- 
2.14.2



[PATCH 08/10] drm/tegra: Implement dynamic channel allocation model

2017-11-05 Thread Mikko Perttunen
In the traditional channel allocation model, a single hardware channel
was allocated for each client. This is simple from an implementation
perspective but prevents use of hardware scheduling.

This patch implements a channel allocation model where when a user
submits a job for a context, a hardware channel is allocated for
that context. The same channel is kept for as long as there are
incomplete jobs for that context. This way we can use hardware
scheduling and channel isolation between userspace processes, but
also prevent idling contexts from taking up hardware resources.

For now, this patch only adapts VIC to the new model.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/drm.c | 46 ++
 drivers/gpu/drm/tegra/drm.h |  7 +++-
 drivers/gpu/drm/tegra/vic.c | 79 +++--
 3 files changed, 92 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index b964e18e3058..658bc8814f38 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -382,6 +382,51 @@ static int host1x_waitchk_copy_from_user(struct 
host1x_waitchk *dest,
return 0;
 }
 
+/**
+ * tegra_drm_context_get_channel() - Get a channel for submissions
+ * @context: Context for which to get a channel
+ *
+ * Request a free hardware host1x channel for this user context, or if the
+ * context already has one, bump its refcount.
+ *
+ * Returns 0 on success, or -EBUSY if there were no free hardware channels.
+ */
+int tegra_drm_context_get_channel(struct tegra_drm_context *context)
+{
+   struct host1x_client *client = &context->client->base;
+
+   mutex_lock(&context->lock);
+
+   if (context->pending_jobs == 0) {
+   context->channel = host1x_channel_request(client->dev);
+   if (!context->channel) {
+   mutex_unlock(&context->lock);
+   return -EBUSY;
+   }
+   }
+
+   context->pending_jobs++;
+
+   mutex_unlock(&context->lock);
+
+   return 0;
+}
+
+/**
+ * tegra_drm_context_put_channel() - Put a previously gotten channel
+ * @context: Context which channel is no longer needed
+ *
+ * Decrease the refcount of the channel associated with this context,
+ * freeing it if the refcount drops to zero.
+ */
+void tegra_drm_context_put_channel(struct tegra_drm_context *context)
+{
+   mutex_lock(&context->lock);
+   if (--context->pending_jobs == 0)
+   host1x_channel_put(context->channel);
+   mutex_unlock(&context->lock);
+}
+
 static void tegra_drm_job_done(struct host1x_job *job)
 {
struct tegra_drm_context *context = job->callback_data;
@@ -737,6 +782,7 @@ static int tegra_open_channel(struct drm_device *drm, void 
*data,
kfree(context);
 
kref_init(&context->ref);
+   mutex_init(&context->lock);
 
mutex_unlock(&fpriv->lock);
return err;
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 11d690846fd0..d0c3f1f779f6 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -78,9 +78,12 @@ struct tegra_drm_context {
struct kref ref;
 
struct tegra_drm_client *client;
+   unsigned int id;
+
+   struct mutex lock;
struct host1x_channel *channel;
struct host1x_syncpt *syncpt;
-   unsigned int id;
+   unsigned int pending_jobs;
 };
 
 struct tegra_drm_client_ops {
@@ -95,6 +98,8 @@ struct tegra_drm_client_ops {
void (*submit_done)(struct tegra_drm_context *context);
 };
 
+int tegra_drm_context_get_channel(struct tegra_drm_context *context);
+void tegra_drm_context_put_channel(struct tegra_drm_context *context);
 int tegra_drm_submit(struct tegra_drm_context *context,
 struct drm_tegra_submit *args, struct drm_device *drm,
 struct drm_file *file);
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index efe5f3af933e..0cacf023a890 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -33,7 +33,6 @@ struct vic {
 
void __iomem *regs;
struct tegra_drm_client client;
-   struct host1x_channel *channel;
struct iommu_domain *domain;
struct device *dev;
struct clk *clk;
@@ -161,28 +160,12 @@ static int vic_init(struct host1x_client *client)
goto detach_device;
}
 
-   vic->channel = host1x_channel_request(client->dev);
-   if (!vic->channel) {
-   err = -ENOMEM;
-   goto detach_device;
-   }
-
-   client->syncpts[0] = host1x_syncpt_request(client->dev, 0);
-   if (!client->syncpts[0]) {
-   err = -ENOMEM;
-   goto free_channel;
-   }
-
err = tegra_drm_register_client(tegra, drm);
if (err < 0)
-   goto free_syncpt;
+   goto detach_device;
 
return 0;
 

[PATCH 10/10] gpu: host1x: Optionally block when acquiring channel

2017-11-05 Thread Mikko Perttunen
Add an option to host1x_channel_request to interruptibly wait for a
free channel. This allows IOCTLs that acquire a channel to block
the userspace.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/drm.c  |  9 +
 drivers/gpu/drm/tegra/gr2d.c |  6 +++---
 drivers/gpu/drm/tegra/gr3d.c |  6 +++---
 drivers/gpu/host1x/channel.c | 40 ++--
 drivers/gpu/host1x/channel.h |  1 +
 include/linux/host1x.h   |  2 +-
 6 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 658bc8814f38..19f77c1a76c0 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -389,7 +389,8 @@ static int host1x_waitchk_copy_from_user(struct 
host1x_waitchk *dest,
  * Request a free hardware host1x channel for this user context, or if the
  * context already has one, bump its refcount.
  *
- * Returns 0 on success, or -EBUSY if there were no free hardware channels.
+ * Returns 0 on success, -EINTR if wait for a free channel was interrupted,
+ * or other error.
  */
 int tegra_drm_context_get_channel(struct tegra_drm_context *context)
 {
@@ -398,10 +399,10 @@ int tegra_drm_context_get_channel(struct 
tegra_drm_context *context)
mutex_lock(&context->lock);
 
if (context->pending_jobs == 0) {
-   context->channel = host1x_channel_request(client->dev);
-   if (!context->channel) {
+   context->channel = host1x_channel_request(client->dev, true);
+   if (IS_ERR(context->channel)) {
mutex_unlock(&context->lock);
-   return -EBUSY;
+   return PTR_ERR(context->channel);
}
}
 
diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 3db3bcac48b9..c1853402f69b 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -32,9 +32,9 @@ static int gr2d_init(struct host1x_client *client)
unsigned long flags = HOST1X_SYNCPT_HAS_BASE;
struct gr2d *gr2d = to_gr2d(drm);
 
-   gr2d->channel = host1x_channel_request(client->dev);
-   if (!gr2d->channel)
-   return -ENOMEM;
+   gr2d->channel = host1x_channel_request(client->dev, false);
+   if (IS_ERR(gr2d->channel))
+   return PTR_ERR(gr2d->channel);
 
client->syncpts[0] = host1x_syncpt_request(client->dev, flags);
if (!client->syncpts[0]) {
diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index 279438342c8c..793a91d577cb 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -42,9 +42,9 @@ static int gr3d_init(struct host1x_client *client)
unsigned long flags = HOST1X_SYNCPT_HAS_BASE;
struct gr3d *gr3d = to_gr3d(drm);
 
-   gr3d->channel = host1x_channel_request(client->dev);
-   if (!gr3d->channel)
-   return -ENOMEM;
+   gr3d->channel = host1x_channel_request(client->dev, false);
+   if (IS_ERR(gr3d->channel))
+   return PTR_ERR(gr3d->channel);
 
client->syncpts[0] = host1x_syncpt_request(client->dev, flags);
if (!client->syncpts[0]) {
diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index 9d8cad12f9d8..eebcd51261df 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -43,6 +43,7 @@ int host1x_channel_list_init(struct host1x_channel_list 
*chlist,
bitmap_zero(chlist->allocated_channels, num_channels);
 
mutex_init(&chlist->lock);
+   sema_init(&chlist->sema, num_channels);
 
return 0;
 }
@@ -99,6 +100,8 @@ static void release_channel(struct kref *kref)
host1x_cdma_deinit(&channel->cdma);
 
clear_bit(channel->id, chlist->allocated_channels);
+
+   up(&chlist->sema);
 }
 
 void host1x_channel_put(struct host1x_channel *channel)
@@ -107,19 +110,30 @@ void host1x_channel_put(struct host1x_channel *channel)
 }
 EXPORT_SYMBOL(host1x_channel_put);
 
-static struct host1x_channel *acquire_unused_channel(struct host1x *host)
+static struct host1x_channel *acquire_unused_channel(struct host1x *host,
+bool wait)
 {
struct host1x_channel_list *chlist = &host->channel_list;
unsigned int max_channels = host->info->nb_channels;
unsigned int index;
+   int err;
+
+   if (wait) {
+   err = down_interruptible(&chlist->sema);
+   if (err)
+   return ERR_PTR(err);
+   } else {
+   if (down_trylock(&chlist->sema))
+   return ERR_PTR(-EBUSY);
+   }
 
mutex_lock(&chlist->lock);
 
index = find_first_zero_bit(chlist->allocated_channels, max_channels);
-   if (index >= max_channels) {
+   if (WARN(index >= max_channels, "failed to find free cha

[PATCH 07/10] drm/tegra: Make syncpoints be per-context

2017-11-05 Thread Mikko Perttunen
As a preparation for each context potentially being able to have a
separate hardware channel, and thus requiring a separate syncpoint,
move syncpoints to be stored inside each context instead of global
client data.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/drm.c  | 8 
 drivers/gpu/drm/tegra/drm.h  | 1 +
 drivers/gpu/drm/tegra/gr2d.c | 2 ++
 drivers/gpu/drm/tegra/gr3d.c | 2 ++
 drivers/gpu/drm/tegra/vic.c  | 2 ++
 5 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 3e2a4a19412e..b964e18e3058 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -783,12 +783,12 @@ static int tegra_get_syncpt(struct drm_device *drm, void 
*data,
goto unlock;
}
 
-   if (args->index >= context->client->base.num_syncpts) {
+   if (args->index >= 1) {
err = -EINVAL;
goto unlock;
}
 
-   syncpt = context->client->base.syncpts[args->index];
+   syncpt = context->syncpt;
args->id = host1x_syncpt_id(syncpt);
 
 unlock:
@@ -837,12 +837,12 @@ static int tegra_get_syncpt_base(struct drm_device *drm, 
void *data,
goto unlock;
}
 
-   if (args->syncpt >= context->client->base.num_syncpts) {
+   if (args->syncpt >= 1) {
err = -EINVAL;
goto unlock;
}
 
-   syncpt = context->client->base.syncpts[args->syncpt];
+   syncpt = context->syncpt;
 
base = host1x_syncpt_get_base(syncpt);
if (!base) {
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 079aebb3fb38..11d690846fd0 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -79,6 +79,7 @@ struct tegra_drm_context {
 
struct tegra_drm_client *client;
struct host1x_channel *channel;
+   struct host1x_syncpt *syncpt;
unsigned int id;
 };
 
diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 6ea070da7718..3db3bcac48b9 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -76,6 +76,8 @@ static int gr2d_open_channel(struct tegra_drm_client *client,
if (!context->channel)
return -ENOMEM;
 
+   context->syncpt = client->base.syncpts[0];
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index cee2ab645cde..279438342c8c 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -86,6 +86,8 @@ static int gr3d_open_channel(struct tegra_drm_client *client,
if (!context->channel)
return -ENOMEM;
 
+   context->syncpt = client->base.syncpts[0];
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index 6697a21a250d..efe5f3af933e 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -240,6 +240,8 @@ static int vic_open_channel(struct tegra_drm_client *client,
return -ENOMEM;
}
 
+   context->syncpt = client->base.syncpts[0];
+
return 0;
 }
 
-- 
2.14.2



[PATCH 04/10] gpu: host1x: Lock classes during job submission

2017-11-05 Thread Mikko Perttunen
Host1x has a feature called MLOCKs which allow a certain class
(~HW unit) to be locked (in the mutex sense) and unlocked during
command execution, preventing other channels from accessing the
class while it is locked. This is necessary to prevent concurrent
jobs from messing up class state.

This has not been necessary so far since due to our channel allocation
model, there has only been a single hardware channel submitting
commands to each class. Future patches, however, change the channel
allocation model to allow hardware-scheduled concurrency, and as such
we need to start locking.

This patch implements locking on all platforms from Tegra20 to
Tegra186.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/cdma.c  |   1 +
 drivers/gpu/host1x/cdma.h  |   1 +
 drivers/gpu/host1x/hw/cdma_hw.c| 122 +
 drivers/gpu/host1x/hw/channel_hw.c |  71 ++
 drivers/gpu/host1x/hw/host1x01_hardware.h  |  10 ++
 drivers/gpu/host1x/hw/host1x02_hardware.h  |  10 ++
 drivers/gpu/host1x/hw/host1x04_hardware.h  |  10 ++
 drivers/gpu/host1x/hw/host1x05_hardware.h  |  10 ++
 drivers/gpu/host1x/hw/host1x06_hardware.h  |  10 ++
 drivers/gpu/host1x/hw/hw_host1x01_sync.h   |   6 ++
 drivers/gpu/host1x/hw/hw_host1x02_sync.h   |   6 ++
 drivers/gpu/host1x/hw/hw_host1x04_sync.h   |   6 ++
 drivers/gpu/host1x/hw/hw_host1x05_sync.h   |   6 ++
 drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h |   5 +
 14 files changed, 257 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 28541b280739..f787cfe69c11 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -232,6 +232,7 @@ static void cdma_start_timer_locked(struct host1x_cdma 
*cdma,
}
 
cdma->timeout.client = job->client;
+   cdma->timeout.class = job->class;
cdma->timeout.syncpt = host1x_syncpt_get(host, job->syncpt_id);
cdma->timeout.syncpt_val = job->syncpt_end;
cdma->timeout.start_ktime = ktime_get();
diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
index 286d49386be9..e72660fc83c9 100644
--- a/drivers/gpu/host1x/cdma.h
+++ b/drivers/gpu/host1x/cdma.h
@@ -59,6 +59,7 @@ struct buffer_timeout {
ktime_t start_ktime;/* starting time */
/* context timeout information */
int client;
+   u32 class;
 };
 
 enum cdma_event {
diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
index ce320534cbed..4d5970d863d5 100644
--- a/drivers/gpu/host1x/hw/cdma_hw.c
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -16,6 +16,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -243,6 +244,125 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 
getptr)
cdma_timeout_restart(cdma, getptr);
 }
 
+static int mlock_id_for_class(unsigned int class)
+{
+#if HOST1X_HW >= 6
+   switch (class)
+   {
+   case HOST1X_CLASS_HOST1X:
+   return 0;
+   case HOST1X_CLASS_VIC:
+   return 17;
+   default:
+   return -EINVAL;
+   }
+#else
+   switch (class)
+   {
+   case HOST1X_CLASS_HOST1X:
+   return 0;
+   case HOST1X_CLASS_GR2D:
+   return 1;
+   case HOST1X_CLASS_GR2D_SB:
+   return 2;
+   case HOST1X_CLASS_VIC:
+   return 3;
+   case HOST1X_CLASS_GR3D:
+   return 4;
+   default:
+   return -EINVAL;
+   }
+#endif
+}
+
+static void timeout_release_mlock(struct host1x_cdma *cdma)
+{
+#if HOST1X_HW >= 6
+   struct host1x_channel *ch = cdma_to_channel(cdma);
+   struct host1x *host = cdma_to_host1x(cdma);
+   u32 pb_pos, pb_temp[3], val;
+   int err, mlock_id;
+
+   if (!host->hv_regs)
+   return;
+
+   mlock_id = mlock_id_for_class(cdma->timeout.class);
+   if (WARN(mlock_id < 0, "Invalid class ID"))
+   return;
+
+   val = host1x_hypervisor_readl(host, HOST1X_HV_MLOCK(mlock_id));
+   if (!HOST1X_HV_MLOCK_LOCKED_V(val) ||
+   HOST1X_HV_MLOCK_CH_V(val) != ch->id)
+   {
+   /* Channel is not holding the MLOCK, nothing to release. */
+   return;
+   }
+
+   /*
+* On Tegra186, there is no register to unlock an MLOCK (don't ask me
+* why). As such, we have to execute a release_mlock instruction to
+* do it. We do this by backing up the first three opcodes of the
+* pushbuffer and replacing them with our own short sequence to do
+* the unlocking. We set the .pos field to 12, which causes DMAEND
+* to be set accordingly such that only the three opcodes we set
+* here are executed before CDMA stops. Finally w

[PATCH 05/10] gpu: host1x: Add job done callback

2017-11-05 Thread Mikko Perttunen
Allow job submitters to set a callback to be called when the job has
completed. The jobs are stored and the callbacks called outside the
CDMA lock area to allow the callbacks to do CDMA-requiring operations
like freeing channels.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/cdma.c | 44 +---
 include/linux/host1x.h|  4 
 2 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index f787cfe69c11..57221d199d33 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -251,17 +251,24 @@ static void stop_cdma_timer_locked(struct host1x_cdma 
*cdma)
cdma->timeout.client = 0;
 }
 
-/*
- * For all sync queue entries that have already finished according to the
- * current sync point registers:
- *  - unpin & unref their mems
- *  - pop their push buffer slots
- *  - remove them from the sync queue
+/**
+ * update_cdma_locked() - Update CDMA sync queue
+ * @cdma: CDMA instance to update
+ * @done_jobs: List that finished jobs will be added to
+ *
+ * Go through the CDMA's sync queue, and for each job that has been finished,
+ * - unpin it
+ * - pop its push buffer slots
+ * - remove it from the sync queue
+ * - add it to the done_jobs list.
+ *
  * This is normally called from the host code's worker thread, but can be
  * called manually if necessary.
- * Must be called with the cdma lock held.
+ *
+ * Must be called with the CDMA lock held.
  */
-static void update_cdma_locked(struct host1x_cdma *cdma)
+static void update_cdma_locked(struct host1x_cdma *cdma,
+  struct list_head *done_jobs)
 {
bool signal = false;
struct host1x *host1x = cdma_to_host1x(cdma);
@@ -305,8 +312,7 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
signal = true;
}
 
-   list_del(>list);
-   host1x_job_put(job);
+   list_move_tail(>list, done_jobs);
}
 
if (cdma->event == CDMA_EVENT_SYNC_QUEUE_EMPTY &&
@@ -542,7 +548,23 @@ void host1x_cdma_end(struct host1x_cdma *cdma,
  */
 void host1x_cdma_update(struct host1x_cdma *cdma)
 {
+   struct host1x_job *job, *tmp;
+   LIST_HEAD(done_jobs);
+
mutex_lock(>lock);
-   update_cdma_locked(cdma);
+   update_cdma_locked(cdma, _jobs);
mutex_unlock(>lock);
+
+   /*
+* The done callback may want to free the channel, which requires
+* taking the CDMA lock, so we need to do it outside the above lock
+* region.
+*/
+   list_for_each_entry_safe(job, tmp, _jobs, list) {
+   if (job->done)
+   job->done(job);
+
+   list_del(>list);
+   host1x_job_put(job);
+   }
 }
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 630b1a98ab58..f931d28a68ff 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -253,6 +253,10 @@ struct host1x_job {
/* Check if class belongs to the unit */
int (*is_valid_class)(u32 class);
 
+   /* Job done callback */
+   void (*done)(struct host1x_job *job);
+   void *callback_data;
+
/* Request a SETCLASS to this class */
u32 class;
 
-- 
2.14.2



[PATCH 06/10] drm/tegra: Deliver job completion callback to client

2017-11-05 Thread Mikko Perttunen
To allow client drivers to free resources when jobs have completed,
deliver job completion callbacks to them. This requires adding
reference counting to context objects, as job completion can happen
after the userspace application has closed the context. As such,
also add kref-based refcounting for contexts.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/drm.c | 27 ---
 drivers/gpu/drm/tegra/drm.h |  4 
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 2cdd054520bf..3e2a4a19412e 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -281,8 +281,11 @@ static int tegra_drm_open(struct drm_device *drm, struct 
drm_file *filp)
return 0;
 }
 
-static void tegra_drm_context_free(struct tegra_drm_context *context)
+static void tegra_drm_context_free(struct kref *ref)
 {
+   struct tegra_drm_context *context =
+   container_of(ref, struct tegra_drm_context, ref);
+
context->client->ops->close_channel(context);
kfree(context);
 }
@@ -379,6 +382,16 @@ static int host1x_waitchk_copy_from_user(struct 
host1x_waitchk *dest,
return 0;
 }
 
+static void tegra_drm_job_done(struct host1x_job *job)
+{
+   struct tegra_drm_context *context = job->callback_data;
+
+   if (context->client->ops->submit_done)
+   context->client->ops->submit_done(context);
+
+   kref_put(>ref, tegra_drm_context_free);
+}
+
 int tegra_drm_submit(struct tegra_drm_context *context,
 struct drm_tegra_submit *args, struct drm_device *drm,
 struct drm_file *file)
@@ -560,6 +573,9 @@ int tegra_drm_submit(struct tegra_drm_context *context,
job->syncpt_id = syncpt.id;
job->timeout = 1;
 
+   job->done = tegra_drm_job_done;
+   job->callback_data = context;
+
if (args->timeout && args->timeout < 1)
job->timeout = args->timeout;
 
@@ -567,8 +583,11 @@ int tegra_drm_submit(struct tegra_drm_context *context,
if (err)
goto fail;
 
+   kref_get(>ref);
+
err = host1x_job_submit(job);
if (err) {
+   kref_put(>ref, tegra_drm_context_free);
host1x_job_unpin(job);
goto fail;
}
@@ -717,6 +736,8 @@ static int tegra_open_channel(struct drm_device *drm, void 
*data,
if (err < 0)
kfree(context);
 
+   kref_init(>ref);
+
mutex_unlock(>lock);
return err;
 }
@@ -738,7 +759,7 @@ static int tegra_close_channel(struct drm_device *drm, void 
*data,
}
 
idr_remove(>contexts, context->id);
-   tegra_drm_context_free(context);
+   kref_put(>ref, tegra_drm_context_free);
 
 unlock:
mutex_unlock(>lock);
@@ -1026,7 +1047,7 @@ static int tegra_drm_context_cleanup(int id, void *p, 
void *data)
 {
struct tegra_drm_context *context = p;
 
-   tegra_drm_context_free(context);
+   kref_put(>ref, tegra_drm_context_free);
 
return 0;
 }
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 063f5d397526..079aebb3fb38 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -74,6 +75,8 @@ struct tegra_drm {
 struct tegra_drm_client;
 
 struct tegra_drm_context {
+   struct kref ref;
+
struct tegra_drm_client *client;
struct host1x_channel *channel;
unsigned int id;
@@ -88,6 +91,7 @@ struct tegra_drm_client_ops {
int (*submit)(struct tegra_drm_context *context,
  struct drm_tegra_submit *args, struct drm_device *drm,
  struct drm_file *file);
+   void (*submit_done)(struct tegra_drm_context *context);
 };
 
 int tegra_drm_submit(struct tegra_drm_context *context,
-- 
2.14.2



[PATCH 01/10] gpu: host1x: Parameterize channel aperture size

2017-11-05 Thread Mikko Perttunen
The size of a single channel's aperture is different on Tegra186 vs.
previous chips. Parameterize the value using a new define in the
register definition headers.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/hw/channel_hw.c  | 3 +--
 drivers/gpu/host1x/hw/hw_host1x01_channel.h | 2 ++
 drivers/gpu/host1x/hw/hw_host1x02_channel.h | 2 ++
 drivers/gpu/host1x/hw/hw_host1x04_channel.h | 2 ++
 drivers/gpu/host1x/hw/hw_host1x05_channel.h | 2 ++
 drivers/gpu/host1x/hw/hw_host1x06_vm.h  | 2 ++
 6 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
b/drivers/gpu/host1x/hw/channel_hw.c
index 5c0dc6bb51d1..246b78c41281 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -26,7 +26,6 @@
 #include "../intr.h"
 #include "../job.h"
 
-#define HOST1X_CHANNEL_SIZE 16384
 #define TRACE_MAX_LENGTH 128U
 
 static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
@@ -205,7 +204,7 @@ static void enable_gather_filter(struct host1x *host,
 static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
   unsigned int index)
 {
-   ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE;
+   ch->regs = dev->regs + HOST1X_CHANNEL_BASE(index);
enable_gather_filter(dev, ch);
return 0;
 }
diff --git a/drivers/gpu/host1x/hw/hw_host1x01_channel.h 
b/drivers/gpu/host1x/hw/hw_host1x01_channel.h
index b4bc7ca4e051..be56a3a506de 100644
--- a/drivers/gpu/host1x/hw/hw_host1x01_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x01_channel.h
@@ -51,6 +51,8 @@
 #ifndef __hw_host1x_channel_host1x_h__
 #define __hw_host1x_channel_host1x_h__
 
+#define HOST1X_CHANNEL_BASE(x) ((x) * 0x4000)
+
 static inline u32 host1x_channel_fifostat_r(void)
 {
return 0x0;
diff --git a/drivers/gpu/host1x/hw/hw_host1x02_channel.h 
b/drivers/gpu/host1x/hw/hw_host1x02_channel.h
index e490bcde33fe..a142576a2c6e 100644
--- a/drivers/gpu/host1x/hw/hw_host1x02_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x02_channel.h
@@ -51,6 +51,8 @@
 #ifndef HOST1X_HW_HOST1X02_CHANNEL_H
 #define HOST1X_HW_HOST1X02_CHANNEL_H
 
+#define HOST1X_CHANNEL_BASE(x) ((x) * 0x4000)
+
 static inline u32 host1x_channel_fifostat_r(void)
 {
return 0x0;
diff --git a/drivers/gpu/host1x/hw/hw_host1x04_channel.h 
b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
index 2e8b635aa660..645483c07fc2 100644
--- a/drivers/gpu/host1x/hw/hw_host1x04_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
@@ -51,6 +51,8 @@
 #ifndef HOST1X_HW_HOST1X04_CHANNEL_H
 #define HOST1X_HW_HOST1X04_CHANNEL_H
 
+#define HOST1X_CHANNEL_BASE(x) ((x) * 0x4000)
+
 static inline u32 host1x_channel_fifostat_r(void)
 {
return 0x0;
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_channel.h 
b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
index abbbc2641ce6..6aef6bc1c96d 100644
--- a/drivers/gpu/host1x/hw/hw_host1x05_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
@@ -51,6 +51,8 @@
 #ifndef HOST1X_HW_HOST1X05_CHANNEL_H
 #define HOST1X_HW_HOST1X05_CHANNEL_H
 
+#define HOST1X_CHANNEL_BASE(x) ((x) * 0x4000)
+
 static inline u32 host1x_channel_fifostat_r(void)
 {
return 0x0;
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_vm.h 
b/drivers/gpu/host1x/hw/hw_host1x06_vm.h
index e54b33902332..0750aea78a30 100644
--- a/drivers/gpu/host1x/hw/hw_host1x06_vm.h
+++ b/drivers/gpu/host1x/hw/hw_host1x06_vm.h
@@ -15,6 +15,8 @@
  *
  */
 
+#define HOST1X_CHANNEL_BASE(x) ((x) * 0x100)
+
 #define HOST1X_CHANNEL_DMASTART0x
 #define HOST1X_CHANNEL_DMASTART_HI 0x0004
 #define HOST1X_CHANNEL_DMAPUT  0x0008
-- 
2.14.2



Re: [PATCH v2 1/6] gpu: host1x: Enable Tegra186 syncpoint protection

2017-09-22 Thread Mikko Perttunen

On 09/05/2017 04:33 PM, Dmitry Osipenko wrote:

On 05.09.2017 11:10, Mikko Perttunen wrote:
... >> diff --git a/drivers/gpu/host1x/hw/channel_hw.c 

b/drivers/gpu/host1x/hw/channel_hw.c

index 8447a56c41ca..0161da331702 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
  
  	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
  
+	/* assign syncpoint to channel */

+   host1x_hw_syncpt_assign_channel(host, sp, ch);


This function could be renamed to host1x_hw_assign_syncpt_to_channel(), and then
the comment on it won't be needed.


Maybe host1x_hw_syncpt_assign_to_channel? I'd like to keep the current 
noun_verb format. Though IMHO even the current name is pretty 
descriptive in itself.




It is not very nice that the channel would be re-assigned on each submit. Maybe that
assignment should be done by host1x_syncpt_request()?


host1x_syncpt_request doesn't know about the channel so we'd have to 
thread this information there and through each client driver in 
drm/tegra/, so I decided not to do this at this time. I'm planning a new 
channel allocation model which will change that side of the code anyway, 
so I'd like to revisit this at that point. For our current channel 
model, the current implementation doesn't have any functional downsides 
anyway.





+
job->syncpt_end = syncval;
  
  	/* add a setclass for modules that require it */

diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c 
b/drivers/gpu/host1x/hw/syncpt_hw.c
index 7b0270d60742..dc7a44614fef 100644
--- a/drivers/gpu/host1x/hw/syncpt_hw.c
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -106,6 +106,50 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, 
void *patch_addr)
return 0;
  }
  
+/**

+ * syncpt_assign_channel() - Assign syncpoint to channel
+ * @sp: syncpoint
+ * @ch: channel
+ *
+ * On chips with the syncpoint protection feature (Tegra186+), assign @sp to
+ * @ch, preventing other channels from incrementing the syncpoints. If @ch is
+ * NULL, unassigns the syncpoint.
+ *
+ * On older chips, do nothing.
+ */
+static void syncpt_assign_channel(struct host1x_syncpt *sp,
+ struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+   struct host1x *host = sp->host;
+
+   if (!host->hv_regs)
+   return;
+
+   host1x_sync_writel(host,
+  HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
+  HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
+#endif
+}
+
+/**
+ * syncpt_enable_protection() - Enable syncpoint protection
+ * @host: host1x instance
+ *
+ * On chips with the syncpoint protection feature (Tegra186+), enable this
+ * feature. On older chips, do nothing.
+ */
+static void syncpt_enable_protection(struct host1x *host)
+{
+#if HOST1X_HW >= 6
+   if (!host->hv_regs)
+   return;
+
+   host1x_hypervisor_writel(host, HOST1X_HV_SYNCPT_PROT_EN_CH_EN,
+HOST1X_HV_SYNCPT_PROT_EN);
+#endif
+}
+
  static const struct host1x_syncpt_ops host1x_syncpt_ops = {
.restore = syncpt_restore,
.restore_wait_base = syncpt_restore_wait_base,
@@ -113,4 +157,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
.load = syncpt_load,
.cpu_incr = syncpt_cpu_incr,
.patch_wait = syncpt_patch_wait,
+   .assign_channel = syncpt_assign_channel,
+   .enable_protection = syncpt_enable_protection,
  };
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 048ac9e344ce..4c7a4c8b2ad2 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -398,6 +398,13 @@ int host1x_syncpt_init(struct host1x *host)
for (i = 0; i < host->info->nb_pts; i++) {
syncpt[i].id = i;
syncpt[i].host = host;
+
+   /*
+* Unassign syncpt from channels for purposes of Tegra186
+* syncpoint protection. This prevents any channel from
+* accessing it until it is reassigned.
+*/
+   host1x_hw_syncpt_assign_channel(host, [i], NULL);
}
  
  	for (i = 0; i < host->info->nb_bases; i++)

@@ -408,6 +415,7 @@ int host1x_syncpt_init(struct host1x *host)
host->bases = bases;
  
  	host1x_syncpt_restore(host);

+   host1x_hw_syncpt_enable_protection(host);
  
  	/* Allocate sync point to use for clearing waits for expired fences */

host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);






Re: [PATCH] arm64: tegra: Add SMMU node for Tegra186

2017-09-20 Thread Mikko Perttunen

Reviewed-by: Mikko Perttunen 
Tested-by: Mikko Perttunen 

Tested to work with Host1x :)

I noticed a slight difference with downstream where downstream has 
global interrupts 170 and 171 - but looks like the latter is for secure 
faults which we should never get so this way seems more correct.


Thanks,
Mikko

On 14.09.2017 02:01, Krishna Reddy wrote:

Add the DT node for ARM SMMU on Tegra186.

Signed-off-by: Krishna Reddy 
---
 arch/arm64/boot/dts/nvidia/tegra186.dtsi | 73 
 1 file changed, 73 insertions(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
index 0b0552c9f7dd..e2c3ad203c93 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
@@ -355,6 +355,79 @@
nvidia,bpmp = <>;
};

+   smmu: iommu@1200 {
+   compatible = "arm,mmu-500";
+   reg = <0 0x1200 0 0x80>;
+   #global-interrupts = <1>;
+   interrupts = ,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+;
+   #iommu-cells = <1>;
+   stream-match-mask = <0x7F80>;
+   };
+
gpu@1700 {
compatible = "nvidia,gp10b";
reg = <0x0 0x1700 0x0 0x100>,



Re: [PATCH 4/4] drm/tegra: Use u64_to_user_ptr helper

2017-08-19 Thread Mikko Perttunen

On 08/19/2017 01:05 AM, Dmitry Osipenko wrote:

On 18.08.2017 19:15, Mikko Perttunen wrote:

Use the u64_to_user_ptr helper macro to cast IOCTL argument u64 values
to user pointers instead of writing out the cast manually.

Signed-off-by: Mikko Perttunen 
---
  drivers/gpu/drm/tegra/drm.c | 9 -
  1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index e3331a2bc082..78c98736b0a5 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -389,11 +389,10 @@ int tegra_drm_submit(struct tegra_drm_context *context,
unsigned int num_relocs = args->num_relocs;
unsigned int num_waitchks = args->num_waitchks;
struct drm_tegra_cmdbuf __user *cmdbufs =
-   (void __user *)(uintptr_t)args->cmdbufs;
-   struct drm_tegra_reloc __user *relocs =
-   (void __user *)(uintptr_t)args->relocs;
+   u64_to_user_ptr(args->cmdbufs);
+   struct drm_tegra_reloc __user *relocs = u64_to_user_ptr(args->relocs);
struct drm_tegra_waitchk __user *waitchks =
-   (void __user *)(uintptr_t)args->waitchks;
+   u64_to_user_ptr(args->waitchks);


What about factoring out the 'u64_to_user_ptr()' assignments to reduce messiness a
tad? Like this:

struct drm_tegra_waitchk __user *waitchks;
struct drm_tegra_cmdbuf __user *cmdbufs;
struct drm_tegra_reloc __user *relocs;
...
waitchks = u64_to_user_ptr(args->waitchks);
cmdbufs = u64_to_user_ptr(args->cmdbufs);
relocs = u64_to_user_ptr(args->relocs);



struct drm_tegra_syncpt syncpt;
struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
struct host1x_syncpt *sp;
@@ -520,7 +519,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
}
}
  
-	if (copy_from_user(, (void __user *)(uintptr_t)args->syncpts,

+   if (copy_from_user(, u64_to_user_ptr(args->syncpts),


What about defining and using 'struct drm_tegra_reloc __user *syncpts' for
consistency with the other '__user' definitions?


   sizeof(syncpt))) {
err = -EFAULT;
goto fail;





Yeah, these are good ideas. I'll post a v2 at some point with these 
changes. Thanks!


Mikko


Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

2017-08-19 Thread Mikko Perttunen



On 08/19/2017 01:36 AM, Dmitry Osipenko wrote:

On 18.08.2017 19:15, Mikko Perttunen wrote:

Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
specific channels, preventing any other channels from incrementing
them.

Enable this feature where available and assign syncpoints to channels
when submitting a job. Syncpoints are currently never unassigned from
channels since that would require extra work and is unnecessary with
the current channel allocation model.

Signed-off-by: Mikko Perttunen 
---
  drivers/gpu/host1x/dev.h   | 16 
  drivers/gpu/host1x/hw/channel_hw.c |  3 +++
  drivers/gpu/host1x/hw/syncpt_hw.c  | 26 ++
  drivers/gpu/host1x/syncpt.c|  3 +++
  4 files changed, 48 insertions(+)

diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index def802c0a6bf..2432a30ff6e2 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
u32 (*load)(struct host1x_syncpt *syncpt);
int (*cpu_incr)(struct host1x_syncpt *syncpt);
int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
+   void (*assign_channel)(struct host1x_syncpt *syncpt,
+  struct host1x_channel *channel);
+   void (*set_protection)(struct host1x *host, bool enabled);
  };
  
  struct host1x_intr_ops {

@@ -186,6 +189,19 @@ static inline int host1x_hw_syncpt_patch_wait(struct 
host1x *host,
return host->syncpt_op->patch_wait(sp, patch_addr);
  }
  
+static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,

+  struct host1x_syncpt *sp,
+  struct host1x_channel *ch)
+{
+   return host->syncpt_op->assign_channel(sp, ch);
+}
+
+static inline void host1x_hw_syncpt_set_protection(struct host1x *host,
+  bool enabled)
+{
+   return host->syncpt_op->set_protection(host, enabled);
+}
+
  static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
void (*syncpt_thresh_work)(struct work_struct *))
  {
diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
b/drivers/gpu/host1x/hw/channel_hw.c
index 8447a56c41ca..0161da331702 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
  
  	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
  
+	/* assign syncpoint to channel */

+   host1x_hw_syncpt_assign_channel(host, sp, ch);
+
job->syncpt_end = syncval;
  
  	/* add a setclass for modules that require it */

diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c 
b/drivers/gpu/host1x/hw/syncpt_hw.c
index 7b0270d60742..5d117ab1699e 100644
--- a/drivers/gpu/host1x/hw/syncpt_hw.c
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -106,6 +106,30 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, 
void *patch_addr)
return 0;
  }
  
+static void syncpt_assign_channel(struct host1x_syncpt *sp,

+ struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+   struct host1x *host = sp->host;
+
+   if (!host->hv_regs)
+   return;
+
+   host1x_sync_writel(host,
+  HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
+  HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
+#endif
+}
+
+static void syncpt_set_protection(struct host1x *host, bool enabled)
+{
+#if HOST1X_HW >= 6
+   host1x_hypervisor_writel(host,
+enabled ? HOST1X_HV_SYNCPT_PROT_EN_CH_EN : 0,
+HOST1X_HV_SYNCPT_PROT_EN);
+#endif
+}
+
  static const struct host1x_syncpt_ops host1x_syncpt_ops = {
.restore = syncpt_restore,
.restore_wait_base = syncpt_restore_wait_base,
@@ -113,4 +137,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
.load = syncpt_load,
.cpu_incr = syncpt_cpu_incr,
.patch_wait = syncpt_patch_wait,
+   .assign_channel = syncpt_assign_channel,
+   .set_protection = syncpt_set_protection,
  };
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 048ac9e344ce..fe4d963b3e2a 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
for (i = 0; i < host->info->nb_pts; i++) {
syncpt[i].id = i;
syncpt[i].host = host;
+
+   host1x_hw_syncpt_assign_channel(host, [i], NULL);
}
  
  	for (i = 0; i < host->info->nb_bases; i++)

@@ -408,6 +410,7 @@ int host1x_syncpt_init(struct host1x *host)
host->bases = bases;
  
  	host1x_syncpt_restore(host);

+   host1x_hw_syncpt_set_protection(host, true);


Is it really okay to force the p

Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

2017-08-19 Thread Mikko Perttunen

On 08/19/2017 01:09 PM, Dmitry Osipenko wrote:

On 19.08.2017 11:10, Mikko Perttunen wrote:
[snip]

+host1x_hw_syncpt_set_protection(host, true);


Is it really okay to force the protection? Maybe protection should be enabled
with respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would have to
avoid software job validation for Tegra124+.


I don't quite get your comment. The hardware syncpt protection layer being
enabled should never hurt - it doesn't mess with any valid jobs. It's also only
on Tegra186 so I'm not sure where the Tegra124 comes from.


Right, it's the gather filter on T124+, my bad. This raises several questions.

1) Why do we have CONFIG_TEGRA_HOST1X_FIREWALL? Should it always be enforced, or do
we actually want to be a bit more flexible and allow disabling it? Imagine that
you are making a custom application and want to utilize channels in a different
way.


I think it should be up to the user to decide whether they want the 
firewall or not. It's clearly the most useful on the older chips - 
especially Tegra20 due to lack of IOMMU. The performance penalty is too 
great to force it on always.


The programming model should always be considered the same - the rules 
of what you are allowed to do are the same whether the firewall, or any 
hardware-implemented protection features, are on or not.




2) Since syncpoint protection is a T186 feature, what about previous
generations? Should we validate syncpoints in software for them? We have
'syncpoint validation' patch staged in grate's kernel
https://github.com/grate-driver/linux/commit/c8b6c82173f2ee9fead23380e8330b8099e7d5e7
(I'll start sending out this and other patches after a bit more thorough
testing.) Improperly used syncpoints potentially could allow one program to
damage others.


Yes, I think the firewall should have this feature for older 
generations. We could disable the check on Tegra186, as you point 
towards in question 4.




3) What exactly does the gather filter do? Could you list all the commands that it
filters out, please?


According to the Tegra186 TRM (section 16.8.32), SETCLASS, SETSTRMID and 
EXTEND are filtered.




4) What about T30/T114 that do not have gather filter? Should we validate those
commands for them in a software firewall?


Yes, the firewall should validate that.



So maybe we should implement several layers of validation in the SW firewall.
Like all layers for T20 (memory boundaries validation etc), software gather
filter for T30/114 and software syncpoint validation for T30/114/124/210.



That seems like a good idea.

Thanks,
Mikko


Re: [PATCH 2/4] gpu: host1x: Enable gather filter

2017-08-19 Thread Mikko Perttunen

On 08/19/2017 01:42 PM, Dmitry Osipenko wrote:

On 18.08.2017 19:15, Mikko Perttunen wrote:

The gather filter is a feature present on Tegra124 and newer where the
hardware prevents GATHERed command buffers from executing commands
normally reserved for the CDMA pushbuffer which is maintained by the
kernel driver.

This commit enables the gather filter on all supporting hardware.

Signed-off-by: Mikko Perttunen 
---


The TRM says that an "Invalid Gbuffer cmd" interrupt would be raised when filtering
happens. Is that interrupt disabled by default, or would it cause an 'unhandled
interrupt'?



It's disabled by default. Jobs that are stopped by the filter are then 
handled by the usual timeout mechanism.


Mikko


Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

2017-08-19 Thread Mikko Perttunen



On 08/19/2017 02:11 PM, Dmitry Osipenko wrote:

On 19.08.2017 13:35, Mikko Perttunen wrote:

On 08/19/2017 01:09 PM, Dmitry Osipenko wrote:

On 19.08.2017 11:10, Mikko Perttunen wrote:
[snip]

+host1x_hw_syncpt_set_protection(host, true);


Is it really okay to force the protection? Maybe protection should be enabled
with respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would have to
avoid software job validation for Tegra124+.


I don't quite get your comment. The hardware syncpt protection layer being
enabled should never hurt - it doesn't mess with any valid jobs. It's also only
on Tegra186 so I'm not sure where the Tegra124 comes from.


Right, it's the gather filter on T124+, my bad. This raises several questions.

1) Why do we have CONFIG_TEGRA_HOST1X_FIREWALL? Should it always be enforced, or do
we actually want to be a bit more flexible and allow disabling it? Imagine that
you are making a custom application and want to utilize channels in a
different way.


I think it should be up to the user to decide whether they want the firewall or
not. It's clearly the most useful on the older chips - especially Tegra20 due to
lack of IOMMU. The performance penalty is too great to force it on always.



Of course there is some overhead, but it is not that great. Usually a command buffer
contains just a dozen commands. It should be an interesting challenge to
optimize its performance though.


The programming model should always be considered the same - the rules of what
you are allowed to do are the same whether the firewall, or any
hardware-implemented protection features, are on or not.



Well, okay.



2) Since syncpoint protection is a T186 feature, what about previous
generations? Should we validate syncpoints in software for them? We have
'syncpoint validation' patch staged in grate's kernel
https://github.com/grate-driver/linux/commit/c8b6c82173f2ee9fead23380e8330b8099e7d5e7

(I'll start sending out this and other patches after a bit more thorough
testing.) Improperly used syncpoints potentially could allow one program to
damage others.


Yes, I think the firewall should have this feature for older generations. We
could disable the check on Tegra186, as you point towards in question 4.



3) What exactly does the gather filter do? Could you list all the commands that it
filters out, please?


According to the Tegra186 TRM (section 16.8.32), SETCLASS, SETSTRMID and EXTEND
are filtered.



Okay, then what about the SETSTRMID command? I don't see its disassembly in the
host1x gather debug dump. Was it accidentally missed?



True, it's a new command in Tegra186 and I missed adding it to the 
disassembler. It's probably fine to add it in another patch since it's 
only intended for kernel use and it's useless without IOMMU support 
anyway (which we don't have currently on Tegra186).




4) What about T30/T114 that do not have gather filter? Should we validate those
commands for them in a software firewall?


Yes, the firewall should validate that.



So maybe we should implement several layers of validation in the SW firewall.
Like all layers for T20 (memory boundaries validation etc), software gather
filter for T30/114 and software syncpoint validation for T30/114/124/210.



That seems like a good idea.


Alright, factoring out firewall from job.c probably should be the first step.



Re: [PATCH 2/4] gpu: host1x: Enable gather filter

2017-08-21 Thread Mikko Perttunen



On 08/20/2017 07:59 PM, Dmitry Osipenko wrote:

On 20.08.2017 19:44, Dmitry Osipenko wrote:

On 20.08.2017 19:24, Dmitry Osipenko wrote:

On 18.08.2017 19:15, Mikko Perttunen wrote:

The gather filter is a feature present on Tegra124 and newer where the
hardware prevents GATHERed command buffers from executing commands
normally reserved for the CDMA pushbuffer which is maintained by the
kernel driver.

This commit enables the gather filter on all supporting hardware.

Signed-off-by: Mikko Perttunen 
---
  drivers/gpu/host1x/hw/channel_hw.c  | 22 ++
  drivers/gpu/host1x/hw/hw_host1x04_channel.h | 12 
  drivers/gpu/host1x/hw/hw_host1x05_channel.h | 12 
  3 files changed, 46 insertions(+)

diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
b/drivers/gpu/host1x/hw/channel_hw.c
index 0161da331702..5c0dc6bb51d1 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -181,10 +181,32 @@ static int channel_submit(struct host1x_job *job)
return err;
  }
  
+static void enable_gather_filter(struct host1x *host,

+struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+   u32 val;
+
+   if (!host->hv_regs)
+   return;


Is it really possible that the gather filter could be absent on HW without a
hypervisor? Maybe there is another way to enable it in that case?


The hardware may have the hypervisor but Linux may be running as a 
virtual machine without access to the hypervisor, in which case we 
cannot access the registers, and it's the responsibility of the other OS 
acting as hypervisor to enable the gather filter.




Is it possible at all that the hypervisor could be missing?


BTW, this is also incoherent with the 'syncpoint protection' patch which doesn't
check for hypervisor presence.



However, I noticed that check and it's wrongly placed ;) See comment to the
'syncpoint protection' patch.



Re: [PATCH 2/4] gpu: host1x: Enable gather filter

2017-08-21 Thread Mikko Perttunen



On 08/21/2017 08:27 PM, Mikko Perttunen wrote:



On 08/20/2017 07:59 PM, Dmitry Osipenko wrote:

On 20.08.2017 19:44, Dmitry Osipenko wrote:

On 20.08.2017 19:24, Dmitry Osipenko wrote:

On 18.08.2017 19:15, Mikko Perttunen wrote:

The gather filter is a feature present on Tegra124 and newer where the
hardware prevents GATHERed command buffers from executing commands
normally reserved for the CDMA pushbuffer which is maintained by the
kernel driver.

This commit enables the gather filter on all supporting hardware.

Signed-off-by: Mikko Perttunen 
---
  drivers/gpu/host1x/hw/channel_hw.c  | 22 
++

  drivers/gpu/host1x/hw/hw_host1x04_channel.h | 12 
  drivers/gpu/host1x/hw/hw_host1x05_channel.h | 12 
  3 files changed, 46 insertions(+)

diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
b/drivers/gpu/host1x/hw/channel_hw.c

index 0161da331702..5c0dc6bb51d1 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -181,10 +181,32 @@ static int channel_submit(struct host1x_job 
*job)

  return err;
  }
+static void enable_gather_filter(struct host1x *host,
+ struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+u32 val;
+
+if (!host->hv_regs)
+return;


Is it really possible that gather filter could be not present on HW 
without

hypervisor? Maybe there is other way to enable it in that case?


The hardware may have the hypervisor but Linux may be running as a 
virtual machine without access to the hypervisor, in which case we 
cannot access the registers, and it's the responsibility of the other OS 
acting as hypervisor to enable the gather filter.




Is it possible at all that the hypervisor could be missing?


BTW, this is also incoherent with the 'syncpoint protection' patch 
which doesn't

check for hypervisor presence.



However, I noticed that check and it's wrongly placed ;) See comment 
to the

'syncpoint protection' patch.


Also - thanks, I added the missing check to that patch :)




--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 0/6] Miscellaneous improvements to Host1x and TegraDRM

2017-09-05 Thread Mikko Perttunen
New in v2:
- Changes in syncpoint protection and u64_to_user_ptr patches.
  See the patches for notes.
- Added patch to support more opcodes in the debug dump 
  disassembly.
- Added patch to fix an incorrect comment.

Thanks,
Mikko

Patch v1 notes:

Hi all,

here are some new features and improvements.

Patch 1 enables syncpoint protection which prevents channels from
touching syncpoints not belonging to them on Tegra186.

Patch 2 enables the gather filter which prevents userspace command
buffers from using CDMA commands usually reserved for the kernel.
A test is available at git://github.com/cyndis/host1x_test, branch
gather-filter.

Patch 3 greatly improves formatting of debug dumps spewed by host1x
in case of job timeouts. They are now actually readable by humans
without use of additional scripts.

Patch 4 is a simple aesthetic fix to the TegraDRM submit path.

Everything was tested on TX1 and TX2 and should be applied on the
previously posted Tegra186 support series.

Cheers,
Mikko

Mikko Perttunen (6):
  gpu: host1x: Enable Tegra186 syncpoint protection
  gpu: host1x: Enable gather filter
  gpu: host1x: Improve debug disassembly formatting
  gpu: host1x: Disassemble more instructions
  gpu: host1x: Fix incorrect comment for channel_request
  drm/tegra: Use u64_to_user_ptr helper

 drivers/gpu/drm/tegra/drm.c |  18 ++---
 drivers/gpu/host1x/channel.c|   3 +-
 drivers/gpu/host1x/debug.c  |  14 +++-
 drivers/gpu/host1x/debug.h  |  14 ++--
 drivers/gpu/host1x/dev.h|  15 +
 drivers/gpu/host1x/hw/channel_hw.c  |  25 +++
 drivers/gpu/host1x/hw/debug_hw.c| 101 ++--
 drivers/gpu/host1x/hw/debug_hw_1x01.c   |  11 +--
 drivers/gpu/host1x/hw/debug_hw_1x06.c   |  12 ++--
 drivers/gpu/host1x/hw/hw_host1x04_channel.h |  12 
 drivers/gpu/host1x/hw/hw_host1x05_channel.h |  12 
 drivers/gpu/host1x/hw/syncpt_hw.c   |  46 +
 drivers/gpu/host1x/syncpt.c |   8 +++
 13 files changed, 246 insertions(+), 45 deletions(-)

-- 
2.14.1



[PATCH v2 6/6] drm/tegra: Use u64_to_user_ptr helper

2017-09-05 Thread Mikko Perttunen
Use the u64_to_user_ptr helper macro to cast IOCTL argument u64 values
to user pointers instead of writing out the cast manually.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/drm.c | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index e3331a2bc082..72d5c0021540 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -388,18 +388,21 @@ int tegra_drm_submit(struct tegra_drm_context *context,
unsigned int num_cmdbufs = args->num_cmdbufs;
unsigned int num_relocs = args->num_relocs;
unsigned int num_waitchks = args->num_waitchks;
-   struct drm_tegra_cmdbuf __user *cmdbufs =
-   (void __user *)(uintptr_t)args->cmdbufs;
-   struct drm_tegra_reloc __user *relocs =
-   (void __user *)(uintptr_t)args->relocs;
-   struct drm_tegra_waitchk __user *waitchks =
-   (void __user *)(uintptr_t)args->waitchks;
+   struct drm_tegra_cmdbuf __user *cmdbufs;
+   struct drm_tegra_reloc __user *relocs;
+   struct drm_tegra_waitchk __user *waitchks;
+   struct drm_tegra_syncpt __user *user_syncpt;
struct drm_tegra_syncpt syncpt;
struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
struct host1x_syncpt *sp;
struct host1x_job *job;
int err;
 
+   cmdbufs = u64_to_user_ptr(args->cmdbufs);
+   relocs = u64_to_user_ptr(args->relocs);
+   waitchks = u64_to_user_ptr(args->waitchks);
+   user_syncpt = u64_to_user_ptr(args->syncpts);
+
/* We don't yet support other than one syncpt_incr struct per submit */
if (args->num_syncpts != 1)
return -EINVAL;
@@ -520,8 +523,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
}
}
 
-   if (copy_from_user(&syncpt, (void __user *)(uintptr_t)args->syncpts,
-  sizeof(syncpt))) {
+   if (copy_from_user(&syncpt, user_syncpt, sizeof(syncpt))) {
err = -EFAULT;
goto fail;
}
-- 
2.14.1



[PATCH v2 5/6] gpu: host1x: Fix incorrect comment for channel_request

2017-09-05 Thread Mikko Perttunen
This function actually doesn't sleep in the version that was merged.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/channel.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index db9b91d1384c..2fb93c27c1d9 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -128,8 +128,7 @@ static struct host1x_channel *acquire_unused_channel(struct 
host1x *host)
  * host1x_channel_request() - Allocate a channel
  * @device: Host1x unit this channel will be used to send commands to
  *
- * Allocates a new host1x channel for @device. If there are no free channels,
- * this will sleep until one becomes available. May return NULL if CDMA
+ * Allocates a new host1x channel for @device. May return NULL if CDMA
  * initialization fails.
  */
 struct host1x_channel *host1x_channel_request(struct device *dev)
-- 
2.14.1



[PATCH v2 2/6] gpu: host1x: Enable gather filter

2017-09-05 Thread Mikko Perttunen
The gather filter is a feature present on Tegra124 and newer where the
hardware prevents GATHERed command buffers from executing commands
normally reserved for the CDMA pushbuffer which is maintained by the
kernel driver.

This commit enables the gather filter on all supporting hardware.

Signed-off-by: Mikko Perttunen 
Reviewed-by: Dmitry Osipenko 
---
 drivers/gpu/host1x/hw/channel_hw.c  | 22 ++
 drivers/gpu/host1x/hw/hw_host1x04_channel.h | 12 
 drivers/gpu/host1x/hw/hw_host1x05_channel.h | 12 
 3 files changed, 46 insertions(+)

diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
b/drivers/gpu/host1x/hw/channel_hw.c
index 0161da331702..5c0dc6bb51d1 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -181,10 +181,32 @@ static int channel_submit(struct host1x_job *job)
return err;
 }
 
+static void enable_gather_filter(struct host1x *host,
+struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+   u32 val;
+
+   if (!host->hv_regs)
+   return;
+
+   val = host1x_hypervisor_readl(
+   host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+   val |= BIT(ch->id % 32);
+   host1x_hypervisor_writel(
+   host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+#elif HOST1X_HW >= 4
+   host1x_ch_writel(ch,
+HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1),
+HOST1X_CHANNEL_CHANNELCTRL);
+#endif
+}
+
 static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
   unsigned int index)
 {
ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE;
+   enable_gather_filter(dev, ch);
return 0;
 }
 
diff --git a/drivers/gpu/host1x/hw/hw_host1x04_channel.h 
b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
index 95e6f96142b9..2e8b635aa660 100644
--- a/drivers/gpu/host1x/hw/hw_host1x04_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
@@ -117,5 +117,17 @@ static inline u32 host1x_channel_dmactrl_dmainitget(void)
 }
 #define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
host1x_channel_dmactrl_dmainitget()
+static inline u32 host1x_channel_channelctrl_r(void)
+{
+   return 0x98;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL \
+   host1x_channel_channelctrl_r()
+static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
+{
+   return (v & 0x1) << 2;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
+   host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)
 
 #endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_channel.h 
b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
index fce6e2c1ff4c..abbbc2641ce6 100644
--- a/drivers/gpu/host1x/hw/hw_host1x05_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
@@ -117,5 +117,17 @@ static inline u32 host1x_channel_dmactrl_dmainitget(void)
 }
 #define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
host1x_channel_dmactrl_dmainitget()
+static inline u32 host1x_channel_channelctrl_r(void)
+{
+   return 0x98;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL \
+   host1x_channel_channelctrl_r()
+static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
+{
+   return (v & 0x1) << 2;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
+   host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)
 
 #endif
-- 
2.14.1



[PATCH v2 4/6] gpu: host1x: Disassemble more instructions

2017-09-05 Thread Mikko Perttunen
The disassembler for debug dumps was missing some newer host1x opcodes.
Add disassembly support for these.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/hw/debug_hw.c  | 57 ---
 drivers/gpu/host1x/hw/debug_hw_1x01.c |  3 +-
 drivers/gpu/host1x/hw/debug_hw_1x06.c |  3 +-
 3 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index 1e67667e308c..de2a0ba7a32d 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -30,6 +30,13 @@ enum {
HOST1X_OPCODE_IMM   = 0x04,
HOST1X_OPCODE_RESTART   = 0x05,
HOST1X_OPCODE_GATHER= 0x06,
+   HOST1X_OPCODE_SETSTRMID = 0x07,
+   HOST1X_OPCODE_SETAPPID  = 0x08,
+   HOST1X_OPCODE_SETPYLD   = 0x09,
+   HOST1X_OPCODE_INCR_W= 0x0a,
+   HOST1X_OPCODE_NONINCR_W = 0x0b,
+   HOST1X_OPCODE_GATHER_W  = 0x0c,
+   HOST1X_OPCODE_RESTART_W = 0x0d,
HOST1X_OPCODE_EXTEND= 0x0e,
 };
 
@@ -38,11 +45,16 @@ enum {
HOST1X_OPCODE_EXTEND_RELEASE_MLOCK  = 0x01,
 };
 
-static unsigned int show_channel_command(struct output *o, u32 val)
+#define INVALID_PAYLOAD 0xffffffff
+
+static unsigned int show_channel_command(struct output *o, u32 val,
+u32 *payload)
 {
-   unsigned int mask, subop, num;
+   unsigned int mask, subop, num, opcode;
+
+   opcode = val >> 28;
 
-   switch (val >> 28) {
+   switch (opcode) {
case HOST1X_OPCODE_SETCLASS:
mask = val & 0x3f;
if (mask) {
@@ -97,6 +109,42 @@ static unsigned int show_channel_command(struct output *o, 
u32 val)
val >> 14 & 0x1, val & 0x3fff);
return 1;
 
+   case HOST1X_OPCODE_SETSTRMID:
+   host1x_debug_cont(o, "SETSTRMID(offset=%06x)\n",
+ val & 0x3fffff);
+   return 0;
+
+   case HOST1X_OPCODE_SETAPPID:
+   host1x_debug_cont(o, "SETAPPID(appid=%02x)\n", val & 0xff);
+   return 0;
+
+   case HOST1X_OPCODE_SETPYLD:
+   *payload = val & 0xffff;
+   host1x_debug_cont(o, "SETPYLD(data=%04x)\n", *payload);
+   return 0;
+
+   case HOST1X_OPCODE_INCR_W:
+   case HOST1X_OPCODE_NONINCR_W:
+   host1x_debug_cont(o, "%s(offset=%06x, ",
+ opcode == HOST1X_OPCODE_INCR_W ?
+   "INCR_W" : "NONINCR_W",
+ val & 0x3fffff);
+   if (*payload == 0) {
+   host1x_debug_cont(o, "[])\n");
+   return 0;
+   } else if (*payload == INVALID_PAYLOAD) {
+   host1x_debug_cont(o, "unknown)\n");
+   return 0;
+   } else {
+   host1x_debug_cont(o, "[");
+   return *payload;
+   }
+
+   case HOST1X_OPCODE_GATHER_W:
+   host1x_debug_cont(o, "GATHER_W(count=%04x, addr=[",
+ val & 0x3fff);
+   return 2;
+
case HOST1X_OPCODE_EXTEND:
subop = val >> 24 & 0xf;
if (subop == HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK)
@@ -122,6 +170,7 @@ static void show_gather(struct output *o, phys_addr_t 
phys_addr,
/* Map dmaget cursor to corresponding mem handle */
u32 offset = phys_addr - pin_addr;
unsigned int data_count = 0, i;
+   u32 payload = INVALID_PAYLOAD;
 
/*
 * Sometimes we're given different hardware address to the same
@@ -139,7 +188,7 @@ static void show_gather(struct output *o, phys_addr_t 
phys_addr,
 
if (!data_count) {
host1x_debug_output(o, "%08x: %08x: ", addr, val);
-   data_count = show_channel_command(o, val);
+   data_count = show_channel_command(o, val, &payload);
} else {
host1x_debug_cont(o, "%08x%s", val,
data_count > 1 ? ", " : "])\n");
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x01.c 
b/drivers/gpu/host1x/hw/debug_hw_1x01.c
index 09e1aa7bb5dd..7d1401c6c193 100644
--- a/drivers/gpu/host1x/hw/debug_hw_1x01.c
+++ b/drivers/gpu/host1x/hw/debug_hw_1x01.c
@@ -78,6 +78,7 @@ static void host1x_debug_show_channel_fifo(struct host1x 
*host,
   struct output *o)
 {
u32 val, rd_ptr, wr_ptr, start, end;
+   u32 payload = INVALID_PAYLOAD;
unsigned int data_count = 0;
 
host1x_debug_output(o, &qu

[PATCH v2 1/6] gpu: host1x: Enable Tegra186 syncpoint protection

2017-09-05 Thread Mikko Perttunen
Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
specific channels, preventing any other channels from incrementing
them.

Enable this feature where available and assign syncpoints to channels
when submitting a job. Syncpoints are currently never unassigned from
channels since that would require extra work and is unnecessary with
the current channel allocation model.

Signed-off-by: Mikko Perttunen 
---

Notes:
v2:
- Changed from set_protection(bool) to enable_protection
- Added some comments
- Added missing check for hv_regs being NULL in
  enable_protection

 drivers/gpu/host1x/dev.h   | 15 +
 drivers/gpu/host1x/hw/channel_hw.c |  3 +++
 drivers/gpu/host1x/hw/syncpt_hw.c  | 46 ++
 drivers/gpu/host1x/syncpt.c|  8 +++
 4 files changed, 72 insertions(+)

diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index def802c0a6bf..7497cc5ead9e 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
u32 (*load)(struct host1x_syncpt *syncpt);
int (*cpu_incr)(struct host1x_syncpt *syncpt);
int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
+   void (*assign_channel)(struct host1x_syncpt *syncpt,
+  struct host1x_channel *channel);
+   void (*enable_protection)(struct host1x *host);
 };
 
 struct host1x_intr_ops {
@@ -186,6 +189,18 @@ static inline int host1x_hw_syncpt_patch_wait(struct 
host1x *host,
return host->syncpt_op->patch_wait(sp, patch_addr);
 }
 
+static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
+  struct host1x_syncpt *sp,
+  struct host1x_channel *ch)
+{
+   return host->syncpt_op->assign_channel(sp, ch);
+}
+
+static inline void host1x_hw_syncpt_enable_protection(struct host1x *host)
+{
+   return host->syncpt_op->enable_protection(host);
+}
+
 static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
void (*syncpt_thresh_work)(struct work_struct *))
 {
diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
b/drivers/gpu/host1x/hw/channel_hw.c
index 8447a56c41ca..0161da331702 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
 
syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
 
+   /* assign syncpoint to channel */
+   host1x_hw_syncpt_assign_channel(host, sp, ch);
+
job->syncpt_end = syncval;
 
/* add a setclass for modules that require it */
diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c 
b/drivers/gpu/host1x/hw/syncpt_hw.c
index 7b0270d60742..dc7a44614fef 100644
--- a/drivers/gpu/host1x/hw/syncpt_hw.c
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -106,6 +106,50 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, 
void *patch_addr)
return 0;
 }
 
+/**
+ * syncpt_assign_channel() - Assign syncpoint to channel
+ * @sp: syncpoint
+ * @ch: channel
+ *
+ * On chips with the syncpoint protection feature (Tegra186+), assign @sp to
+ * @ch, preventing other channels from incrementing the syncpoints. If @ch is
+ * NULL, unassigns the syncpoint.
+ *
+ * On older chips, do nothing.
+ */
+static void syncpt_assign_channel(struct host1x_syncpt *sp,
+ struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+   struct host1x *host = sp->host;
+
+   if (!host->hv_regs)
+   return;
+
+   host1x_sync_writel(host,
+  HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
+  HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
+#endif
+}
+
+/**
+ * syncpt_enable_protection() - Enable syncpoint protection
+ * @host: host1x instance
+ *
+ * On chips with the syncpoint protection feature (Tegra186+), enable this
+ * feature. On older chips, do nothing.
+ */
+static void syncpt_enable_protection(struct host1x *host)
+{
+#if HOST1X_HW >= 6
+   if (!host->hv_regs)
+   return;
+
+   host1x_hypervisor_writel(host, HOST1X_HV_SYNCPT_PROT_EN_CH_EN,
+HOST1X_HV_SYNCPT_PROT_EN);
+#endif
+}
+
 static const struct host1x_syncpt_ops host1x_syncpt_ops = {
.restore = syncpt_restore,
.restore_wait_base = syncpt_restore_wait_base,
@@ -113,4 +157,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
.load = syncpt_load,
.cpu_incr = syncpt_cpu_incr,
.patch_wait = syncpt_patch_wait,
+   .assign_channel = syncpt_assign_channel,
+   .enable_protection = syncpt_enable_protection,
 };
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 048ac9e344ce..4c7a4c8b2ad2 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gp

[PATCH v2 3/6] gpu: host1x: Improve debug disassembly formatting

2017-09-05 Thread Mikko Perttunen
The host1x driver prints out "disassembly" dumps of the command FIFO
and gather contents on submission timeouts. However, the output has
been quite difficult to read with unnecessary newlines and occasional
missing parentheses.

Fix these problems by using pr_cont to remove unnecessary newlines
and by fixing other small issues.

Signed-off-by: Mikko Perttunen 
Reviewed-by: Dmitry Osipenko 
Tested-by: Dmitry Osipenko 
---
This uses pr_cont, which there are currently talks of being replaced
with something better. I kept using it here for now until there is
some conclusion of what's the best way to replace it.

 drivers/gpu/host1x/debug.c| 14 ++-
 drivers/gpu/host1x/debug.h| 14 ---
 drivers/gpu/host1x/hw/debug_hw.c  | 46 ++-
 drivers/gpu/host1x/hw/debug_hw_1x01.c |  8 +++---
 drivers/gpu/host1x/hw/debug_hw_1x06.c |  9 ---
 5 files changed, 61 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index 2aae0e63214c..dc77ec452ffc 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -40,7 +40,19 @@ void host1x_debug_output(struct output *o, const char *fmt, 
...)
len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
va_end(args);
 
-   o->fn(o->ctx, o->buf, len);
+   o->fn(o->ctx, o->buf, len, false);
+}
+
+void host1x_debug_cont(struct output *o, const char *fmt, ...)
+{
+   va_list args;
+   int len;
+
+   va_start(args, fmt);
+   len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
+   va_end(args);
+
+   o->fn(o->ctx, o->buf, len, true);
 }
 
 static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
diff --git a/drivers/gpu/host1x/debug.h b/drivers/gpu/host1x/debug.h
index 4595b2e0799f..990cce47e737 100644
--- a/drivers/gpu/host1x/debug.h
+++ b/drivers/gpu/host1x/debug.h
@@ -24,22 +24,28 @@
 struct host1x;
 
 struct output {
-   void (*fn)(void *ctx, const char *str, size_t len);
+   void (*fn)(void *ctx, const char *str, size_t len, bool cont);
void *ctx;
char buf[256];
 };
 
-static inline void write_to_seqfile(void *ctx, const char *str, size_t len)
+static inline void write_to_seqfile(void *ctx, const char *str, size_t len,
+   bool cont)
 {
seq_write((struct seq_file *)ctx, str, len);
 }
 
-static inline void write_to_printk(void *ctx, const char *str, size_t len)
+static inline void write_to_printk(void *ctx, const char *str, size_t len,
+  bool cont)
 {
-   pr_info("%s", str);
+   if (cont)
+   pr_cont("%s", str);
+   else
+   pr_info("%s", str);
 }
 
 void __printf(2, 3) host1x_debug_output(struct output *o, const char *fmt, 
...);
+void __printf(2, 3) host1x_debug_cont(struct output *o, const char *fmt, ...);
 
 extern unsigned int host1x_debug_trace_cmdbuf;
 
diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index 770d92e62d69..1e67667e308c 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -40,48 +40,59 @@ enum {
 
 static unsigned int show_channel_command(struct output *o, u32 val)
 {
-   unsigned int mask, subop;
+   unsigned int mask, subop, num;
 
switch (val >> 28) {
case HOST1X_OPCODE_SETCLASS:
mask = val & 0x3f;
if (mask) {
-   host1x_debug_output(o, "SETCL(class=%03x, offset=%03x, 
mask=%02x, [",
+   host1x_debug_cont(o, "SETCL(class=%03x, offset=%03x, 
mask=%02x, [",
val >> 6 & 0x3ff,
val >> 16 & 0xfff, mask);
return hweight8(mask);
}
 
-   host1x_debug_output(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff);
+   host1x_debug_cont(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff);
return 0;
 
case HOST1X_OPCODE_INCR:
-   host1x_debug_output(o, "INCR(offset=%03x, [",
+   num = val & 0xffff;
+   host1x_debug_cont(o, "INCR(offset=%03x, [",
val >> 16 & 0xfff);
-   return val & 0xffff;
+   if (!num)
+   host1x_debug_cont(o, "])\n");
+
+   return num;
 
case HOST1X_OPCODE_NONINCR:
-   host1x_debug_output(o, "NONINCR(offset=%03x, [",
+   num = val & 0xffff;
+   host1x_debug_cont(o, "NONINCR(offset=%03x, [",
val >> 16 & 0xfff);
-   return val & 0x

[PATCH v2 5/6] gpu: host1x: Add Tegra186 support

2017-09-05 Thread Mikko Perttunen
Add support for the implementation of Host1x present on the Tegra186.
The register space has been shuffled around a little bit, requiring
addition of some chip-specific code sections. Tegra186 also adds
several new features, most importantly the hypervisor, but those are
not yet supported with this commit.

Signed-off-by: Mikko Perttunen 
Reviewed-by: Dmitry Osipenko 
Tested-by: Dmitry Osipenko 
---
 drivers/gpu/host1x/Makefile|   3 +-
 drivers/gpu/host1x/dev.c   |  60 +++-
 drivers/gpu/host1x/dev.h   |   4 +
 drivers/gpu/host1x/hw/cdma_hw.c|  49 ---
 drivers/gpu/host1x/hw/debug_hw.c   | 137 +--
 drivers/gpu/host1x/hw/debug_hw_1x01.c  | 154 +
 drivers/gpu/host1x/hw/debug_hw_1x06.c  | 133 ++
 drivers/gpu/host1x/hw/host1x01.c   |   2 +
 drivers/gpu/host1x/hw/host1x02.c   |   2 +
 drivers/gpu/host1x/hw/host1x04.c   |   2 +
 drivers/gpu/host1x/hw/host1x05.c   |   2 +
 drivers/gpu/host1x/hw/host1x06.c   |  44 ++
 drivers/gpu/host1x/hw/host1x06.h   |  26 
 drivers/gpu/host1x/hw/host1x06_hardware.h  | 142 +++
 drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h |  32 +
 drivers/gpu/host1x/hw/hw_host1x06_uclass.h | 181 +
 drivers/gpu/host1x/hw/hw_host1x06_vm.h |  47 +++
 drivers/gpu/host1x/hw/intr_hw.c|  29 ++--
 18 files changed, 880 insertions(+), 169 deletions(-)
 create mode 100644 drivers/gpu/host1x/hw/debug_hw_1x01.c
 create mode 100644 drivers/gpu/host1x/hw/debug_hw_1x06.c
 create mode 100644 drivers/gpu/host1x/hw/host1x06.c
 create mode 100644 drivers/gpu/host1x/hw/host1x06.h
 create mode 100644 drivers/gpu/host1x/hw/host1x06_hardware.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x06_uclass.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x06_vm.h

diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index a1d9974cfcb5..4fb61bd57aee 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -11,6 +11,7 @@ host1x-y = \
hw/host1x01.o \
hw/host1x02.o \
hw/host1x04.o \
-   hw/host1x05.o
+   hw/host1x05.o \
+   hw/host1x06.o
 
 obj-$(CONFIG_TEGRA_HOST1X) += host1x.o
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 2c58a390123a..6a4ff2d59496 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -39,6 +39,17 @@
 #include "hw/host1x02.h"
 #include "hw/host1x04.h"
 #include "hw/host1x05.h"
+#include "hw/host1x06.h"
+
+void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r)
+{
+   writel(v, host1x->hv_regs + r);
+}
+
+u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r)
+{
+   return readl(host1x->hv_regs + r);
+}
 
 void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r)
 {
@@ -104,7 +115,19 @@ static const struct host1x_info host1x05_info = {
.dma_mask = DMA_BIT_MASK(34),
 };
 
+static const struct host1x_info host1x06_info = {
+   .nb_channels = 63,
+   .nb_pts = 576,
+   .nb_mlocks = 24,
+   .nb_bases = 16,
+   .init = host1x06_init,
+   .sync_offset = 0x0,
+   .dma_mask = DMA_BIT_MASK(34),
+   .has_hypervisor = true,
+};
+
 static const struct of_device_id host1x_of_match[] = {
+   { .compatible = "nvidia,tegra186-host1x", .data = &host1x06_info, },
{ .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, },
{ .compatible = "nvidia,tegra124-host1x", .data = &host1x04_info, },
{ .compatible = "nvidia,tegra114-host1x", .data = &host1x02_info, },
@@ -117,8 +140,9 @@ MODULE_DEVICE_TABLE(of, host1x_of_match);
 static int host1x_probe(struct platform_device *pdev)
 {
const struct of_device_id *id;
+   const struct host1x_info *info;
struct host1x *host;
-   struct resource *regs;
+   struct resource *regs, *hv_regs = NULL;
int syncpt_irq;
int err;
 
@@ -126,10 +150,28 @@ static int host1x_probe(struct platform_device *pdev)
if (!id)
return -EINVAL;
 
-   regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-   if (!regs) {
-   dev_err(&pdev->dev, "failed to get registers\n");
-   return -ENXIO;
+   info = id->data;
+
+   if (info->has_hypervisor) {
+   regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "vm");
+   if (!regs) {
+   dev_err(&pdev->dev, "failed to get vm registers\n");
+   return -ENXIO;
+   }
+
+   hv_regs = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+  &qu

[PATCH v2 4/6] dt-bindings: host1x: Add Tegra186 information

2017-09-05 Thread Mikko Perttunen
Add the Tegra186-specific hypervisor-related register range
properties.

Signed-off-by: Mikko Perttunen 
---
v2:
- Dropped incorrect note about cells properties.

 .../devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt   | 4 
 1 file changed, 4 insertions(+)

diff --git 
a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt 
b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt
index 74e1e8add5a1..844e0103fb0d 100644
--- a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt
+++ b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt
@@ -3,6 +3,10 @@ NVIDIA Tegra host1x
 Required properties:
 - compatible: "nvidia,tegra<chip>-host1x"
 - reg: Physical base address and length of the controller's registers.
+  For pre-Tegra186, one entry describing the whole register area.
+  For Tegra186, one entry for each entry in reg-names:
+"vm" - VM region assigned to Linux
+"hypervisor" - Hypervisor region (only if Linux acts as hypervisor)
 - interrupts: The interrupt outputs from the controller.
 - #address-cells: The number of cells used to represent physical base addresses
   in the host1x address space. Should be 1.
-- 
2.14.1



[PATCH v2 2/6] arm64: tegra: Add host1x on Tegra186

2017-09-05 Thread Mikko Perttunen
Add the node for Host1x on the Tegra186, without any subdevices
for now.

Signed-off-by: Mikko Perttunen 
---
v2:
- Changed address-cells and size-cells to 1 and fixed the ranges
  property correspondingly.

 arch/arm64/boot/dts/nvidia/tegra186.dtsi | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
index a964d246c0e9..b1a3e404c7be 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
@@ -355,6 +355,24 @@
nvidia,bpmp = <>;
};
 
+   host1x@13e00000 {
+   compatible = "nvidia,tegra186-host1x", "simple-bus";
+   reg = <0x0 0x13e00000 0x0 0x10000>,
+ <0x0 0x13e10000 0x0 0x10000>;
+   reg-names = "hypervisor", "vm";
+   interrupts = ,
+;
+   clocks = <&bpmp TEGRA186_CLK_HOST1X>;
+   clock-names = "host1x";
+   resets = <&bpmp TEGRA186_RESET_HOST1X>;
+   reset-names = "host1x";
+
+   #address-cells = <1>;
+   #size-cells = <1>;
+
+   ranges = <0x15000000 0x0 0x15000000 0x01000000>;
+   };
+
gpu@1700 {
compatible = "nvidia,gp10b";
reg = <0x0 0x1700 0x0 0x100>,
-- 
2.14.1



[PATCH v2 6/6] drm/tegra: Add Tegra186 support for VIC

2017-09-05 Thread Mikko Perttunen
Add Tegra186 support for VIC - no changes are required except for new
firmware and compatibility string.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/drm.c |  1 +
 drivers/gpu/drm/tegra/vic.c | 10 ++
 2 files changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 3ba659a5940d..e3331a2bc082 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1281,6 +1281,7 @@ static const struct of_device_id host1x_drm_subdevs[] = {
{ .compatible = "nvidia,tegra210-sor", },
{ .compatible = "nvidia,tegra210-sor1", },
{ .compatible = "nvidia,tegra210-vic", },
+   { .compatible = "nvidia,tegra186-vic", },
{ /* sentinel */ }
 };
 
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index 2448229fa653..6697a21a250d 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -270,9 +270,16 @@ static const struct vic_config vic_t210_config = {
.firmware = NVIDIA_TEGRA_210_VIC_FIRMWARE,
 };
 
+#define NVIDIA_TEGRA_186_VIC_FIRMWARE "nvidia/tegra186/vic04_ucode.bin"
+
+static const struct vic_config vic_t186_config = {
+   .firmware = NVIDIA_TEGRA_186_VIC_FIRMWARE,
+};
+
 static const struct of_device_id vic_match[] = {
{ .compatible = "nvidia,tegra124-vic", .data = &vic_t124_config },
{ .compatible = "nvidia,tegra210-vic", .data = &vic_t210_config },
+   { .compatible = "nvidia,tegra186-vic", .data = &vic_t186_config },
{ },
 };
 
@@ -405,3 +412,6 @@ MODULE_FIRMWARE(NVIDIA_TEGRA_124_VIC_FIRMWARE);
 #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC)
 MODULE_FIRMWARE(NVIDIA_TEGRA_210_VIC_FIRMWARE);
 #endif
+#if IS_ENABLED(CONFIG_ARCH_TEGRA_186_SOC)
+MODULE_FIRMWARE(NVIDIA_TEGRA_186_VIC_FIRMWARE);
+#endif
-- 
2.14.1



[PATCH v2 1/6] arm64: tegra: Add #power-domain-cells for BPMP

2017-09-05 Thread Mikko Perttunen
Add #power-domain-cells for the BPMP node on Tegra186 so that the power
domain provider may be used.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/boot/dts/nvidia/tegra186.dtsi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
index 0b0552c9f7dd..a964d246c0e9 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
@@ -443,6 +443,7 @@
shmem = <&cpu_bpmp_tx &cpu_bpmp_rx>;
#clock-cells = <1>;
#reset-cells = <1>;
+   #power-domain-cells = <1>;
 
bpmp_i2c: i2c {
compatible = "nvidia,tegra186-bpmp-i2c";
-- 
2.14.1



[PATCH v2 3/6] arm64: tegra: Add VIC on Tegra186

2017-09-05 Thread Mikko Perttunen
Add a node for the Video Image Compositor on the Tegra186.

Signed-off-by: Mikko Perttunen 
---
v2:
- Fixed reg property in accordance with changed parent cells.

 arch/arm64/boot/dts/nvidia/tegra186.dtsi | 12 
 1 file changed, 12 insertions(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
index b1a3e404c7be..584bce64d41f 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
@@ -371,6 +371,18 @@
#size-cells = <1>;
 
ranges = <0x1500 0x0 0x1500 0x0100>;
+
+   vic@1534 {
+   compatible = "nvidia,tegra186-vic";
+   reg = <0x1534 0x4>;
+   interrupts = ;
+   clocks = < TEGRA186_CLK_VIC>;
+   clock-names = "vic";
+   resets = < TEGRA186_RESET_VIC>;
+   reset-names = "vic";
+
+   power-domains = < TEGRA186_POWER_DOMAIN_VIC>;
+   };
};
 
gpu@1700 {
-- 
2.14.1



[PATCH v2 0/6] Host1x and VIC support for Tegra186

2017-09-05 Thread Mikko Perttunen
Hi,

not many changes in v2:

Changed address-cells and size-cells for the Host1x device tree node
to have value 1, since all subdevices fit in the lower 4G. Also dropped
the incorrect change about this from the dt-bindings patch. Thanks to
Rob for pointing this out.

Mikko

Notes for v1:

Hi everyone,

this series adds basic support for the Host1x channel engine and the
VIC 2d compositor unit on Tegra186. The first three patches do the
required device tree changes, the fourth patch updates the device tree
binding documentation, and the two remaining patches add the actual
implementation, almost all of which is in Host1x itself.

The Tegra186 Host1x is a relatively large update over previous
generations, which can be seen in the diffstat. The biggest change is
that Host1x now contains separate hypervisor and vm register
apertures to support virtualization at the hardware level. This driver,
however, currently assumes that this instance of Linux is the sole
operating system having access to the hardware.

This, combined with increased numbers of supported channels and
syncpoints, has caused a number of register space changes that are
responsible for most of the updated code.

The series has been tested on the Jetson TX1 (T210) and TX2 (T186)
using the host1x_test test suite available at

http://github.com/cyndis/host1x_test

The series itself is available at

http://github.com/cyndis/linux, branch host1x-t186-1

Cheers,
Mikko

Mikko Perttunen (6):
  arm64: tegra: Add #power-domain-cells for BPMP
  arm64: tegra: Add host1x on Tegra186
  arm64: tegra: Add VIC on Tegra186
  dt-bindings: host1x: Add Tegra186 information
  gpu: host1x: Add Tegra186 support
  drm/tegra: Add Tegra186 support for VIC

 .../display/tegra/nvidia,tegra20-host1x.txt|   4 +
 arch/arm64/boot/dts/nvidia/tegra186.dtsi   |  31 
 drivers/gpu/drm/tegra/drm.c|   1 +
 drivers/gpu/drm/tegra/vic.c|  10 ++
 drivers/gpu/host1x/Makefile|   3 +-
 drivers/gpu/host1x/dev.c   |  60 ++-
 drivers/gpu/host1x/dev.h   |   4 +
 drivers/gpu/host1x/hw/cdma_hw.c|  49 +++---
 drivers/gpu/host1x/hw/debug_hw.c   | 137 +---
 drivers/gpu/host1x/hw/debug_hw_1x01.c  | 154 ++
 drivers/gpu/host1x/hw/debug_hw_1x06.c  | 133 +++
 drivers/gpu/host1x/hw/host1x01.c   |   2 +
 drivers/gpu/host1x/hw/host1x02.c   |   2 +
 drivers/gpu/host1x/hw/host1x04.c   |   2 +
 drivers/gpu/host1x/hw/host1x05.c   |   2 +
 drivers/gpu/host1x/hw/host1x06.c   |  44 +
 drivers/gpu/host1x/hw/host1x06.h   |  26 +++
 drivers/gpu/host1x/hw/host1x06_hardware.h  | 142 
 drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h |  32 
 drivers/gpu/host1x/hw/hw_host1x06_uclass.h | 181 +
 drivers/gpu/host1x/hw/hw_host1x06_vm.h |  47 ++
 drivers/gpu/host1x/hw/intr_hw.c|  29 ++--
 22 files changed, 926 insertions(+), 169 deletions(-)
 create mode 100644 drivers/gpu/host1x/hw/debug_hw_1x01.c
 create mode 100644 drivers/gpu/host1x/hw/debug_hw_1x06.c
 create mode 100644 drivers/gpu/host1x/hw/host1x06.c
 create mode 100644 drivers/gpu/host1x/hw/host1x06.h
 create mode 100644 drivers/gpu/host1x/hw/host1x06_hardware.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x06_uclass.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x06_vm.h

-- 
2.14.1



Re: [PATCH v2 0/6] Host1x and VIC support for Tegra186

2017-09-05 Thread Mikko Perttunen

On 05.09.2017 14:10, Daniel Vetter wrote:


Since this is new hw support, is there also open source userspace using
all this?


The VIC HW in Tegra186 is backwards compatible with the one in Tegra210, 
which has open userspace (https://github.com/cyndis/vaapi-tegra-driver), 
so that userspace should remain compatible. The old firmware is not 
compatible so we need a new compatibility string for that.




Thanks, Daniel


Thanks,
Mikko


Re: [PATCH v2 1/5] arm64: tegra: Add BPMP thermal sensor to Tegra186

2017-07-27 Thread Mikko Perttunen

Good point, so patches 1/5 and 2/5 should be swapped around.

Mikko

On 27.07.2017 09:47, kbuild test robot wrote:

Hi Mikko,

[auto build test ERROR on tegra/for-next]
[also build test ERROR on v4.13-rc2 next-20170726]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Mikko-Perttunen/arm64-tegra-Add-BPMP-thermal-sensor-to-Tegra186/20170726-055759
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux.git for-next
config: arm64-allnoconfig (attached as .config)
compiler: aarch64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget 
https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=arm64

Note: the 
linux-review/Mikko-Perttunen/arm64-tegra-Add-BPMP-thermal-sensor-to-Tegra186/20170726-055759
 HEAD 2aebe2225f7f5f03abf6098e07387fa011ec16a8 builds fine.
  It only hurts bisectability.

All errors (new ones prefixed by >>):

   In file included from arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi:1:0,
from arch/arm64/boot/dts/nvidia/tegra186-p2771-.dts:5:

arch/arm64/boot/dts/nvidia/tegra186.dtsi:7:55: fatal error: 
dt-bindings/thermal/tegra186-bpmp-thermal.h: No such file or directory

#include 
  ^
   compilation terminated.

vim +7 arch/arm64/boot/dts/nvidia/tegra186.dtsi

   > 7   #include 
 8  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation



Re: [PATCH v1] gpu: host1x: Fix dma_free_wc() argument in the error path

2018-04-23 Thread Mikko Perttunen

Reviewed-by: Mikko Perttunen 

On 23.04.2018 12:54, Dmitry Osipenko wrote:

If IOVA allocation or IOMMU mapping fails, dma_free_wc() is invoked with
size=0 because of a typo, that triggers "kernel BUG at mm/vmalloc.c:124!".

Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/host1x/cdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 93df28228721..0724122afeac 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -127,7 +127,7 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
 iommu_free_iova:
__free_iova(&host1x->iova, alloc);
 iommu_free_mem:
-   dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys);
+   dma_free_wc(host1x->dev, size, pb->mapped, pb->phys);

return err;
 }



Re: [PATCH V2 3/9] dt-bindings: Tegra186 tachometer device tree bindings

2018-04-09 Thread Mikko Perttunen



On 04/09/2018 04:21 PM, Rob Herring wrote:

On Mon, Apr 9, 2018 at 12:38 AM, Mikko Perttunen  wrote:

Rob,


Please don't top post to lists.


this binding is for a specific IP block (for measuring/aggregating input
pulses) on the Tegra186 SoC, so I don't think it fits into any generic
binding.


What is it hooked up to to measure? You only mention "fan" five times
in the doc.


In practice, fans.



You have #pwm-cells too, so this block has PWM output as well? If not,
then where's the PWM for the fan control because there is no point in
having fan tach without some control mechanism.


It doesn't provide a PWM output. The (Linux) PWM framework provides 
functionality in both directions - control and capture. But if the 
device tree #pwm-cells/pwms properties are only for control, we may need 
to introduce a new #capture-pwm-cells/capture-pwms or similar.


The idea is that the generic fan node can then specify two pwms, one for 
control and one for capture, to enable e.g. closed-loop control (I'm not 
personally familiar with the usecase for this but I could imagine 
something like that). The control PWM can be something completely 
different, maybe not a PWM in the first place (e.g. some fixed voltage).




There's only so many ways to control fans and types of fans, so yes,
the interface of control and feedback lines between a fan and its
controller should absolutely be generic.


I'm not quite getting what you mean by this. Clearly we need a custom 
compatibility string for the tachometer as it's a different hardware 
block with different programming than others. Or are you complaining 
about the nvidia,pulse-per-rev/capture-window-len properties?


Thanks,
Mikko



Rob



Thanks,
Mikko


On 03/27/2018 05:52 PM, Rob Herring wrote:


On Wed, Mar 21, 2018 at 10:10:38AM +0530, Rajkumar Rampelli wrote:


Supply Device tree binding documentation for the NVIDIA
Tegra186 SoC's Tachometer Controller

Signed-off-by: Rajkumar Rampelli 
---

V2: Renamed compatible string to "nvidia,tegra186-pwm-tachometer"
  Renamed dt property values of clock-names and reset-names to
"tachometer"
  from "tach"



Read my prior comments on v1.

Rob
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V2 3/9] dt-bindings: Tegra186 tachometer device tree bindings

2018-04-08 Thread Mikko Perttunen

Rob,

this binding is for a specific IP block (for measuring/aggregating input 
pulses) on the Tegra186 SoC, so I don't think it fits into any generic 
binding.


Thanks,
Mikko

On 03/27/2018 05:52 PM, Rob Herring wrote:

On Wed, Mar 21, 2018 at 10:10:38AM +0530, Rajkumar Rampelli wrote:

Supply Device tree binding documentation for the NVIDIA
Tegra186 SoC's Tachometer Controller

Signed-off-by: Rajkumar Rampelli 
---

V2: Renamed compatible string to "nvidia,tegra186-pwm-tachometer"
 Renamed dt property values of clock-names and reset-names to "tachometer"
 from "tach"


Read my prior comments on v1.

Rob
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



Re: [PATCH 05/10] hwmon: generic-pwm-tachometer: Add generic PWM based tachometer

2018-02-27 Thread Mikko Perttunen

On 02/28/2018 08:12 AM, Rajkumar Rampelli wrote:


On Wednesday 28 February 2018 11:28 AM, Guenter Roeck wrote:

On 02/27/2018 09:38 PM, Rajkumar Rampelli wrote:


On Wednesday 21 February 2018 08:20 PM, Guenter Roeck wrote:

On 02/20/2018 10:58 PM, Rajkumar Rampelli wrote:

Add a generic PWM-based tachometer driver via the HWMON interface
to report the RPM of a motor. This driver gets the period/duty
cycle from the PWM IP, which captures the motor PWM output.

This driver implements a simple interface for monitoring the speed of
a fan and exposes it in rotations per minute (RPM) to the user space
by using the hwmon's sysfs interface.

Signed-off-by: Rajkumar Rampelli 
---
  Documentation/hwmon/generic-pwm-tachometer |  17 +
  drivers/hwmon/Kconfig  |  10 +++
  drivers/hwmon/Makefile |   1 +
  drivers/hwmon/generic-pwm-tachometer.c | 112 
+

  4 files changed, 140 insertions(+)
  create mode 100644 Documentation/hwmon/generic-pwm-tachometer
  create mode 100644 drivers/hwmon/generic-pwm-tachometer.c

diff --git a/Documentation/hwmon/generic-pwm-tachometer 
b/Documentation/hwmon/generic-pwm-tachometer

new file mode 100644
index 000..e0713ee
--- /dev/null
+++ b/Documentation/hwmon/generic-pwm-tachometer
@@ -0,0 +1,17 @@
+Kernel driver generic-pwm-tachometer
+
+
+This driver enables the use of a PWM module to monitor a fan. It 
uses the
+generic PWM interface and can be used on SoCs as long as the SoC 
supports

+Tachometer controller that monitors the Fan speed in periods.
+
+Author: Rajkumar Rampelli 
+
+Description
+---
+
+The driver implements a simple interface for monitoring the Fan 
speed using
+PWM module and Tachometer controller. It requests period value 
through PWM
+capture interface to Tachometer and measures the Rotations per 
minute using
+received period value. It exposes the Fan speed in RPM to the user 
space by

+using the hwmon's sysfs interface.
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index ef23553..8912dcb 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -1878,6 +1878,16 @@ config SENSORS_XGENE
    If you say yes here you get support for the temperature
    and power sensors for APM X-Gene SoC.
  +config GENERIC_PWM_TACHOMETER
+    tristate "Generic PWM based tachometer driver"
+    depends on PWM
+    help
+  Enables a driver to use PWM signal from motor to use
+  for measuring the motor speed. The RPM is captured by
+  PWM modules which has PWM capture capability and this
+  drivers reads the captured data from PWM IP to convert
+  it to speed in RPM.
+
  if ACPI
    comment "ACPI drivers"
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index f814b4a..9dcc374 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -175,6 +175,7 @@ obj-$(CONFIG_SENSORS_WM8350)    += wm8350-hwmon.o
  obj-$(CONFIG_SENSORS_XGENE)    += xgene-hwmon.o
    obj-$(CONFIG_PMBUS)    += pmbus/
+obj-$(CONFIG_GENERIC_PWM_TACHOMETER) += generic-pwm-tachometer.o
    ccflags-$(CONFIG_HWMON_DEBUG_CHIP) := -DDEBUG
  diff --git a/drivers/hwmon/generic-pwm-tachometer.c 
b/drivers/hwmon/generic-pwm-tachometer.c

new file mode 100644
index 000..9354d43
--- /dev/null
+++ b/drivers/hwmon/generic-pwm-tachometer.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or 
modify it

+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but 
WITHOUT
+ * ANY WARRANTY; without even the implied warranty of 
MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public 
License for

+ * more details.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct pwm_hwmon_tach {
+    struct device    *dev;
+    struct pwm_device    *pwm;
+    struct device    *hwmon;
+};
+
+static ssize_t show_rpm(struct device *dev, struct 
device_attribute *attr,

+    char *buf)
+{
+    struct pwm_hwmon_tach *ptt = dev_get_drvdata(dev);
+    struct pwm_device *pwm = ptt->pwm;
+    struct pwm_capture result;
+    int err;
+    unsigned int rpm = 0;
+
+    err = pwm_capture(pwm, , 0);
+    if (err < 0) {
+    dev_err(ptt->dev, "Failed to capture PWM: %d\n", err);
+    return err;
+    }
+
+    if (result.period)
+    rpm = DIV_ROUND_CLOSEST_ULL(60ULL * NSEC_PER_SEC,
+    result.period);
+
+    return sprintf(buf, "%u\n", rpm);
+}
+
+static SENSOR_DEVICE_ATTR(rpm, 0444, show_rpm, NULL, 0);
+
+static struct attribute *pwm_tach_attrs[] = {
+    _dev_attr_rpm.dev_attr.attr,
+    NULL,
+};


"rpm" is not a standard hwmon sysfs attribute. If you don't provide
a single standard hwmon sysfs attribute, having a 

Re: [PATCH 5/6] arm64: tegra: Add Tegra194 chip device tree

2018-01-11 Thread Mikko Perttunen

On 11.01.2018 23:56, Rob Herring wrote:

On Mon, Jan 08, 2018 at 06:54:37AM +0200, Mikko Perttunen wrote:

Add the chip-level device tree, including binding headers, for the
NVIDIA Tegra194 "Xavier" system-on-chip. Only a small subset of devices
are initially available, enough to boot to UART console.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/boot/dts/nvidia/tegra194.dtsi   | 334 +
 include/dt-bindings/clock/tegra194-clock.h |  59 +
 include/dt-bindings/gpio/tegra194-gpio.h   |  59 +
 include/dt-bindings/reset/tegra194-reset.h |  40 
 4 files changed, 492 insertions(+)
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194.dtsi
 create mode 100644 include/dt-bindings/clock/tegra194-clock.h
 create mode 100644 include/dt-bindings/gpio/tegra194-gpio.h
 create mode 100644 include/dt-bindings/reset/tegra194-reset.h

diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
new file mode 100644
index ..51eff420816d
--- /dev/null
+++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/ {
+   compatible = "nvidia,tegra194";


Documented?


Ah, wasn't aware these needed to be documented as well. Will add in v2.




+   interrupt-parent = <>;
+   #address-cells = <2>;
+   #size-cells = <2>;
+
+   uarta: serial@310 {


These should all be under a bus node. Tegra failed to do this at the
start and we're still copy-n-pasting this mistake.

Then you probably don't need 2 address and size cells for all the
peripherals.


So I should create one big simple-bus node and put everything with an 
address apart from /memory (and maybe /sysram) inside it?





+   compatible = "nvidia,tegra194-uart", "nvidia,tegra20-uart";
+   reg = <0x0 0x0310 0x0 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTA>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTA>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   uartb: serial@311 {
+   compatible = "nvidia,tegra194-uart", "nvidia,tegra20-uart";
+   reg = <0x0 0x0311 0x0 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTB>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTB>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   uartd: serial@313 {
+   compatible = "nvidia,tegra194-uart", "nvidia,tegra20-uart";
+   reg = <0x0 0x0313 0x0 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTD>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTD>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   uarte: serial@314 {
+   compatible = "nvidia,tegra194-uart", "nvidia,tegra20-uart";
+   reg = <0x0 0x0314 0x0 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTE>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTE>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   uartf: serial@315 {
+   compatible = "nvidia,tegra194-uart", "nvidia,tegra20-uart";
+   reg = <0x0 0x0315 0x0 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTF>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTF>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   gen1_i2c: i2c@316 {
+   compatible = "nvidia,tegra194-i2c", "nvidia,tegra114-i2c";
+   reg = <0x0 0x0316 0x0 0x1>;
+   interrupts = ;
+   #address-cells = <1>;
+   #size-cells = <0>;
+   clocks = < TEGRA194_CLK_I2C1>;
+   clock-names = "div-clk";
+   resets = < TEGRA194_RESET_I2C1>;
+   reset-names = "i2c";
+   status = "disabled";
+ 

Re: [PATCH v3 6/7] arm64: tegra: Add Tegra194 chip device tree

2018-02-19 Thread Mikko Perttunen

On 16.02.2018 14:33, Philippe Ombredanne wrote:

Mikko,

On Thu, Feb 15, 2018 at 3:52 PM, Mikko Perttunen  wrote:

Add the chip-level device tree, including binding headers, for the
NVIDIA Tegra194 "Xavier" system-on-chip. Only a small subset of devices
are initially available, enough to boot to UART console.

Signed-off-by: Mikko Perttunen 
---

Notes:
v3:
- added hypervisor-related apertures to GIC node
- removed GPL boilerplate in favor of SPDX and harmonized
  copyright headers


Thanks for this boilerplate removal! but see a few nits below.




--- /dev/null
+++ b/include/dt-bindings/clock/tegra194-clock.h
@@ -0,0 +1,653 @@
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */


The proper style should be this for a .h header on the first line per [1]


I see -- I'll fix this in v4 (or if there are no further issues maybe 
Thierry can fix this while applying?:)


Mikko




+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ */


[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/license-rules.rst




--- /dev/null
+++ b/include/dt-bindings/gpio/tegra194-gpio.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+




Same as above


--- /dev/null
+++ b/include/dt-bindings/power/tegra194-powergate.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */




Same as above


--- /dev/null
+++ b/include/dt-bindings/reset/tegra194-reset.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */




Same as above



[PATCH v4 1/7] firmware: tegra: Simplify channel management

2018-02-20 Thread Mikko Perttunen
The Tegra194 BPMP only implements 5 channels (4 to BPMP, 1 to CCPLEX),
and they are not placed contiguously in memory. The current channel
management in the BPMP driver does not support this.

Simplify and refactor the channel management such that only one atomic
transmit channel and one receive channel are supported, and channels
are not required to be placed contiguously in memory. The same
configuration also works on T186 so we end up with less code.

Signed-off-by: Mikko Perttunen 
---
 drivers/firmware/tegra/bpmp.c | 142 +++---
 include/soc/tegra/bpmp.h  |   4 +-
 2 files changed, 66 insertions(+), 80 deletions(-)

diff --git a/drivers/firmware/tegra/bpmp.c b/drivers/firmware/tegra/bpmp.c
index a7f461f2e650..81bc2dce8626 100644
--- a/drivers/firmware/tegra/bpmp.c
+++ b/drivers/firmware/tegra/bpmp.c
@@ -70,57 +70,20 @@ void tegra_bpmp_put(struct tegra_bpmp *bpmp)
 }
 EXPORT_SYMBOL_GPL(tegra_bpmp_put);
 
-static int tegra_bpmp_channel_get_index(struct tegra_bpmp_channel *channel)
-{
-   return channel - channel->bpmp->channels;
-}
-
 static int
 tegra_bpmp_channel_get_thread_index(struct tegra_bpmp_channel *channel)
 {
struct tegra_bpmp *bpmp = channel->bpmp;
-   unsigned int offset, count;
+   unsigned int count;
int index;
 
-   offset = bpmp->soc->channels.thread.offset;
count = bpmp->soc->channels.thread.count;
 
-   index = tegra_bpmp_channel_get_index(channel);
-   if (index < 0)
-   return index;
-
-   if (index < offset || index >= offset + count)
+   index = channel - channel->bpmp->threaded_channels;
+   if (index < 0 || index >= count)
return -EINVAL;
 
-   return index - offset;
-}
-
-static struct tegra_bpmp_channel *
-tegra_bpmp_channel_get_thread(struct tegra_bpmp *bpmp, unsigned int index)
-{
-   unsigned int offset = bpmp->soc->channels.thread.offset;
-   unsigned int count = bpmp->soc->channels.thread.count;
-
-   if (index >= count)
-   return NULL;
-
-   return &bpmp->channels[offset + index];
-}
-
-static struct tegra_bpmp_channel *
-tegra_bpmp_channel_get_tx(struct tegra_bpmp *bpmp)
-{
-   unsigned int offset = bpmp->soc->channels.cpu_tx.offset;
-
-   return &bpmp->channels[offset + smp_processor_id()];
-}
-
-static struct tegra_bpmp_channel *
-tegra_bpmp_channel_get_rx(struct tegra_bpmp *bpmp)
-{
-   unsigned int offset = bpmp->soc->channels.cpu_rx.offset;
-
-   return &bpmp->channels[offset];
+   return index;
 }
 
 static bool tegra_bpmp_message_valid(const struct tegra_bpmp_message *msg)
@@ -271,11 +234,7 @@ tegra_bpmp_write_threaded(struct tegra_bpmp *bpmp, 
unsigned int mrq,
goto unlock;
}
 
-   channel = tegra_bpmp_channel_get_thread(bpmp, index);
-   if (!channel) {
-   err = -EINVAL;
-   goto unlock;
-   }
+   channel = &bpmp->threaded_channels[index];
 
if (!tegra_bpmp_master_free(channel)) {
err = -EBUSY;
@@ -328,12 +287,18 @@ int tegra_bpmp_transfer_atomic(struct tegra_bpmp *bpmp,
if (!tegra_bpmp_message_valid(msg))
return -EINVAL;
 
-   channel = tegra_bpmp_channel_get_tx(bpmp);
+   channel = bpmp->tx_channel;
+
+   spin_lock(&bpmp->atomic_tx_lock);
 
err = tegra_bpmp_channel_write(channel, msg->mrq, MSG_ACK,
   msg->tx.data, msg->tx.size);
-   if (err < 0)
+   if (err < 0) {
+   spin_unlock(&bpmp->atomic_tx_lock);
return err;
+   }
+
+   spin_unlock(&bpmp->atomic_tx_lock);
 
err = mbox_send_message(bpmp->mbox.channel, NULL);
if (err < 0)
@@ -607,7 +572,7 @@ static void tegra_bpmp_handle_rx(struct mbox_client 
*client, void *data)
unsigned int i, count;
unsigned long *busy;
 
-   channel = tegra_bpmp_channel_get_rx(bpmp);
+   channel = bpmp->rx_channel;
count = bpmp->soc->channels.thread.count;
busy = bpmp->threaded.busy;
 
@@ -619,9 +584,7 @@ static void tegra_bpmp_handle_rx(struct mbox_client 
*client, void *data)
for_each_set_bit(i, busy, count) {
struct tegra_bpmp_channel *channel;
 
-   channel = tegra_bpmp_channel_get_thread(bpmp, i);
-   if (!channel)
-   continue;
+   channel = &bpmp->threaded_channels[i];
 
if (tegra_bpmp_master_acked(channel)) {
tegra_bpmp_channel_signal(channel);
@@ -698,7 +661,6 @@ static void tegra_bpmp_channel_cleanup(struct 
tegra_bpmp_channel *channel)
 
 static int tegra_bpmp_probe(struct platform_device *pdev)
 {
-   struct tegra_bpmp_channel *channel;
struct tegra_bpmp *bpmp;
unsigned int i;
char tag[32];
@@ -758,24 +720,45 @@ static int tegra_bpmp_probe(stru

[PATCH v4 0/7] Initial support for NVIDIA Tegra194

2018-02-20 Thread Mikko Perttunen
Hello everyone,

this series adds initial support for the NVIDIA Tegra194 "Xavier"
system-on-chip. Initially UART, I2C, SDMMC, as well as the PMIC
are supported, allowing booting to a console.

The changes consist almost completely of the new device trees,
however some fixes are required in the BPMP driver to support the
new channel layout in Tegra194.

The series has been tested on Tegra186 (Jetson TX2) and Tegra194
(P2972).

Cheers,
Mikko

Mikko Perttunen (7):
  firmware: tegra: Simplify channel management
  soc/tegra: Add Tegra194 SoC configuration option
  soc/tegra: pmc: Add Tegra194 compatibility string
  dt-bindings: tegra: Add missing chips and NVIDIA boards
  dt-bindings: tegra: Add documentation for nvidia,tegra194-pmc
  arm64: tegra: Add Tegra194 chip device tree
  arm64: tegra: Add device tree for the Tegra194 P2972- board

 Documentation/devicetree/bindings/arm/tegra.txt|  16 +
 .../bindings/arm/tegra/nvidia,tegra186-pmc.txt |   2 +
 arch/arm64/boot/dts/nvidia/Makefile|   1 +
 arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi | 248 +++
 arch/arm64/boot/dts/nvidia/tegra194-p2972-.dts |  16 +
 arch/arm64/boot/dts/nvidia/tegra194.dtsi   | 344 +
 arch/arm64/configs/defconfig   |   1 +
 drivers/firmware/tegra/bpmp.c  | 142 -
 drivers/soc/tegra/Kconfig  |  10 +
 drivers/soc/tegra/pmc.c|   1 +
 include/dt-bindings/clock/tegra194-clock.h | 321 +++
 include/dt-bindings/gpio/tegra194-gpio.h   |  61 
 include/dt-bindings/power/tegra194-powergate.h |  35 +++
 include/dt-bindings/reset/tegra194-reset.h | 152 +
 include/soc/tegra/bpmp.h   |   4 +-
 15 files changed, 1274 insertions(+), 80 deletions(-)
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194-p2972-.dts
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194.dtsi
 create mode 100644 include/dt-bindings/clock/tegra194-clock.h
 create mode 100644 include/dt-bindings/gpio/tegra194-gpio.h
 create mode 100644 include/dt-bindings/power/tegra194-powergate.h
 create mode 100644 include/dt-bindings/reset/tegra194-reset.h

-- 
2.16.1



[PATCH v4 7/7] arm64: tegra: Add device tree for the Tegra194 P2972-0000 board

2018-02-20 Thread Mikko Perttunen
Add device tree files for the Tegra194 P2972- development board.
The board consists of the P2888 compute module and the P2822 baseboard.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/boot/dts/nvidia/Makefile|   1 +
 arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi | 248 +
 arch/arm64/boot/dts/nvidia/tegra194-p2972-.dts |  16 ++
 3 files changed, 265 insertions(+)
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194-p2972-.dts

diff --git a/arch/arm64/boot/dts/nvidia/Makefile 
b/arch/arm64/boot/dts/nvidia/Makefile
index 676aa2f238d1..7c13d7df484e 100644
--- a/arch/arm64/boot/dts/nvidia/Makefile
+++ b/arch/arm64/boot/dts/nvidia/Makefile
@@ -5,3 +5,4 @@ dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-p2371-2180.dtb
 dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-p2571.dtb
 dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-smaug.dtb
 dtb-$(CONFIG_ARCH_TEGRA_186_SOC) += tegra186-p2771-.dtb
+dtb-$(CONFIG_ARCH_TEGRA_194_SOC) += tegra194-p2972-.dtb
diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
new file mode 100644
index ..ecb034177fc2
--- /dev/null
+++ b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "tegra194.dtsi"
+
+#include 
+
+/ {
+   model = "NVIDIA Tegra194 P2888 Processor Module";
+   compatible = "nvidia,p2888", "nvidia,tegra194";
+
+   aliases {
+   sdhci0 = "/cbb/sdhci@346";
+   sdhci1 = "/cbb/sdhci@340";
+   serial0 = 
+   i2c0 = "/bpmp/i2c";
+   i2c1 = "/cbb/i2c@316";
+   i2c2 = "/cbb/i2c@c24";
+   i2c3 = "/cbb/i2c@318";
+   i2c4 = "/cbb/i2c@319";
+   i2c5 = "/cbb/i2c@31c";
+   i2c6 = "/cbb/i2c@c25";
+   i2c7 = "/cbb/i2c@31e";
+   };
+
+   chosen {
+   bootargs = "console=ttyS0,115200n8";
+   stdout-path = "serial0:115200n8";
+   };
+
+   cbb {
+   serial@311 {
+   status = "okay";
+   };
+
+   /* SDMMC1 (SD/MMC) */
+   sdhci@340 {
+/*
+   cd-gpios = < TEGRA194_MAIN_GPIO(A, 0) 
GPIO_ACTIVE_LOW>;
+*/
+   };
+
+   /* SDMMC4 (eMMC) */
+   sdhci@346 {
+   status = "okay";
+   bus-width = <8>;
+   non-removable;
+
+   vqmmc-supply = <_1v8ls>;
+   vmmc-supply = <_emmc_3v3>;
+   };
+
+   pmc@c36 {
+   nvidia,invert-interrupt;
+   };
+   };
+
+   bpmp {
+   i2c {
+   status = "okay";
+
+   pmic: pmic@3c {
+   compatible = "maxim,max20024";
+   reg = <0x3c>;
+
+   interrupts = ;
+   #interrupt-cells = <2>;
+   interrupt-controller;
+
+   #gpio-cells = <2>;
+   gpio-controller;
+
+   pinctrl-names = "default";
+   pinctrl-0 = <_default>;
+
+   max20024_default: pinmux {
+   gpio0 {
+   pins = "gpio0";
+   function = "gpio";
+   };
+
+   gpio1 {
+   pins = "gpio1";
+   function = "fps-out";
+   maxim,active-fps-source = 
;
+   };
+
+   gpio2 {
+   pins = "gpio2";
+   function = "fps-out";
+   maxim,active-fps-source = 
;
+   };
+
+   gpio3 {
+   pins = "gpio3";
+   function = "fps-out";
+  

[PATCH v4 3/7] soc/tegra: pmc: Add Tegra194 compatibility string

2018-02-20 Thread Mikko Perttunen
The Tegra194 PMC is mostly compatible with Tegra186, including in all
currently supported features. As such, add a new compatibility string
but point to the existing Tegra186 SoC data for now.

Signed-off-by: Mikko Perttunen 
---
 drivers/soc/tegra/pmc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c
index ce62a47a6647..a2df230bf51a 100644
--- a/drivers/soc/tegra/pmc.c
+++ b/drivers/soc/tegra/pmc.c
@@ -1920,6 +1920,7 @@ static const struct tegra_pmc_soc tegra186_pmc_soc = {
 };
 
 static const struct of_device_id tegra_pmc_match[] = {
+   { .compatible = "nvidia,tegra194-pmc", .data = &tegra186_pmc_soc },
{ .compatible = "nvidia,tegra186-pmc", .data = &tegra186_pmc_soc },
{ .compatible = "nvidia,tegra210-pmc", .data = _pmc_soc },
{ .compatible = "nvidia,tegra132-pmc", .data = _pmc_soc },
-- 
2.16.1



[PATCH v4 5/7] dt-bindings: tegra: Add documentation for nvidia,tegra194-pmc

2018-02-20 Thread Mikko Perttunen
The Tegra194 power management controller has one additional register
aperture to be specified in the device tree node.

Signed-off-by: Mikko Perttunen 
Reviewed-by: Rob Herring 
---
 Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git 
a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt 
b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt
index 078a58b0302f..5a3bf7c5a7a0 100644
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt
@@ -3,6 +3,7 @@ NVIDIA Tegra Power Management Controller (PMC)
 Required properties:
 - compatible: Should contain one of the following:
   - "nvidia,tegra186-pmc": for Tegra186
+  - "nvidia,tegra194-pmc": for Tegra194
 - reg: Must contain an (offset, length) pair of the register set for each
   entry in reg-names.
 - reg-names: Must include the following entries:
@@ -10,6 +11,7 @@ Required properties:
   - "wake"
   - "aotag"
   - "scratch"
+  - "misc" (Only for Tegra194)
 
 Optional properties:
 - nvidia,invert-interrupt: If present, inverts the PMU interrupt signal.
-- 
2.16.1



[PATCH v4 6/7] arm64: tegra: Add Tegra194 chip device tree

2018-02-20 Thread Mikko Perttunen
Add the chip-level device tree, including binding headers, for the
NVIDIA Tegra194 "Xavier" system-on-chip. Only a small subset of devices
are initially available, enough to boot to UART console.

Signed-off-by: Mikko Perttunen 
---

Notes:
v4:
- fixed copyright headers according to license-rules.rst
- removed comments from clock bindings

v3:
- added hypervisor-related apertures to GIC node
- removed GPL boilerplate in favor of SPDX and harmonized
  copyright headers

 arch/arm64/boot/dts/nvidia/tegra194.dtsi   | 344 +
 include/dt-bindings/clock/tegra194-clock.h | 321 +++
 include/dt-bindings/gpio/tegra194-gpio.h   |  61 +
 include/dt-bindings/power/tegra194-powergate.h |  35 +++
 include/dt-bindings/reset/tegra194-reset.h | 152 +++
 5 files changed, 913 insertions(+)
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194.dtsi
 create mode 100644 include/dt-bindings/clock/tegra194-clock.h
 create mode 100644 include/dt-bindings/gpio/tegra194-gpio.h
 create mode 100644 include/dt-bindings/power/tegra194-powergate.h
 create mode 100644 include/dt-bindings/reset/tegra194-reset.h

diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
new file mode 100644
index 000000000000..6322ef265c2f
--- /dev/null
+++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/ {
+   compatible = "nvidia,tegra194";
+   interrupt-parent = <>;
+   #address-cells = <2>;
+   #size-cells = <2>;
+
+   /* control backbone */
+   cbb {
+   compatible = "simple-bus";
+   #address-cells = <1>;
+   #size-cells = <1>;
+   ranges = <0x0 0x0 0x0 0x4000>;
+
+   uarta: serial@310 {
+   compatible = "nvidia,tegra194-uart", 
"nvidia,tegra20-uart";
+   reg = <0x0310 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTA>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTA>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   uartb: serial@311 {
+   compatible = "nvidia,tegra194-uart", 
"nvidia,tegra20-uart";
+   reg = <0x0311 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTB>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTB>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   uartd: serial@313 {
+   compatible = "nvidia,tegra194-uart", 
"nvidia,tegra20-uart";
+   reg = <0x0313 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTD>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTD>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   uarte: serial@314 {
+   compatible = "nvidia,tegra194-uart", 
"nvidia,tegra20-uart";
+   reg = <0x0314 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTE>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTE>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   uartf: serial@315 {
+   compatible = "nvidia,tegra194-uart", 
"nvidia,tegra20-uart";
+   reg = <0x0315 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTF>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTF>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   gen1_i2c: 

[PATCH v4 4/7] dt-bindings: tegra: Add missing chips and NVIDIA boards

2018-02-20 Thread Mikko Perttunen
Add compatibility strings for supported but undocumented Tegra chips
(Tegra114/124/132/210/186/194) and reference boards.

Signed-off-by: Mikko Perttunen 
Reviewed-by: Rob Herring 
---

Notes:
v2:
- add patch

 Documentation/devicetree/bindings/arm/tegra.txt | 16 
 1 file changed, 16 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/tegra.txt 
b/Documentation/devicetree/bindings/arm/tegra.txt
index 7f1411bbabf7..32f62bb7006d 100644
--- a/Documentation/devicetree/bindings/arm/tegra.txt
+++ b/Documentation/devicetree/bindings/arm/tegra.txt
@@ -9,6 +9,12 @@ following compatible values:
 
   nvidia,tegra20
   nvidia,tegra30
+  nvidia,tegra114
+  nvidia,tegra124
+  nvidia,tegra132
+  nvidia,tegra210
+  nvidia,tegra186
+  nvidia,tegra194
 
 Boards
 ---
@@ -26,8 +32,18 @@ board-specific compatible values:
   nvidia,cardhu
   nvidia,cardhu-a02
   nvidia,cardhu-a04
+  nvidia,dalmore
   nvidia,harmony
+  nvidia,jetson-tk1
+  nvidia,norrin
+  nvidia,p2371-0000
+  nvidia,p2371-2180
+  nvidia,p2571
+  nvidia,p2771-0000
+  nvidia,p2972-0000
+  nvidia,roth
   nvidia,seaboard
+  nvidia,tn7
   nvidia,ventana
   toradex,apalis_t30
   toradex,apalis_t30-eval
-- 
2.16.1



[PATCH v4 2/7] soc/tegra: Add Tegra194 SoC configuration option

2018-02-20 Thread Mikko Perttunen
Add the configuration option to enable support for the Tegra194
system-on-chip, and enable it by default in the arm64 defconfig.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/configs/defconfig |  1 +
 drivers/soc/tegra/Kconfig| 10 ++
 2 files changed, 11 insertions(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 78f669a21a9b..5a8f15baa850 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -537,6 +537,7 @@ CONFIG_ROCKCHIP_PM_DOMAINS=y
 CONFIG_ARCH_TEGRA_132_SOC=y
 CONFIG_ARCH_TEGRA_210_SOC=y
 CONFIG_ARCH_TEGRA_186_SOC=y
+CONFIG_ARCH_TEGRA_194_SOC=y
 CONFIG_EXTCON_USB_GPIO=y
 CONFIG_IIO=y
 CONFIG_EXYNOS_ADC=y
diff --git a/drivers/soc/tegra/Kconfig b/drivers/soc/tegra/Kconfig
index 89ebe22a3e27..fe4481676da6 100644
--- a/drivers/soc/tegra/Kconfig
+++ b/drivers/soc/tegra/Kconfig
@@ -104,6 +104,16 @@ config ARCH_TEGRA_186_SOC
  multi-format support, ISP for image capture processing and BPMP for
  power management.
 
+config ARCH_TEGRA_194_SOC
+   bool "NVIDIA Tegra194 SoC"
+   select MAILBOX
+   select TEGRA_BPMP
+   select TEGRA_HSP_MBOX
+   select TEGRA_IVC
+   select SOC_TEGRA_PMC
+   help
+ Enable support for the NVIDIA Tegra194 SoC.
+
 endif
 endif
 
-- 
2.16.1



[PATCH v2 2/7] soc/tegra: Add Tegra194 SoC configuration option

2018-02-05 Thread Mikko Perttunen
Add the configuration option to enable support for the Tegra194
system-on-chip, and enable it by default in the arm64 defconfig.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/configs/defconfig |  1 +
 drivers/soc/tegra/Kconfig| 10 ++
 2 files changed, 11 insertions(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 78f669a21a9b..5a8f15baa850 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -537,6 +537,7 @@ CONFIG_ROCKCHIP_PM_DOMAINS=y
 CONFIG_ARCH_TEGRA_132_SOC=y
 CONFIG_ARCH_TEGRA_210_SOC=y
 CONFIG_ARCH_TEGRA_186_SOC=y
+CONFIG_ARCH_TEGRA_194_SOC=y
 CONFIG_EXTCON_USB_GPIO=y
 CONFIG_IIO=y
 CONFIG_EXYNOS_ADC=y
diff --git a/drivers/soc/tegra/Kconfig b/drivers/soc/tegra/Kconfig
index 89ebe22a3e27..fe4481676da6 100644
--- a/drivers/soc/tegra/Kconfig
+++ b/drivers/soc/tegra/Kconfig
@@ -104,6 +104,16 @@ config ARCH_TEGRA_186_SOC
  multi-format support, ISP for image capture processing and BPMP for
  power management.
 
+config ARCH_TEGRA_194_SOC
+   bool "NVIDIA Tegra194 SoC"
+   select MAILBOX
+   select TEGRA_BPMP
+   select TEGRA_HSP_MBOX
+   select TEGRA_IVC
+   select SOC_TEGRA_PMC
+   help
+ Enable support for the NVIDIA Tegra194 SoC.
+
 endif
 endif
 
-- 
2.1.4



[PATCH v2 0/7] Initial support for NVIDIA Tegra194

2018-02-05 Thread Mikko Perttunen
Hello everyone,

this series adds initial support for the NVIDIA Tegra194 "Xavier"
system-on-chip. Initially UART, I2C, SDMMC, as well as the PMIC
are supported, allowing booting to a console.

The changes consist almost completely of the new device trees,
however some fixes are required in the BPMP driver to support the
new channel layout in Tegra194.

The series has been tested on Tegra186 (Jetson TX2) and Tegra194
(P2972).

Cheers,
Mikko

Mikko Perttunen (7):
  firmware: tegra: Simplify channel management
  soc/tegra: Add Tegra194 SoC configuration option
  soc/tegra: pmc: Add Tegra194 compatibility string
  dt-bindings: tegra: Add missing chips and NVIDIA boards
  dt-bindings: tegra: Add documentation for nvidia,tegra194-pmc
  arm64: tegra: Add Tegra194 chip device tree
  arm64: tegra: Add device tree for the Tegra194 P2972-0000 board

 Documentation/devicetree/bindings/arm/tegra.txt|  16 +
 .../bindings/arm/tegra/nvidia,tegra186-pmc.txt |   2 +
 arch/arm64/boot/dts/nvidia/Makefile|   1 +
 arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi | 248 
 arch/arm64/boot/dts/nvidia/tegra194-p2972-.dts |  16 +
 arch/arm64/boot/dts/nvidia/tegra194.dtsi   | 342 +++
 arch/arm64/configs/defconfig   |   1 +
 drivers/firmware/tegra/bpmp.c  | 142 ++---
 drivers/soc/tegra/Kconfig  |  10 +
 drivers/soc/tegra/pmc.c|   1 +
 include/dt-bindings/clock/tegra194-clock.h | 664 +
 include/dt-bindings/gpio/tegra194-gpio.h   |  59 ++
 include/dt-bindings/power/tegra194-powergate.h |  49 ++
 include/dt-bindings/reset/tegra194-reset.h | 166 ++
 include/soc/tegra/bpmp.h   |   4 +-
 15 files changed, 1641 insertions(+), 80 deletions(-)
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194.dtsi
 create mode 100644 include/dt-bindings/clock/tegra194-clock.h
 create mode 100644 include/dt-bindings/gpio/tegra194-gpio.h
 create mode 100644 include/dt-bindings/power/tegra194-powergate.h
 create mode 100644 include/dt-bindings/reset/tegra194-reset.h

-- 
2.1.4



[PATCH v2 4/7] dt-bindings: tegra: Add missing chips and NVIDIA boards

2018-02-05 Thread Mikko Perttunen
Add compatibility strings for supported but undocumented Tegra chips
(Tegra114/124/132/210/186/194) and reference boards.

Signed-off-by: Mikko Perttunen 
---

Notes:
v2:
- add patch

 Documentation/devicetree/bindings/arm/tegra.txt | 16 
 1 file changed, 16 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/tegra.txt 
b/Documentation/devicetree/bindings/arm/tegra.txt
index 7f1411bbabf7..32f62bb7006d 100644
--- a/Documentation/devicetree/bindings/arm/tegra.txt
+++ b/Documentation/devicetree/bindings/arm/tegra.txt
@@ -9,6 +9,12 @@ following compatible values:
 
   nvidia,tegra20
   nvidia,tegra30
+  nvidia,tegra114
+  nvidia,tegra124
+  nvidia,tegra132
+  nvidia,tegra210
+  nvidia,tegra186
+  nvidia,tegra194
 
 Boards
 ---
@@ -26,8 +32,18 @@ board-specific compatible values:
   nvidia,cardhu
   nvidia,cardhu-a02
   nvidia,cardhu-a04
+  nvidia,dalmore
   nvidia,harmony
+  nvidia,jetson-tk1
+  nvidia,norrin
+  nvidia,p2371-0000
+  nvidia,p2371-2180
+  nvidia,p2571
+  nvidia,p2771-0000
+  nvidia,p2972-0000
+  nvidia,roth
   nvidia,seaboard
+  nvidia,tn7
   nvidia,ventana
   toradex,apalis_t30
   toradex,apalis_t30-eval
-- 
2.1.4



[PATCH v2 7/7] arm64: tegra: Add device tree for the Tegra194 P2972-0000 board

2018-02-05 Thread Mikko Perttunen
Add device tree files for the Tegra194 P2972-0000 development board.
The board consists of the P2888 compute module and the P2822 baseboard.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/boot/dts/nvidia/Makefile|   1 +
 arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi | 248 +
 arch/arm64/boot/dts/nvidia/tegra194-p2972-.dts |  16 ++
 3 files changed, 265 insertions(+)
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts

diff --git a/arch/arm64/boot/dts/nvidia/Makefile 
b/arch/arm64/boot/dts/nvidia/Makefile
index 676aa2f238d1..7c13d7df484e 100644
--- a/arch/arm64/boot/dts/nvidia/Makefile
+++ b/arch/arm64/boot/dts/nvidia/Makefile
@@ -5,3 +5,4 @@ dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-p2371-2180.dtb
 dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-p2571.dtb
 dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-smaug.dtb
 dtb-$(CONFIG_ARCH_TEGRA_186_SOC) += tegra186-p2771-0000.dtb
+dtb-$(CONFIG_ARCH_TEGRA_194_SOC) += tegra194-p2972-0000.dtb
diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
new file mode 100644
index 000000000000..ecb034177fc2
--- /dev/null
+++ b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "tegra194.dtsi"
+
+#include 
+
+/ {
+   model = "NVIDIA Tegra194 P2888 Processor Module";
+   compatible = "nvidia,p2888", "nvidia,tegra194";
+
+   aliases {
+   sdhci0 = "/cbb/sdhci@346";
+   sdhci1 = "/cbb/sdhci@340";
+   serial0 = &uartb;
+   i2c0 = "/bpmp/i2c";
+   i2c1 = "/cbb/i2c@316";
+   i2c2 = "/cbb/i2c@c24";
+   i2c3 = "/cbb/i2c@318";
+   i2c4 = "/cbb/i2c@319";
+   i2c5 = "/cbb/i2c@31c";
+   i2c6 = "/cbb/i2c@c25";
+   i2c7 = "/cbb/i2c@31e";
+   };
+
+   chosen {
+   bootargs = "console=ttyS0,115200n8";
+   stdout-path = "serial0:115200n8";
+   };
+
+   cbb {
+   serial@311 {
+   status = "okay";
+   };
+
+   /* SDMMC1 (SD/MMC) */
+   sdhci@340 {
+/*
+   cd-gpios = < TEGRA194_MAIN_GPIO(A, 0) 
GPIO_ACTIVE_LOW>;
+*/
+   };
+
+   /* SDMMC4 (eMMC) */
+   sdhci@346 {
+   status = "okay";
+   bus-width = <8>;
+   non-removable;
+
+   vqmmc-supply = <_1v8ls>;
+   vmmc-supply = <_emmc_3v3>;
+   };
+
+   pmc@c36 {
+   nvidia,invert-interrupt;
+   };
+   };
+
+   bpmp {
+   i2c {
+   status = "okay";
+
+   pmic: pmic@3c {
+   compatible = "maxim,max20024";
+   reg = <0x3c>;
+
+   interrupts = ;
+   #interrupt-cells = <2>;
+   interrupt-controller;
+
+   #gpio-cells = <2>;
+   gpio-controller;
+
+   pinctrl-names = "default";
+   pinctrl-0 = <_default>;
+
+   max20024_default: pinmux {
+   gpio0 {
+   pins = "gpio0";
+   function = "gpio";
+   };
+
+   gpio1 {
+   pins = "gpio1";
+   function = "fps-out";
+   maxim,active-fps-source = 
;
+   };
+
+   gpio2 {
+   pins = "gpio2";
+   function = "fps-out";
+   maxim,active-fps-source = 
;
+   };
+
+   gpio3 {
+   pins = "gpio3";
+   function = "fps-out";
+  

[PATCH v2 3/7] soc/tegra: pmc: Add Tegra194 compatibility string

2018-02-05 Thread Mikko Perttunen
The Tegra194 PMC is mostly compatible with Tegra186, including in all
currently supported features. As such, add a new compatibility string
but point to the existing Tegra186 SoC data for now.

Signed-off-by: Mikko Perttunen 
---
 drivers/soc/tegra/pmc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c
index ce62a47a6647..a2df230bf51a 100644
--- a/drivers/soc/tegra/pmc.c
+++ b/drivers/soc/tegra/pmc.c
@@ -1920,6 +1920,7 @@ static const struct tegra_pmc_soc tegra186_pmc_soc = {
 };
 
 static const struct of_device_id tegra_pmc_match[] = {
+   { .compatible = "nvidia,tegra194-pmc", .data = &tegra186_pmc_soc },
{ .compatible = "nvidia,tegra186-pmc", .data = _pmc_soc },
{ .compatible = "nvidia,tegra210-pmc", .data = _pmc_soc },
{ .compatible = "nvidia,tegra132-pmc", .data = _pmc_soc },
-- 
2.1.4



[PATCH v2 1/7] firmware: tegra: Simplify channel management

2018-02-05 Thread Mikko Perttunen
The Tegra194 BPMP only implements 5 channels (4 to BPMP, 1 to CCPLEX),
and they are not placed contiguously in memory. The current channel
management in the BPMP driver does not support this.

Simplify and refactor the channel management such that only one atomic
transmit channel and one receive channel are supported, and channels
are not required to be placed contiguously in memory. The same
configuration also works on T186 so we end up with less code.

Signed-off-by: Mikko Perttunen 
---
 drivers/firmware/tegra/bpmp.c | 142 +++---
 include/soc/tegra/bpmp.h  |   4 +-
 2 files changed, 66 insertions(+), 80 deletions(-)

diff --git a/drivers/firmware/tegra/bpmp.c b/drivers/firmware/tegra/bpmp.c
index a7f461f2e650..81bc2dce8626 100644
--- a/drivers/firmware/tegra/bpmp.c
+++ b/drivers/firmware/tegra/bpmp.c
@@ -70,57 +70,20 @@ void tegra_bpmp_put(struct tegra_bpmp *bpmp)
 }
 EXPORT_SYMBOL_GPL(tegra_bpmp_put);
 
-static int tegra_bpmp_channel_get_index(struct tegra_bpmp_channel *channel)
-{
-   return channel - channel->bpmp->channels;
-}
-
 static int
 tegra_bpmp_channel_get_thread_index(struct tegra_bpmp_channel *channel)
 {
struct tegra_bpmp *bpmp = channel->bpmp;
-   unsigned int offset, count;
+   unsigned int count;
int index;
 
-   offset = bpmp->soc->channels.thread.offset;
count = bpmp->soc->channels.thread.count;
 
-   index = tegra_bpmp_channel_get_index(channel);
-   if (index < 0)
-   return index;
-
-   if (index < offset || index >= offset + count)
+   index = channel - channel->bpmp->threaded_channels;
+   if (index < 0 || index >= count)
return -EINVAL;
 
-   return index - offset;
-}
-
-static struct tegra_bpmp_channel *
-tegra_bpmp_channel_get_thread(struct tegra_bpmp *bpmp, unsigned int index)
-{
-   unsigned int offset = bpmp->soc->channels.thread.offset;
-   unsigned int count = bpmp->soc->channels.thread.count;
-
-   if (index >= count)
-   return NULL;
-
-   return >channels[offset + index];
-}
-
-static struct tegra_bpmp_channel *
-tegra_bpmp_channel_get_tx(struct tegra_bpmp *bpmp)
-{
-   unsigned int offset = bpmp->soc->channels.cpu_tx.offset;
-
-   return >channels[offset + smp_processor_id()];
-}
-
-static struct tegra_bpmp_channel *
-tegra_bpmp_channel_get_rx(struct tegra_bpmp *bpmp)
-{
-   unsigned int offset = bpmp->soc->channels.cpu_rx.offset;
-
-   return >channels[offset];
+   return index;
 }
 
 static bool tegra_bpmp_message_valid(const struct tegra_bpmp_message *msg)
@@ -271,11 +234,7 @@ tegra_bpmp_write_threaded(struct tegra_bpmp *bpmp, 
unsigned int mrq,
goto unlock;
}
 
-   channel = tegra_bpmp_channel_get_thread(bpmp, index);
-   if (!channel) {
-   err = -EINVAL;
-   goto unlock;
-   }
+   channel = >threaded_channels[index];
 
if (!tegra_bpmp_master_free(channel)) {
err = -EBUSY;
@@ -328,12 +287,18 @@ int tegra_bpmp_transfer_atomic(struct tegra_bpmp *bpmp,
if (!tegra_bpmp_message_valid(msg))
return -EINVAL;
 
-   channel = tegra_bpmp_channel_get_tx(bpmp);
+   channel = bpmp->tx_channel;
+
+   spin_lock(>atomic_tx_lock);
 
err = tegra_bpmp_channel_write(channel, msg->mrq, MSG_ACK,
   msg->tx.data, msg->tx.size);
-   if (err < 0)
+   if (err < 0) {
+   spin_unlock(>atomic_tx_lock);
return err;
+   }
+
+   spin_unlock(>atomic_tx_lock);
 
err = mbox_send_message(bpmp->mbox.channel, NULL);
if (err < 0)
@@ -607,7 +572,7 @@ static void tegra_bpmp_handle_rx(struct mbox_client 
*client, void *data)
unsigned int i, count;
unsigned long *busy;
 
-   channel = tegra_bpmp_channel_get_rx(bpmp);
+   channel = bpmp->rx_channel;
count = bpmp->soc->channels.thread.count;
busy = bpmp->threaded.busy;
 
@@ -619,9 +584,7 @@ static void tegra_bpmp_handle_rx(struct mbox_client 
*client, void *data)
for_each_set_bit(i, busy, count) {
struct tegra_bpmp_channel *channel;
 
-   channel = tegra_bpmp_channel_get_thread(bpmp, i);
-   if (!channel)
-   continue;
+   channel = >threaded_channels[i];
 
if (tegra_bpmp_master_acked(channel)) {
tegra_bpmp_channel_signal(channel);
@@ -698,7 +661,6 @@ static void tegra_bpmp_channel_cleanup(struct 
tegra_bpmp_channel *channel)
 
 static int tegra_bpmp_probe(struct platform_device *pdev)
 {
-   struct tegra_bpmp_channel *channel;
struct tegra_bpmp *bpmp;
unsigned int i;
char tag[32];
@@ -758,24 +720,45 @@ static int tegra_bpmp_probe(stru

[PATCH v2 5/7] dt-bindings: tegra: Add documentation for nvidia,tegra194-pmc

2018-02-05 Thread Mikko Perttunen
The Tegra194 power management controller has one additional register
aperture to be specified in the device tree node.

Signed-off-by: Mikko Perttunen 
Reviewed-by: Rob Herring 
---
 Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git 
a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt 
b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt
index 078a58b0302f..5a3bf7c5a7a0 100644
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt
@@ -3,6 +3,7 @@ NVIDIA Tegra Power Management Controller (PMC)
 Required properties:
 - compatible: Should contain one of the following:
   - "nvidia,tegra186-pmc": for Tegra186
+  - "nvidia,tegra194-pmc": for Tegra194
 - reg: Must contain an (offset, length) pair of the register set for each
   entry in reg-names.
 - reg-names: Must include the following entries:
@@ -10,6 +11,7 @@ Required properties:
   - "wake"
   - "aotag"
   - "scratch"
+  - "misc" (Only for Tegra194)
 
 Optional properties:
 - nvidia,invert-interrupt: If present, inverts the PMU interrupt signal.
-- 
2.1.4



[PATCH v2 6/7] arm64: tegra: Add Tegra194 chip device tree

2018-02-05 Thread Mikko Perttunen
Add the chip-level device tree, including binding headers, for the
NVIDIA Tegra194 "Xavier" system-on-chip. Only a small subset of devices
are initially available, enough to boot to UART console.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/boot/dts/nvidia/tegra194.dtsi   | 342 +
 include/dt-bindings/clock/tegra194-clock.h | 664 +
 include/dt-bindings/gpio/tegra194-gpio.h   |  59 +++
 include/dt-bindings/power/tegra194-powergate.h |  49 ++
 include/dt-bindings/reset/tegra194-reset.h | 166 +++
 5 files changed, 1280 insertions(+)
 create mode 100644 arch/arm64/boot/dts/nvidia/tegra194.dtsi
 create mode 100644 include/dt-bindings/clock/tegra194-clock.h
 create mode 100644 include/dt-bindings/gpio/tegra194-gpio.h
 create mode 100644 include/dt-bindings/power/tegra194-powergate.h
 create mode 100644 include/dt-bindings/reset/tegra194-reset.h

diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
new file mode 100644
index 000000000000..dda28d758cab
--- /dev/null
+++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/ {
+   compatible = "nvidia,tegra194";
+   interrupt-parent = <>;
+   #address-cells = <2>;
+   #size-cells = <2>;
+
+   /* control backbone */
+   cbb {
+   compatible = "simple-bus";
+   #address-cells = <1>;
+   #size-cells = <1>;
+   ranges = <0x0 0x0 0x0 0x4000>;
+
+   uarta: serial@310 {
+   compatible = "nvidia,tegra194-uart", 
"nvidia,tegra20-uart";
+   reg = <0x0310 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTA>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTA>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   uartb: serial@311 {
+   compatible = "nvidia,tegra194-uart", 
"nvidia,tegra20-uart";
+   reg = <0x0311 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTB>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTB>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   uartd: serial@313 {
+   compatible = "nvidia,tegra194-uart", 
"nvidia,tegra20-uart";
+   reg = <0x0313 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTD>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTD>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   uarte: serial@314 {
+   compatible = "nvidia,tegra194-uart", 
"nvidia,tegra20-uart";
+   reg = <0x0314 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTE>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTE>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   uartf: serial@315 {
+   compatible = "nvidia,tegra194-uart", 
"nvidia,tegra20-uart";
+   reg = <0x0315 0x40>;
+   reg-shift = <2>;
+   interrupts = ;
+   clocks = < TEGRA194_CLK_UARTF>;
+   clock-names = "serial";
+   resets = < TEGRA194_RESET_UARTF>;
+   reset-names = "serial";
+   status = "disabled";
+   };
+
+   gen1_i2c: i2c@316 {
+   compatible = "nvidia,tegra194-i2c", 
"nvidia,tegra114-i2c";
+   reg = <0x0316 0x1>;
+   interrupts = ;
+   #address-cells = <1>;
+ 

<    4   5   6   7   8   9   10   11   12   13   >