diff --git a/arch/arm/include/asm/arch-tegra124/flow.h b/arch/arm/include/asm/arch-tegra124/flow.h
index 0db1881bc6fbe4db583886ccbc7f3f99071c7103..d6f515f1e98703f488b2007ca371415a0d59c54a 100644
--- a/arch/arm/include/asm/arch-tegra124/flow.h
+++ b/arch/arm/include/asm/arch-tegra124/flow.h
@@ -37,4 +37,10 @@ struct flow_ctlr {
 /* FLOW_CTLR_CLUSTER_CONTROL_0 0x2c */
 #define ACTIVE_LP		(1 << 0)
 
+/* CPUn_CSR_0 bit definitions */
+#define CSR_ENABLE		(1 << 0)
+#define CSR_IMMEDIATE_WAKE	(1 << 3)
+#define CSR_WAIT_WFI_SHIFT	8	/* base of the per-CPU WAIT_WFI bits */
+#define CSR_PWR_OFF_STS		(1 << 16)	/* power-off status flag */
+
 #endif	/*  _TEGRA124_FLOW_H_ */
diff --git a/arch/arm/mach-tegra/Makefile b/arch/arm/mach-tegra/Makefile
index 68eec5c579bc8fd6b9c9058d437401d47b33df5c..1a3e24d4361817afb78a5678e7c29699a2f4f357 100644
--- a/arch/arm/mach-tegra/Makefile
+++ b/arch/arm/mach-tegra/Makefile
@@ -26,6 +26,10 @@ obj-y += xusb-padctl.o
 obj-$(CONFIG_DISPLAY_CPUINFO) += sys_info.o
 obj-$(CONFIG_TEGRA124) += vpr.o
 
+ifndef CONFIG_SPL_BUILD
+obj-$(CONFIG_ARMV7_PSCI) += psci.o
+endif
+
 obj-$(CONFIG_TEGRA20) += tegra20/
 obj-$(CONFIG_TEGRA30) += tegra30/
 obj-$(CONFIG_TEGRA114) += tegra114/
diff --git a/arch/arm/mach-tegra/psci.S b/arch/arm/mach-tegra/psci.S
new file mode 100644
index 0000000000000000000000000000000000000000..e4733e638351462a781a47209f40862dbd54aa62
--- /dev/null
+++ b/arch/arm/mach-tegra/psci.S
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2014, NVIDIA
+ * Copyright (C) 2015, Siemens AG
+ *
+ * Authors:
+ *  Thierry Reding <treding@nvidia.com>
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <linux/linkage.h>
+#include <asm/macro.h>
+#include <asm/psci.h>
+
+	.pushsection ._secure.text, "ax"
+	.arch_extension sec
+
+#define TEGRA_SB_CSR_0			0x6000c200
+#define NS_RST_VEC_WR_DIS		(1 << 1)	/* bit in TEGRA_SB_CSR_0 */
+
+#define TEGRA_RESET_EXCEPTION_VECTOR	0x6000f100
+
+#define TEGRA_FLOW_CTRL_BASE		0x60007000
+#define FLOW_CTRL_CPU_CSR		0x08	/* CPU0 CSR, offset from base */
+#define CSR_ENABLE			(1 << 0)
+#define CSR_IMMEDIATE_WAKE		(1 << 3)
+#define CSR_WAIT_WFI_SHIFT		8
+#define FLOW_CTRL_CPU1_CSR		0x18	/* CPU1 CSR; CPU2/3 at +8 each */
+
+@ converts CPU ID (cpu) into FLOW_CTRL_CPUn_CSR offset (ofs); clobbers tmp, flags
+.macro get_csr_reg cpu, ofs, tmp
+	cmp	\cpu, #0		@ CPU0?
+	lsl	\tmp, \cpu, #3	@ multiply by 8 (register offset CPU1-3)
+	moveq	\ofs, #FLOW_CTRL_CPU_CSR	@ CPU0: fixed offset
+	addne	\ofs, \tmp, #FLOW_CTRL_CPU1_CSR - 8	@ CPU1-3: 0x18 + (cpu - 1) * 8
+.endm
+
+ENTRY(psci_arch_init)	/* secure setup for the calling CPU; sets sp */
+	mov	r6, lr			@ save lr, the bl calls below overwrite it
+
+	mrc	p15, 0, r5, c1, c1, 0	@ Read SCR
+	bic	r5, r5, #1		@ Clear bit 0 => Secure mode
+	mcr	p15, 0, r5, c1, c1, 0	@ Write SCR
+	isb				@ synchronize after the SCR write
+
+	@ lock reset vector for non-secure (read-modify-write of TEGRA_SB_CSR_0)
+	ldr	r4, =TEGRA_SB_CSR_0
+	ldr	r5, [r4]
+	orr	r5, r5, #NS_RST_VEC_WR_DIS
+	str	r5, [r4]
+
+	bl	psci_get_cpu_id		@ CPU ID => r0
+	bl	psci_get_cpu_stack_top	@ stack top => r0
+	mov	sp, r0			@ switch to the stack of this CPU
+
+	bx	r6			@ return through the saved lr
+ENDPROC(psci_arch_init)
+
+ENTRY(psci_cpu_off)	/* arm the CSR of the calling CPU, then wfi; never returns */
+	bl	psci_cpu_off_common
+
+	bl	psci_get_cpu_id		@ CPU ID => r0
+
+	get_csr_reg r0, r2, r3
+
+	ldr	r6, =TEGRA_FLOW_CTRL_BASE
+	mov	r5, #(CSR_ENABLE)
+	mov	r4, #(1 << CSR_WAIT_WFI_SHIFT)
+	add	r5, r4, lsl r0		@ add the WAIT_WFI bit of this CPU
+	str	r5, [r6, r2]		@ r2 = CSR offset of this CPU
+
+_loop:	wfi				@ stay in wfi, retry if it falls through
+	b	_loop
+ENDPROC(psci_cpu_off)
+
+ENTRY(psci_cpu_on)	/* r1 = target CPU ID, r2 = target PC; result in r0 */
+	push	{lr}
+
+	mov	r0, r1			@ target CPU ID is the helper argument
+	bl	psci_get_cpu_stack_top	@ get stack top of target CPU (r1, r2 must survive)
+	str	r2, [r0]		@ store target PC at stack top
+	dsb				@ complete the store before the wake-up below
+
+	ldr	r6, =TEGRA_RESET_EXCEPTION_VECTOR
+	ldr	r5, =psci_cpu_entry
+	str	r5, [r6]		@ reset vector = psci_cpu_entry
+
+	get_csr_reg r1, r2, r3
+
+	ldr	r6, =TEGRA_FLOW_CTRL_BASE
+	mov	r5, #(CSR_IMMEDIATE_WAKE | CSR_ENABLE)
+	str	r5, [r6, r2]		@ r2 = CSR offset of the target CPU
+
+	mov	r0, #ARM_PSCI_RET_SUCCESS	@ Return PSCI_RET_SUCCESS
+	pop	{pc}
+ENDPROC(psci_cpu_on)
+
+	.globl psci_text_end
+psci_text_end:
+	.popsection
diff --git a/arch/arm/mach-tegra/tegra124/Makefile b/arch/arm/mach-tegra/tegra124/Makefile
index ef2da29f3085c4c3e4da20a4115e2685d9ccb661..f577f459be0f207623ec3494d66697c5955bc59b 100644
--- a/arch/arm/mach-tegra/tegra124/Makefile
+++ b/arch/arm/mach-tegra/tegra124/Makefile
@@ -11,3 +11,7 @@ obj-y	+= clock.o
 obj-y	+= funcmux.o
 obj-y	+= pinmux.o
 obj-y	+= xusb-padctl.o
+
+ifndef CONFIG_SPL_BUILD
+obj-$(CONFIG_ARMV7_NONSEC) += psci.o
+endif
diff --git a/arch/arm/mach-tegra/tegra124/psci.c b/arch/arm/mach-tegra/tegra124/psci.c
new file mode 100644
index 0000000000000000000000000000000000000000..16d196508c6e93546d21da4b963be7b1eada002d
--- /dev/null
+++ b/arch/arm/mach-tegra/tegra124/psci.c
@@ -0,0 +1,59 @@
+/*
+ * (C) Copyright 2015, Siemens AG
+ * Author: Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <asm/io.h>
+#include <asm/psci.h>
+#include <asm/arch/flow.h>
+#include <asm/arch/powergate.h>
+#include <asm/arch-tegra/ap.h>
+#include <asm/arch-tegra/pmc.h>
+
+static void park_cpu(void)	/* idle forever; used as CPU reset vector */
+{
+	for (;;)
+		asm volatile("wfi");
+}
+
+/**
+ * Initialize power management for application processors (here: CPU1-3)
+ */
+void psci_board_init(void)
+{
+	struct flow_ctlr *flow = (struct flow_ctlr *)NV_PA_FLOW_BASE;
+
+	writel((u32)park_cpu, EXCEP_VECTOR_CPU_RESET_VECTOR);
+
+	/*
+	 * The naturally expected order of putting these CPUs under Flow
+	 * Controller regime would be
+	 *  - configure the Flow Controller
+	 *  - power up the CPUs
+	 *  - wait for the CPUs to hit wfi and be powered down again
+	 *
+	 * However, this doesn't work in practice. We rather need to power them
+	 * up first and park them in wfi. While they are waiting there, we can
+	 * indeed program the Flow Controller to powergate them on wfi, which
+	 * will then happen immediately as they are already in that state.
+	 */
+	tegra_powergate_power_on(TEGRA_POWERGATE_CPU1);
+	tegra_powergate_power_on(TEGRA_POWERGATE_CPU2);
+	tegra_powergate_power_on(TEGRA_POWERGATE_CPU3);
+
+	writel((2 << CSR_WAIT_WFI_SHIFT) | CSR_ENABLE, &flow->cpu1_csr);
+	writel((4 << CSR_WAIT_WFI_SHIFT) | CSR_ENABLE, &flow->cpu2_csr);
+	writel((8 << CSR_WAIT_WFI_SHIFT) | CSR_ENABLE, &flow->cpu3_csr);
+
+	writel(EVENT_MODE_STOP, &flow->halt_cpu1_events);
+	writel(EVENT_MODE_STOP, &flow->halt_cpu2_events);
+	writel(EVENT_MODE_STOP, &flow->halt_cpu3_events);
+
+	while (!(readl(&flow->cpu1_csr) & CSR_PWR_OFF_STS) ||
+		!(readl(&flow->cpu2_csr) & CSR_PWR_OFF_STS) ||
+		!(readl(&flow->cpu3_csr) & CSR_PWR_OFF_STS))
+		/* spin (no timeout) until CPU1-3 all show CSR_PWR_OFF_STS */;
+}