diff --git a/arch/arm/cpu/armv8/fsl-lsch3/Makefile b/arch/arm/cpu/armv8/fsl-lsch3/Makefile
index 9249537c65ac2b8d4f6d5ee5bb0b84f4ca5d533e..f920eebc56b8eddb77c3bf8fb0c9b4b07a7cfdd2 100644
--- a/arch/arm/cpu/armv8/fsl-lsch3/Makefile
+++ b/arch/arm/cpu/armv8/fsl-lsch3/Makefile
@@ -7,3 +7,5 @@
 obj-y += cpu.o
 obj-y += lowlevel.o
 obj-y += speed.o
+obj-$(CONFIG_MP) += mp.o
+obj-$(CONFIG_OF_LIBFDT) += fdt.o
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c
index c129d032f4094bc7abfb749945035e9b6009552b..47b947f44fadb466da62872b82cacb103d45937e 100644
--- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c
+++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c
@@ -11,6 +11,7 @@
 #include <asm/io.h>
 #include <asm/arch-fsl-lsch3/immap_lsch3.h>
 #include "cpu.h"
+#include "mp.h"
 #include "speed.h"
 #include <fsl_mc.h>
 
@@ -434,3 +435,15 @@ int cpu_eth_init(bd_t *bis)
 #endif
 	return error;
 }
+
+
+int arch_early_init_r(void)
+{
+	int rv;
+	rv = fsl_lsch3_wake_seconday_cores();
+
+	if (rv)
+		printf("Did not wake secondary cores\n");
+
+	return 0;
+}
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h
index 28544d7c14d84266e5131a5377dd4529425f6b66..2e3312b99beec55f38d76b895c199750f3a67fac 100644
--- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h
+++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h
@@ -5,3 +5,4 @@
  */
 
 int fsl_qoriq_core_to_cluster(unsigned int core);
+u32 cpu_mask(void);
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/fdt.c b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c
new file mode 100644
index 0000000000000000000000000000000000000000..e392eb91496b8cc58faeb563c4849a473128f8c2
--- /dev/null
+++ b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2014 Freescale Semiconductor, Inc.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <libfdt.h>
+#include <fdt_support.h>
+#include "mp.h"
+
+#ifdef CONFIG_MP
+void ft_fixup_cpu(void *blob)
+{
+	int off;
+	__maybe_unused u64 spin_tbl_addr = (u64)get_spin_tbl_addr();
+	fdt32_t *reg;
+	int addr_cells;
+	u64 val;
+	size_t *boot_code_size = &(__secondary_boot_code_size);
+
+	off = fdt_path_offset(blob, "/cpus");
+	if (off < 0) {
+		puts("couldn't find /cpus node\n");
+		return;
+	}
+	of_bus_default_count_cells(blob, off, &addr_cells, NULL);
+
+	off = fdt_node_offset_by_prop_value(blob, -1, "device_type", "cpu", 4);
+	while (off != -FDT_ERR_NOTFOUND) {
+		reg = (fdt32_t *)fdt_getprop(blob, off, "reg", 0);
+		if (reg) {
+			val = spin_tbl_addr;
+			val += id_to_core(of_read_number(reg, addr_cells))
+				* SPIN_TABLE_ELEM_SIZE;
+			val = cpu_to_fdt64(val);
+			fdt_setprop_string(blob, off, "enable-method",
+					   "spin-table");
+			fdt_setprop(blob, off, "cpu-release-addr",
+				    &val, sizeof(val));
+		} else {
+			puts("Warning: found cpu node without reg property\n");
+		}
+		off = fdt_node_offset_by_prop_value(blob, off, "device_type",
+						    "cpu", 4);
+	}
+
+	fdt_add_mem_rsv(blob, (uintptr_t)&secondary_boot_code,
+			*boot_code_size);
+}
+#endif
+
+void ft_cpu_setup(void *blob, bd_t *bd)
+{
+#ifdef CONFIG_MP
+	ft_fixup_cpu(blob);
+#endif
+}
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S
index ad32b6cd52071fb9105ad72d96ed8ead85acbe7f..2a88aab283d7e2438017cee238fb732acd6c3352 100644
--- a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S
+++ b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S
@@ -8,7 +8,9 @@
 
 #include <config.h>
 #include <linux/linkage.h>
+#include <asm/gic.h>
 #include <asm/macro.h>
+#include "mp.h"
 
 ENTRY(lowlevel_init)
 	mov	x29, lr			/* Save LR */
@@ -35,31 +37,114 @@ ENTRY(lowlevel_init)
 #endif
 #endif
 
-	branch_if_master x0, x1, 1f
+	branch_if_master x0, x1, 2f
 
+	ldr	x0, =secondary_boot_func
+	blr	x0
+2:
+	mov	lr, x29			/* Restore LR */
+	ret
+ENDPROC(lowlevel_init)
+
+	/* Keep literals not used by the secondary boot code outside it */
+	.ltorg
+
+	/* Using 64 bit alignment since the spin table is accessed as data */
+	.align 4
+	.global secondary_boot_code
+	/* Secondary Boot Code starts here */
+secondary_boot_code:
+	.global __spin_table
+__spin_table:
+	.space CONFIG_MAX_CPUS*SPIN_TABLE_ELEM_SIZE
+
+	.align 2
+ENTRY(secondary_boot_func)
 	/*
-	 * Slave should wait for master clearing spin table.
-	 * This sync prevent salves observing incorrect
-	 * value of spin table and jumping to wrong place.
+	 * MPIDR_EL1 Fields:
+	 * MPIDR[1:0] = AFF0_CPUID <- Core ID (0,1)
+	 * MPIDR[7:2] = AFF0_RES
+	 * MPIDR[15:8] = AFF1_CLUSTERID <- Cluster ID (0,1,2,3)
+	 * MPIDR[23:16] = AFF2_CLUSTERID
+	 * MPIDR[24] = MT
+	 * MPIDR[29:25] = RES0
+	 * MPIDR[30] = U
+	 * MPIDR[31] = ME
+	 * MPIDR[39:32] = AFF3
+	 *
+	 * Linear Processor ID (LPID) calculation from MPIDR_EL1:
+	 * (We only use AFF0_CPUID and AFF1_CLUSTERID for now
+	 * until AFF2_CLUSTERID and AFF3 have non-zero values)
+	 *
+	 * LPID = MPIDR[15:8] | MPIDR[1:0]
 	 */
-#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3)
-#ifdef CONFIG_GICV2
-	ldr	x0, =GICC_BASE
-#endif
-	bl	gic_wait_for_interrupt
-#endif
-
+	mrs	x0, mpidr_el1
+	ubfm	x1, x0, #8, #15
+	ubfm	x2, x0, #0, #1
+	orr	x10, x2, x1, lsl #2	/* x10 has LPID */
+	ubfm    x9, x0, #0, #15         /* x9 contains MPIDR[15:0] */
 	/*
-	 * All processors will enter EL2 and optionally EL1.
+	 * offset of the spin table element for this core from start of spin
+	 * table (each elem is padded to 64 bytes)
 	 */
-	bl	armv8_switch_to_el2
+	lsl	x1, x10, #6
+	ldr	x0, =__spin_table
+	/* physical address of this cpus spin table element */
+	add	x11, x1, x0
+
+	str	x9, [x11, #16]	/* LPID */
+	mov	x4, #1
+	str	x4, [x11, #8]	/* STATUS */
+	dsb	sy
+#if defined(CONFIG_GICV3)
+	gic_wait_for_interrupt_m x0
+#elif defined(CONFIG_GICV2)
+        ldr     x0, =GICC_BASE
+        gic_wait_for_interrupt_m x0, w1
+#endif
+
+	bl secondary_switch_to_el2
 #ifdef CONFIG_ARMV8_SWITCH_TO_EL1
-	bl	armv8_switch_to_el1
+	bl secondary_switch_to_el1
 #endif
-	b	2f
 
-1:
-2:
-	mov	lr, x29			/* Restore LR */
-	ret
-ENDPROC(lowlevel_init)
+slave_cpu:
+	wfe
+	ldr	x0, [x11]
+	cbz	x0, slave_cpu
+#ifndef CONFIG_ARMV8_SWITCH_TO_EL1
+	mrs     x1, sctlr_el2
+#else
+	mrs     x1, sctlr_el1
+#endif
+	tbz     x1, #25, cpu_is_le
+	rev     x0, x0                  /* BE to LE conversion */
+cpu_is_le:
+	br	x0			/* branch to the given address */
+ENDPROC(secondary_boot_func)
+
+ENTRY(secondary_switch_to_el2)
+	switch_el x0, 1f, 0f, 0f
+0:	ret
+1:	armv8_switch_to_el2_m x0
+ENDPROC(secondary_switch_to_el2)
+
+ENTRY(secondary_switch_to_el1)
+	switch_el x0, 0f, 1f, 0f
+0:	ret
+1:	armv8_switch_to_el1_m x0, x1
+ENDPROC(secondary_switch_to_el1)
+
+	/* Ensure that the literals used by the secondary boot code are
+	 * assembled within it (this is required so that we can protect
+	 * this area with a single memreserve region
+	 */
+	.ltorg
+
+	/* 64 bit alignment for elements accessed as data */
+	.align 4
+	.globl __secondary_boot_code_size
+	.type __secondary_boot_code_size, %object
+	/* Secondary Boot Code ends here */
+__secondary_boot_code_size:
+	.quad .-secondary_boot_code
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/mp.c b/arch/arm/cpu/armv8/fsl-lsch3/mp.c
new file mode 100644
index 0000000000000000000000000000000000000000..94998bf37bbcfa2c90a8d39346375c59b9cf8a98
--- /dev/null
+++ b/arch/arm/cpu/armv8/fsl-lsch3/mp.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright 2014 Freescale Semiconductor, Inc.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <asm/io.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/arch-fsl-lsch3/immap_lsch3.h>
+#include "mp.h"
+
+DECLARE_GLOBAL_DATA_PTR;
+
+void *get_spin_tbl_addr(void)
+{
+	return &__spin_table;
+}
+
+phys_addr_t determine_mp_bootpg(void)
+{
+	return (phys_addr_t)&secondary_boot_code;
+}
+
+int fsl_lsch3_wake_seconday_cores(void)
+{
+	struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
+	struct ccsr_reset __iomem *rst = (void *)(CONFIG_SYS_FSL_RST_ADDR);
+	u32 cores, cpu_up_mask = 1;
+	int i, timeout = 10;
+	u64 *table = get_spin_tbl_addr();
+
+	cores = cpu_mask();
+	/* Clear spin table so that secondary processors
+	 * observe the correct value after waking up from wfe.
+	 */
+	memset(table, 0, CONFIG_MAX_CPUS*SPIN_TABLE_ELEM_SIZE);
+	flush_dcache_range((unsigned long)table,
+			   (unsigned long)table +
+			   (CONFIG_MAX_CPUS*SPIN_TABLE_ELEM_SIZE));
+
+	printf("Waking secondary cores to start from %lx\n", gd->relocaddr);
+	out_le32(&gur->bootlocptrh, (u32)(gd->relocaddr >> 32));
+	out_le32(&gur->bootlocptrl, (u32)gd->relocaddr);
+	out_le32(&gur->scratchrw[6], 1);
+	asm volatile("dsb st" : : : "memory");
+	rst->brrl = cores;
+	asm volatile("dsb st" : : : "memory");
+
+	/* This is needed as a precautionary measure.
+	 * If some code before this has accidentally  released the secondary
+	 * cores then the pre-bootloader code will trap them in a "wfe" unless
+	 * the scratchrw[6] is set. In this case we need a sev here to get these
+	 * cores moving again.
+	 */
+	asm volatile("sev");
+
+	while (timeout--) {
+		flush_dcache_range((unsigned long)table, (unsigned long)table +
+				   CONFIG_MAX_CPUS * 64);
+		for (i = 1; i < CONFIG_MAX_CPUS; i++) {
+			if (table[i * WORDS_PER_SPIN_TABLE_ENTRY +
+					SPIN_TABLE_ELEM_STATUS_IDX])
+				cpu_up_mask |= 1 << i;
+		}
+		if (hweight32(cpu_up_mask) == hweight32(cores))
+			break;
+		udelay(10);
+	}
+	if (timeout <= 0) {
+		printf("Not all cores (0x%x) are up (0x%x)\n",
+		       cores, cpu_up_mask);
+		return 1;
+	}
+	printf("All (%d) cores are up.\n", hweight32(cores));
+
+	return 0;
+}
+
+int is_core_valid(unsigned int core)
+{
+	return !!((1 << core) & cpu_mask());
+}
+
+int cpu_reset(int nr)
+{
+	puts("Feature is not implemented.\n");
+
+	return 0;
+}
+
+int cpu_disable(int nr)
+{
+	puts("Feature is not implemented.\n");
+
+	return 0;
+}
+
+int core_to_pos(int nr)
+{
+	u32 cores = cpu_mask();
+	int i, count = 0;
+
+	if (nr == 0) {
+		return 0;
+	} else if (nr >= hweight32(cores)) {
+		puts("Not a valid core number.\n");
+		return -1;
+	}
+
+	for (i = 1; i < 32; i++) {
+		if (is_core_valid(i)) {
+			count++;
+			if (count == nr)
+				break;
+		}
+	}
+
+	return count;
+}
+
+int cpu_status(int nr)
+{
+	u64 *table;
+	int pos;
+
+	if (nr == 0) {
+		table = (u64 *)get_spin_tbl_addr();
+		printf("table base @ 0x%p\n", table);
+	} else {
+		pos = core_to_pos(nr);
+		if (pos < 0)
+			return -1;
+		table = (u64 *)get_spin_tbl_addr() + pos *
+			WORDS_PER_SPIN_TABLE_ENTRY;
+		printf("table @ 0x%p\n", table);
+		printf("   addr - 0x%016llx\n",
+		       table[SPIN_TABLE_ELEM_ENTRY_ADDR_IDX]);
+		printf("   status   - 0x%016llx\n",
+		       table[SPIN_TABLE_ELEM_STATUS_IDX]);
+		printf("   lpid  - 0x%016llx\n",
+		       table[SPIN_TABLE_ELEM_LPID_IDX]);
+	}
+
+	return 0;
+}
+
+int cpu_release(int nr, int argc, char * const argv[])
+{
+	u64 boot_addr;
+	u64 *table = (u64 *)get_spin_tbl_addr();
+	int pos;
+
+	pos = core_to_pos(nr);
+	if (pos <= 0)
+		return -1;
+
+	table += pos * WORDS_PER_SPIN_TABLE_ENTRY;
+	boot_addr = simple_strtoull(argv[0], NULL, 16);
+	table[SPIN_TABLE_ELEM_ENTRY_ADDR_IDX] = boot_addr;
+	flush_dcache_range((unsigned long)table,
+			   (unsigned long)table + SPIN_TABLE_ELEM_SIZE);
+	asm volatile("dsb st");
+	smp_kick_all_cpus();	/* only those with entry addr set will run */
+
+	return 0;
+}
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/mp.h b/arch/arm/cpu/armv8/fsl-lsch3/mp.h
new file mode 100644
index 0000000000000000000000000000000000000000..06ac0bcf361dc38dbafc17333c99a22690ac71de
--- /dev/null
+++ b/arch/arm/cpu/armv8/fsl-lsch3/mp.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2014, Freescale Semiconductor
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifndef _FSL_CH3_MP_H
+#define _FSL_CH3_MP_H
+
+/*
+* Each spin table element is defined as
+* struct {
+*      uint64_t entry_addr;
+*      uint64_t status;
+*      uint64_t lpid;
+* };
+* we pad this struct to 64 bytes so each entry is in its own cacheline
+* the actual spin table is an array of these structures
+*/
+#define SPIN_TABLE_ELEM_ENTRY_ADDR_IDX	0
+#define SPIN_TABLE_ELEM_STATUS_IDX	1
+#define SPIN_TABLE_ELEM_LPID_IDX	2
+#define WORDS_PER_SPIN_TABLE_ENTRY	8	/* pad to 64 bytes */
+#define SPIN_TABLE_ELEM_SIZE		64
+
+#define id_to_core(x)	((x & 3) | (x >> 6))
+#ifndef __ASSEMBLY__
+extern u64 __spin_table[];
+extern u64 *secondary_boot_code;
+extern size_t __secondary_boot_code_size;
+int fsl_lsch3_wake_seconday_cores(void);
+void *get_spin_tbl_addr(void);
+phys_addr_t determine_mp_bootpg(void);
+void secondary_boot_func(void);
+#endif
+#endif /* _FSL_CH3_MP_H */
diff --git a/arch/arm/cpu/armv8/transition.S b/arch/arm/cpu/armv8/transition.S
index 38dea5c51871eb4e20abf86f67aaf5bd9c4f6b10..ade1cdead83920dea59b38ecc895552c18fd442a 100644
--- a/arch/arm/cpu/armv8/transition.S
+++ b/arch/arm/cpu/armv8/transition.S
@@ -14,70 +14,11 @@
 ENTRY(armv8_switch_to_el2)
 	switch_el x0, 1f, 0f, 0f
 0:	ret
-1:
-	mov	x0, #0x5b1	/* Non-secure EL0/EL1 | HVC | 64bit EL2 */
-	msr	scr_el3, x0
-	msr	cptr_el3, xzr	/* Disable coprocessor traps to EL3 */
-	mov	x0, #0x33ff
-	msr	cptr_el2, x0	/* Disable coprocessor traps to EL2 */
-
-	/* Initialize SCTLR_EL2 */
-	msr	sctlr_el2, xzr
-
-	/* Return to the EL2_SP2 mode from EL3 */
-	mov	x0, sp
-	msr	sp_el2, x0	/* Migrate SP */
-	mrs	x0, vbar_el3
-	msr	vbar_el2, x0	/* Migrate VBAR */
-	mov	x0, #0x3c9
-	msr	spsr_el3, x0	/* EL2_SP2 | D | A | I | F */
-	msr	elr_el3, lr
-	eret
+1:	armv8_switch_to_el2_m x0
 ENDPROC(armv8_switch_to_el2)
 
 ENTRY(armv8_switch_to_el1)
 	switch_el x0, 0f, 1f, 0f
 0:	ret
-1:
-	/* Initialize Generic Timers */
-	mrs	x0, cnthctl_el2
-	orr	x0, x0, #0x3		/* Enable EL1 access to timers */
-	msr	cnthctl_el2, x0
-	msr	cntvoff_el2, xzr
-	mrs	x0, cntkctl_el1
-	orr	x0, x0, #0x3		/* Enable EL0 access to timers */
-	msr	cntkctl_el1, x0
-
-	/* Initilize MPID/MPIDR registers */
-	mrs	x0, midr_el1
-	mrs	x1, mpidr_el1
-	msr	vpidr_el2, x0
-	msr	vmpidr_el2, x1
-
-	/* Disable coprocessor traps */
-	mov	x0, #0x33ff
-	msr	cptr_el2, x0		/* Disable coprocessor traps to EL2 */
-	msr	hstr_el2, xzr		/* Disable coprocessor traps to EL2 */
-	mov	x0, #3 << 20
-	msr	cpacr_el1, x0		/* Enable FP/SIMD at EL1 */
-
-	/* Initialize HCR_EL2 */
-	mov	x0, #(1 << 31)		/* 64bit EL1 */
-	orr	x0, x0, #(1 << 29)	/* Disable HVC */
-	msr	hcr_el2, x0
-
-	/* SCTLR_EL1 initialization */
-	mov	x0, #0x0800
-	movk	x0, #0x30d0, lsl #16
-	msr	sctlr_el1, x0
-
-	/* Return to the EL1_SP1 mode from EL2 */
-	mov	x0, sp
-	msr	sp_el1, x0		/* Migrate SP */
-	mrs	x0, vbar_el2
-	msr	vbar_el1, x0		/* Migrate VBAR */
-	mov	x0, #0x3c5
-	msr	spsr_el2, x0		/* EL1_SP1 | D | A | I | F */
-	msr	elr_el2, lr
-	eret
+1:	armv8_switch_to_el1_m x0, x1
 ENDPROC(armv8_switch_to_el1)
diff --git a/arch/arm/include/asm/arch-fsl-lsch3/config.h b/arch/arm/include/asm/arch-fsl-lsch3/config.h
index f632c82896741c69a28463ba8eda58be17cd5d84..da551e883997b4aacfc111d5af277e0e47e5354c 100644
--- a/arch/arm/include/asm/arch-fsl-lsch3/config.h
+++ b/arch/arm/include/asm/arch-fsl-lsch3/config.h
@@ -8,7 +8,7 @@
 #define _ASM_ARMV8_FSL_LSCH3_CONFIG_
 
 #include <fsl_ddrc_version.h>
-
+#define CONFIG_MP
 #define CONFIG_SYS_FSL_OCRAM_BASE	0x18000000	/* initial RAM */
 /* Link Definitions */
 #define CONFIG_SYS_INIT_SP_ADDR		(CONFIG_SYS_FSL_OCRAM_BASE + 0xfff0)
@@ -19,6 +19,7 @@
 #define CONFIG_SYS_FSL_DDR3_ADDR		0x08210000
 #define CONFIG_SYS_FSL_GUTS_ADDR		(CONFIG_SYS_IMMR + 0x00E00000)
 #define CONFIG_SYS_FSL_PMU_ADDR			(CONFIG_SYS_IMMR + 0x00E30000)
+#define CONFIG_SYS_FSL_RST_ADDR			(CONFIG_SYS_IMMR + 0x00E60000)
 #define CONFIG_SYS_FSL_CH3_CLK_GRPA_ADDR	(CONFIG_SYS_IMMR + 0x00300000)
 #define CONFIG_SYS_FSL_CH3_CLK_GRPB_ADDR	(CONFIG_SYS_IMMR + 0x00310000)
 #define CONFIG_SYS_FSL_CH3_CLK_CTRL_ADDR	(CONFIG_SYS_IMMR + 0x00370000)
diff --git a/arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h b/arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h
index 18e66bdf411af70de20624cfa0eaba2fccb2a9b4..ee1d6512d90056686775cb07368c465f30ee4088 100644
--- a/arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h
+++ b/arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h
@@ -113,4 +113,39 @@ struct ccsr_clk_ctrl {
 		u8  res_04[0x20-0x04];
 	} clkcncsr[8];
 };
+
+struct ccsr_reset {
+	u32 rstcr;			/* 0x000 */
+	u32 rstcrsp;			/* 0x004 */
+	u8 res_008[0x10-0x08];		/* 0x008 */
+	u32 rstrqmr1;			/* 0x010 */
+	u32 rstrqmr2;			/* 0x014 */
+	u32 rstrqsr1;			/* 0x018 */
+	u32 rstrqsr2;			/* 0x01c */
+	u32 rstrqwdtmrl;		/* 0x020 */
+	u32 rstrqwdtmru;		/* 0x024 */
+	u8 res_028[0x30-0x28];		/* 0x028 */
+	u32 rstrqwdtsrl;		/* 0x030 */
+	u32 rstrqwdtsru;		/* 0x034 */
+	u8 res_038[0x60-0x38];		/* 0x038 */
+	u32 brrl;			/* 0x060 */
+	u32 brru;			/* 0x064 */
+	u8 res_068[0x80-0x68];		/* 0x068 */
+	u32 pirset;			/* 0x080 */
+	u32 pirclr;			/* 0x084 */
+	u8 res_088[0x90-0x88];		/* 0x088 */
+	u32 brcorenbr;			/* 0x090 */
+	u8 res_094[0x100-0x94];		/* 0x094 */
+	u32 rcw_reqr;			/* 0x100 */
+	u32 rcw_completion;		/* 0x104 */
+	u8 res_108[0x110-0x108];	/* 0x108 */
+	u32 pbi_reqr;			/* 0x110 */
+	u32 pbi_completion;		/* 0x114 */
+	u8 res_118[0xa00-0x118];	/* 0x118 */
+	u32 qmbm_warmrst;		/* 0xa00 */
+	u32 soc_warmrst;		/* 0xa04 */
+	u8 res_a08[0xbf8-0xa08];	/* 0xa08 */
+	u32 ip_rev1;			/* 0xbf8 */
+	u32 ip_rev2;			/* 0xbfc */
+};
 #endif /* __ARCH_FSL_LSCH3_IMMAP_H */
diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h
index f77e4b880e48c4cba5f71d28da434e0f1e42293e..541b4432549bf38b8c99504b6299ee4d87559e98 100644
--- a/arch/arm/include/asm/macro.h
+++ b/arch/arm/include/asm/macro.h
@@ -105,6 +105,99 @@ lr	.req	x30
 	cbz	\xreg1, \master_label
 .endm
 
+.macro armv8_switch_to_el2_m, xreg1
+	/* 64bit EL2 | HCE | SMD | RES1 (Bits[5:4]) | Non-secure EL0/EL1 */
+	mov	\xreg1, #0x5b1
+	msr	scr_el3, \xreg1
+	msr	cptr_el3, xzr		/* Disable coprocessor traps to EL3 */
+	mov	\xreg1, #0x33ff
+	msr	cptr_el2, \xreg1	/* Disable coprocessor traps to EL2 */
+
+	/* Initialize SCTLR_EL2
+	 *
+	 * setting RES1 bits (29,28,23,22,18,16,11,5,4) to 1
+	 * and RES0 bits (31,30,27,26,24,21,20,17,15-13,10-6) +
+	 * EE,WXN,I,SA,C,A,M to 0
+	 */
+	mov	\xreg1, #0x0830
+	movk	\xreg1, #0x30C5, lsl #16
+	msr	sctlr_el2, \xreg1
+
+	/* Return to the EL2_SP2 mode from EL3 */
+	mov	\xreg1, sp
+	msr	sp_el2, \xreg1		/* Migrate SP */
+	mrs	\xreg1, vbar_el3
+	msr	vbar_el2, \xreg1	/* Migrate VBAR */
+	mov	\xreg1, #0x3c9
+	msr	spsr_el3, \xreg1	/* EL2_SP2 | D | A | I | F */
+	msr	elr_el3, lr
+	eret
+.endm
+
+.macro armv8_switch_to_el1_m, xreg1, xreg2
+	/* Initialize Generic Timers */
+	mrs	\xreg1, cnthctl_el2
+	orr	\xreg1, \xreg1, #0x3	/* Enable EL1 access to timers */
+	msr	cnthctl_el2, \xreg1
+	msr	cntvoff_el2, xzr
+
+	/* Initilize MPID/MPIDR registers */
+	mrs	\xreg1, midr_el1
+	mrs	\xreg2, mpidr_el1
+	msr	vpidr_el2, \xreg1
+	msr	vmpidr_el2, \xreg2
+
+	/* Disable coprocessor traps */
+	mov	\xreg1, #0x33ff
+	msr	cptr_el2, \xreg1	/* Disable coprocessor traps to EL2 */
+	msr	hstr_el2, xzr		/* Disable coprocessor traps to EL2 */
+	mov	\xreg1, #3 << 20
+	msr	cpacr_el1, \xreg1	/* Enable FP/SIMD at EL1 */
+
+	/* Initialize HCR_EL2 */
+	mov	\xreg1, #(1 << 31)		/* 64bit EL1 */
+	orr	\xreg1, \xreg1, #(1 << 29)	/* Disable HVC */
+	msr	hcr_el2, \xreg1
+
+	/* SCTLR_EL1 initialization
+	 *
+	 * setting RES1 bits (29,28,23,22,20,11) to 1
+	 * and RES0 bits (31,30,27,21,17,13,10,6) +
+	 * UCI,EE,EOE,WXN,nTWE,nTWI,UCT,DZE,I,UMA,SED,ITD,
+	 * CP15BEN,SA0,SA,C,A,M to 0
+	 */
+	mov	\xreg1, #0x0800
+	movk	\xreg1, #0x30d0, lsl #16
+	msr	sctlr_el1, \xreg1
+
+	/* Return to the EL1_SP1 mode from EL2 */
+	mov	\xreg1, sp
+	msr	sp_el1, \xreg1		/* Migrate SP */
+	mrs	\xreg1, vbar_el2
+	msr	vbar_el1, \xreg1	/* Migrate VBAR */
+	mov	\xreg1, #0x3c5
+	msr	spsr_el2, \xreg1	/* EL1_SP1 | D | A | I | F */
+	msr	elr_el2, lr
+	eret
+.endm
+
+#if defined(CONFIG_GICV3)
+.macro gic_wait_for_interrupt_m xreg1
+0 :	wfi
+	mrs     \xreg1, ICC_IAR1_EL1
+	msr     ICC_EOIR1_EL1, \xreg1
+	cbnz    \xreg1, 0b
+.endm
+#elif defined(CONFIG_GICV2)
+.macro gic_wait_for_interrupt_m xreg1, wreg2
+0 :	wfi
+	ldr     \wreg2, [\xreg1, GICC_AIAR]
+	str     \wreg2, [\xreg1, GICC_AEOIR]
+	and	\wreg2, \wreg2, #3ff
+	cbnz    \wreg2, 0b
+.endm
+#endif
+
 #endif /* CONFIG_ARM64 */
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/arm/lib/gic_64.S b/arch/arm/lib/gic_64.S
index d56396ea225c51905fe9f978d932195bcea9c089..a3e18f7713e571ead9d9f81c6242e486e1c092da 100644
--- a/arch/arm/lib/gic_64.S
+++ b/arch/arm/lib/gic_64.S
@@ -10,8 +10,8 @@
 #include <asm-offsets.h>
 #include <config.h>
 #include <linux/linkage.h>
-#include <asm/macro.h>
 #include <asm/gic.h>
+#include <asm/macro.h>
 
 
 /*************************************************************************
@@ -181,14 +181,10 @@ ENDPROC(gic_kick_secondary_cpus)
  *
  *************************************************************************/
 ENTRY(gic_wait_for_interrupt)
-0:	wfi
 #if defined(CONFIG_GICV3)
-	mrs	x9, ICC_IAR1_EL1
-	msr	ICC_EOIR1_EL1, x9
+	gic_wait_for_interrupt_m x9
 #elif defined(CONFIG_GICV2)
-	ldr	w9, [x0, GICC_AIAR]
-	str	w9, [x0, GICC_AEOIR]
+	gic_wait_for_interrupt_m x0, w9
 #endif
-	cbnz	w9, 0b
 	ret
 ENDPROC(gic_wait_for_interrupt)