diff --git a/common/board_f.c b/common/board_f.c
index 8cca4def2c7924fad2d5f31106cd42739e69fdcd..74f77f1ea1c494094f079ad38293e2433d28010f 100644
--- a/common/board_f.c
+++ b/common/board_f.c
@@ -499,6 +499,7 @@ static int setup_machine(void)
 static int reserve_global_data(void)
 {
 	gd->start_addr_sp -= sizeof(gd_t);
+	gd->start_addr_sp &= ~0xf;
 	gd->new_gd = (gd_t *)map_sysmem(gd->start_addr_sp, sizeof(gd_t));
 	debug("Reserving %zu Bytes for Global Data at: %08lx\n",
 			sizeof(gd_t), gd->start_addr_sp);
diff --git a/include/asm-generic/global_data.h b/include/asm-generic/global_data.h
index 21552650025a108f4136267e4c63ea01584ea34c..cc369fcdfb9e3c7a628bb53a7a713d0d6a69f832 100644
--- a/include/asm-generic/global_data.h
+++ b/include/asm-generic/global_data.h
@@ -99,7 +99,8 @@ typedef struct global_data {
 	int pcidelay_done;
 #endif
 	struct udevice *cur_serial_dev;	/* current serial device */
-	struct arch_global_data arch;	/* architecture-specific data */
+	/* arch-specific data */
+	struct arch_global_data arch __attribute__((aligned(16)));
 } gd_t;
 #endif