diff --git a/drivers/mmc/Makefile b/drivers/mmc/Makefile
index 1d6faa2a9249b023e265681086804570855655fe..fb6b502dcaa2188b907ef2ab76801adbdf048b62 100644
--- a/drivers/mmc/Makefile
+++ b/drivers/mmc/Makefile
@@ -46,6 +46,7 @@ COBJS-$(CONFIG_SDHCI) += sdhci.o
 COBJS-$(CONFIG_BCM2835_SDHCI) += bcm2835_sdhci.o
 COBJS-$(CONFIG_S5P_SDHCI) += s5p_sdhci.o
 COBJS-$(CONFIG_SH_MMCIF) += sh_mmcif.o
+COBJS-$(CONFIG_SPEAR_SDHCI) += spear_sdhci.o
 COBJS-$(CONFIG_TEGRA_MMC) += tegra_mmc.o
 COBJS-$(CONFIG_DWMMC) += dw_mmc.o
 COBJS-$(CONFIG_EXYNOS_DWMMC) += exynos_dw_mmc.o
diff --git a/drivers/mmc/davinci_mmc.c b/drivers/mmc/davinci_mmc.c
index e2379e326eeea7d367d08453d22d76c4627794fb..5aa218426f4948eb12f686b0b6099ce58ba9d376 100644
--- a/drivers/mmc/davinci_mmc.c
+++ b/drivers/mmc/davinci_mmc.c
@@ -285,8 +285,11 @@ dmmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd, struct mmc_data *data)
 			 */
 			if (bytes_left > fifo_bytes)
 				dmmc_wait_fifo_status(regs, 0x4a);
-			else if (bytes_left == fifo_bytes)
+			else if (bytes_left == fifo_bytes) {
 				dmmc_wait_fifo_status(regs, 0x40);
+				if (cmd->cmdidx == MMC_CMD_SEND_EXT_CSD)
+					udelay(600);
+			}
 
 			for (i = 0; bytes_left && (i < fifo_words); i++) {
 				cmddata = get_val(&regs->mmcdrr);
diff --git a/drivers/mmc/fsl_esdhc.c b/drivers/mmc/fsl_esdhc.c
index e945c0a470ca85667026a422933e7669effcb6a2..ec01795f32c1bd47545461ca0bedce729d75d7f2 100644
--- a/drivers/mmc/fsl_esdhc.c
+++ b/drivers/mmc/fsl_esdhc.c
@@ -601,8 +601,7 @@ int fsl_esdhc_mmc_init(bd_t *bis)
 {
 	struct fsl_esdhc_cfg *cfg;
 
-	cfg = malloc(sizeof(struct fsl_esdhc_cfg));
-	memset(cfg, 0, sizeof(struct fsl_esdhc_cfg));
+	cfg = calloc(sizeof(struct fsl_esdhc_cfg), 1);
 	cfg->esdhc_base = CONFIG_SYS_FSL_ESDHC_ADDR;
 	cfg->sdhc_clk = gd->arch.sdhc_clk;
 	return fsl_esdhc_initialize(bis, cfg);
diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c
index 2590f1bcce5e1fa223732e15c33cdd980ff0e725..0a2f5358e2fe386a322a151fdb93cbd5ea50f497 100644
--- a/drivers/mmc/mmc.c
+++ b/drivers/mmc/mmc.c
@@ -524,48 +524,70 @@ static int sd_send_op_cond(struct mmc *mmc)
 	return 0;
 }
 
-static int mmc_send_op_cond(struct mmc *mmc)
+/* We pass in the cmd since otherwise the init seems to fail */
+static int mmc_send_op_cond_iter(struct mmc *mmc, struct mmc_cmd *cmd,
+		int use_arg)
 {
-	int timeout = 10000;
-	struct mmc_cmd cmd;
 	int err;
 
+	cmd->cmdidx = MMC_CMD_SEND_OP_COND;
+	cmd->resp_type = MMC_RSP_R3;
+	cmd->cmdarg = 0;
+	if (use_arg && !mmc_host_is_spi(mmc)) {
+		cmd->cmdarg =
+			(mmc->voltages &
+			(mmc->op_cond_response & OCR_VOLTAGE_MASK)) |
+			(mmc->op_cond_response & OCR_ACCESS_MODE);
+
+		if (mmc->host_caps & MMC_MODE_HC)
+			cmd->cmdarg |= OCR_HCS;
+	}
+	err = mmc_send_cmd(mmc, cmd, NULL);
+	if (err)
+		return err;
+	mmc->op_cond_response = cmd->response[0];
+	return 0;
+}
+
+int mmc_send_op_cond(struct mmc *mmc)
+{
+	struct mmc_cmd cmd;
+	int err, i;
+
 	/* Some cards seem to need this */
 	mmc_go_idle(mmc);
 
  	/* Asking to the card its capabilities */
- 	cmd.cmdidx = MMC_CMD_SEND_OP_COND;
- 	cmd.resp_type = MMC_RSP_R3;
- 	cmd.cmdarg = 0;
-
- 	err = mmc_send_cmd(mmc, &cmd, NULL);
+	mmc->op_cond_pending = 1;
+	for (i = 0; i < 2; i++) {
+		err = mmc_send_op_cond_iter(mmc, &cmd, i != 0);
+		if (err)
+			return err;
 
- 	if (err)
- 		return err;
+		/* exit if not busy (flag seems to be inverted) */
+		if (mmc->op_cond_response & OCR_BUSY)
+			return 0;
+	}
+	return IN_PROGRESS;
+}
 
- 	udelay(1000);
+int mmc_complete_op_cond(struct mmc *mmc)
+{
+	struct mmc_cmd cmd;
+	int timeout = 1000;
+	uint start;
+	int err;
 
+	mmc->op_cond_pending = 0;
+	start = get_timer(0);
 	do {
-		cmd.cmdidx = MMC_CMD_SEND_OP_COND;
-		cmd.resp_type = MMC_RSP_R3;
-		cmd.cmdarg = (mmc_host_is_spi(mmc) ? 0 :
-				(mmc->voltages &
-				(cmd.response[0] & OCR_VOLTAGE_MASK)) |
-				(cmd.response[0] & OCR_ACCESS_MODE));
-
-		if (mmc->host_caps & MMC_MODE_HC)
-			cmd.cmdarg |= OCR_HCS;
-
-		err = mmc_send_cmd(mmc, &cmd, NULL);
-
+		err = mmc_send_op_cond_iter(mmc, &cmd, 1);
 		if (err)
 			return err;
-
-		udelay(1000);
-	} while (!(cmd.response[0] & OCR_BUSY) && timeout--);
-
-	if (timeout <= 0)
-		return UNUSABLE_ERR;
+		if (get_timer(start) > timeout)
+			return UNUSABLE_ERR;
+		udelay(100);
+	} while (!(mmc->op_cond_response & OCR_BUSY));
 
 	if (mmc_host_is_spi(mmc)) { /* read OCR for spi */
 		cmd.cmdidx = MMC_CMD_SPI_READ_OCR;
@@ -1274,7 +1296,7 @@ block_dev_desc_t *mmc_get_dev(int dev)
 }
 #endif
 
-int mmc_init(struct mmc *mmc)
+int mmc_start_init(struct mmc *mmc)
 {
 	int err;
 
@@ -1314,17 +1336,48 @@ int mmc_init(struct mmc *mmc)
 	if (err == TIMEOUT) {
 		err = mmc_send_op_cond(mmc);
 
-		if (err) {
+		if (err && err != IN_PROGRESS) {
 			printf("Card did not respond to voltage select!\n");
 			return UNUSABLE_ERR;
 		}
 	}
 
-	err = mmc_startup(mmc);
+	if (err == IN_PROGRESS)
+		mmc->init_in_progress = 1;
+
+	return err;
+}
+
+static int mmc_complete_init(struct mmc *mmc)
+{
+	int err = 0;
+
+	if (mmc->op_cond_pending)
+		err = mmc_complete_op_cond(mmc);
+
+	if (!err)
+		err = mmc_startup(mmc);
 	if (err)
 		mmc->has_init = 0;
 	else
 		mmc->has_init = 1;
+	mmc->init_in_progress = 0;
+	return err;
+}
+
+int mmc_init(struct mmc *mmc)
+{
+	int err = IN_PROGRESS;
+	unsigned start = get_timer(0);
+
+	if (mmc->has_init)
+		return 0;
+	if (!mmc->init_in_progress)
+		err = mmc_start_init(mmc);
+
+	if (!err || err == IN_PROGRESS)
+		err = mmc_complete_init(mmc);
+	debug("%s: %d, time %lu\n", __func__, err, get_timer(start));
 	return err;
 }
 
@@ -1362,6 +1415,25 @@ int get_mmc_num(void)
 	return cur_dev_num;
 }
 
+void mmc_set_preinit(struct mmc *mmc, int preinit)
+{
+	mmc->preinit = preinit;
+}
+
+static void do_preinit(void)
+{
+	struct mmc *m;
+	struct list_head *entry;
+
+	list_for_each(entry, &mmc_devices) {
+		m = list_entry(entry, struct mmc, link);
+
+		if (m->preinit)
+			mmc_start_init(m);
+	}
+}
+
+
 int mmc_initialize(bd_t *bis)
 {
 	INIT_LIST_HEAD (&mmc_devices);
@@ -1372,5 +1444,6 @@ int mmc_initialize(bd_t *bis)
 
 	print_mmc_devices(',');
 
+	do_preinit();
 	return 0;
 }
diff --git a/drivers/mmc/mv_sdhci.c b/drivers/mmc/mv_sdhci.c
index 2fe34b6993b8a8a006da4d0ae5704e1d1e0f66a7..63e1f9062b8a406e020953c3cf9b1a0c630d452b 100644
--- a/drivers/mmc/mv_sdhci.c
+++ b/drivers/mmc/mv_sdhci.c
@@ -51,6 +51,5 @@ int mv_sdh_init(u32 regbase, u32 max_clk, u32 min_clk, u32 quirks)
 		host->version = sdhci_readl(host, SDHCI_HOST_VERSION - 2) >> 16;
 	else
 		host->version = sdhci_readw(host, SDHCI_HOST_VERSION);
-	add_sdhci(host, max_clk, min_clk);
-	return 0;
+	return add_sdhci(host, max_clk, min_clk);
 }
diff --git a/drivers/mmc/s5p_sdhci.c b/drivers/mmc/s5p_sdhci.c
index dc49d37f5caee08ebfccd0efe182eed6bc53ffeb..e50ff925a5691164b26b7b8eff09fee637b4707c 100644
--- a/drivers/mmc/s5p_sdhci.c
+++ b/drivers/mmc/s5p_sdhci.c
@@ -94,6 +94,5 @@ int s5p_sdhci_init(u32 regbase, int index, int bus_width)
 
 	host->host_caps = MMC_MODE_HC;
 
-	add_sdhci(host, 52000000, 400000);
-	return 0;
+	return add_sdhci(host, 52000000, 400000);
 }
diff --git a/drivers/mmc/spear_sdhci.c b/drivers/mmc/spear_sdhci.c
new file mode 100644
index 0000000000000000000000000000000000000000..23f1f4b7016e82578310c60665488b7716b96c85
--- /dev/null
+++ b/drivers/mmc/spear_sdhci.c
@@ -0,0 +1,44 @@
+/*
+ * (C) Copyright 2012
+ * Vipin Kumar, ST Micoelectronics, vipin.kumar@st.com.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <common.h>
+#include <malloc.h>
+#include <sdhci.h>
+
+int spear_sdhci_init(u32 regbase, u32 max_clk, u32 min_clk, u32 quirks)
+{
+	struct sdhci_host *host = NULL;
+	host = (struct sdhci_host *)malloc(sizeof(struct sdhci_host));
+	if (!host) {
+		printf("sdhci host malloc fail!\n");
+		return 1;
+	}
+
+	host->name = "sdhci";
+	host->ioaddr = (void *)regbase;
+	host->quirks = quirks;
+
+	if (quirks & SDHCI_QUIRK_REG32_RW)
+		host->version = sdhci_readl(host, SDHCI_HOST_VERSION - 2) >> 16;
+	else
+		host->version = sdhci_readw(host, SDHCI_HOST_VERSION);
+
+	add_sdhci(host, max_clk, min_clk);
+	return 0;
+}
diff --git a/include/mmc.h b/include/mmc.h
index 8bbc6b6ebec9071eaf3bbcc071ff41b8950c6ece..566db59ac9fdd5e682f5866b34673164e49e8d15 100644
--- a/include/mmc.h
+++ b/include/mmc.h
@@ -68,6 +68,7 @@
 #define UNUSABLE_ERR		-17 /* Unusable Card */
 #define COMM_ERR		-18 /* Communications Error */
 #define TIMEOUT			-19
+#define IN_PROGRESS		-20 /* operation is in progress */
 
 #define MMC_CMD_GO_IDLE_STATE		0
 #define MMC_CMD_SEND_OP_COND		1
@@ -270,6 +271,10 @@ struct mmc {
 	int (*getcd)(struct mmc *mmc);
 	int (*getwp)(struct mmc *mmc);
 	uint b_max;
+	char op_cond_pending;	/* 1 if we are waiting on an op_cond command */
+	char init_in_progress;	/* 1 if we have done mmc_start_init() */
+	char preinit;		/* start init as early as possible */
+	uint op_cond_response;	/* the response byte from the last op_cond */
 };
 
 int mmc_register(struct mmc *mmc);
@@ -287,6 +292,31 @@ int mmc_getcd(struct mmc *mmc);
 int mmc_getwp(struct mmc *mmc);
 void spl_mmc_load(void) __noreturn;
 
+/**
+ * Start device initialization and return immediately; it does not block on
+ * polling OCR (operation condition register) status.  Then you should call
+ * mmc_init, which would block on polling OCR status and complete the device
+ * initializatin.
+ *
+ * @param mmc	Pointer to a MMC device struct
+ * @return 0 on success, IN_PROGRESS on waiting for OCR status, <0 on error.
+ */
+int mmc_start_init(struct mmc *mmc);
+
+/**
+ * Set preinit flag of mmc device.
+ *
+ * This will cause the device to be pre-inited during mmc_initialize(),
+ * which may save boot time if the device is not accessed until later.
+ * Some eMMC devices take 200-300ms to init, but unfortunately they
+ * must be sent a series of commands to even get them to start preparing
+ * for operation.
+ *
+ * @param mmc		Pointer to a MMC device struct
+ * @param preinit	preinit flag value
+ */
+void mmc_set_preinit(struct mmc *mmc, int preinit);
+
 #ifdef CONFIG_GENERIC_MMC
 #define mmc_host_is_spi(mmc)	((mmc)->host_caps & MMC_MODE_SPI)
 struct mmc *mmc_spi_init(uint bus, uint cs, uint speed, uint mode);