diff --git a/common/bouncebuf.c b/common/bouncebuf.c
index 4f827f893d665d66963a7f32aeaea4f00222d9d2..1df12cdda07d04f80632190c09b841e85a774b45 100644
--- a/common/bouncebuf.c
+++ b/common/bouncebuf.c
@@ -27,21 +27,19 @@
 #include <errno.h>
 #include <bouncebuf.h>
 
-static int addr_aligned(void *data, size_t len)
+static int addr_aligned(struct bounce_buffer *state)
 {
 	const ulong align_mask = ARCH_DMA_MINALIGN - 1;
 
 	/* Check if start is aligned */
-	if ((ulong)data & align_mask) {
-		debug("Unaligned start address %p\n", data);
+	if ((ulong)state->user_buffer & align_mask) {
+		debug("Unaligned buffer address %p\n", state->user_buffer);
 		return 0;
 	}
 
-	data += len;
-
-	/* Check if end is aligned */
-	if ((ulong)data & align_mask) {
-		debug("Unaligned end address %p\n", data);
+	/* Check if length is aligned */
+	if (state->len != state->len_aligned) {
+		debug("Unaligned buffer length %d\n", state->len);
 		return 0;
 	}
 
@@ -49,44 +47,53 @@ static int addr_aligned(void *data, size_t len)
 	return 1;
 }
 
-int bounce_buffer_start(void **data, size_t len, void **backup, uint8_t flags)
+int bounce_buffer_start(struct bounce_buffer *state, void *data,
+			size_t len, unsigned int flags)
 {
-	void *tmp;
-	size_t alen;
-
-	if (addr_aligned(*data, len)) {
-		*backup = NULL;
-		return 0;
+	state->user_buffer = data;
+	state->bounce_buffer = data;
+	state->len = len;
+	state->len_aligned = roundup(len, ARCH_DMA_MINALIGN);
+	state->flags = flags;
+
+	if (!addr_aligned(state)) {
+		state->bounce_buffer = memalign(ARCH_DMA_MINALIGN,
+						state->len_aligned);
+		if (!state->bounce_buffer)
+			return -ENOMEM;
+
+		if (state->flags & GEN_BB_READ)
+			memcpy(state->bounce_buffer, state->user_buffer,
+				state->len);
 	}
 
-	alen = roundup(len, ARCH_DMA_MINALIGN);
-	tmp = memalign(ARCH_DMA_MINALIGN, alen);
-
-	if (!tmp)
-		return -ENOMEM;
-
-	if (flags & GEN_BB_READ)
-		memcpy(tmp, *data, len);
-
-	*backup = *data;
-	*data = tmp;
+	/*
+	 * Flush data to RAM so DMA reads can pick it up,
+	 * and any CPU writebacks don't race with DMA writes
+	 */
+	flush_dcache_range((unsigned long)state->bounce_buffer,
+				(unsigned long)(state->bounce_buffer) +
+					state->len_aligned);
 
 	return 0;
 }
 
-int bounce_buffer_stop(void **data, size_t len, void **backup, uint8_t flags)
+int bounce_buffer_stop(struct bounce_buffer *state)
 {
-	void *tmp = *data;
+	if (state->flags & GEN_BB_WRITE) {
+		/* Invalidate cache so that CPU can see any newly DMA'd data */
+		invalidate_dcache_range((unsigned long)state->bounce_buffer,
+					(unsigned long)(state->bounce_buffer) +
+						state->len_aligned);
+	}
 
-	/* The buffer was already aligned, since "backup" is NULL. */
-	if (!*backup)
+	if (state->bounce_buffer == state->user_buffer)
 		return 0;
 
-	if (flags & GEN_BB_WRITE)
-		memcpy(*backup, *data, len);
+	if (state->flags & GEN_BB_WRITE)
+		memcpy(state->user_buffer, state->bounce_buffer, state->len);
 
-	*data = *backup;
-	free(tmp);
+	free(state->bounce_buffer);
 
 	return 0;
 }
diff --git a/drivers/mmc/mxsmmc.c b/drivers/mmc/mxsmmc.c
index 109acbf6234bf3eb29e4caf9ba18f5c4294e784e..024df592f2256bd7f846683fd1f8d3ff61baa529 100644
--- a/drivers/mmc/mxsmmc.c
+++ b/drivers/mmc/mxsmmc.c
@@ -96,11 +96,11 @@ static int mxsmmc_send_cmd_pio(struct mxsmmc_priv *priv, struct mmc_data *data)
 static int mxsmmc_send_cmd_dma(struct mxsmmc_priv *priv, struct mmc_data *data)
 {
 	uint32_t data_count = data->blocksize * data->blocks;
-	uint32_t cache_data_count = roundup(data_count, ARCH_DMA_MINALIGN);
 	int dmach;
 	struct mxs_dma_desc *desc = priv->desc;
-	void *addr, *backup;
-	uint8_t flags;
+	void *addr;
+	unsigned int flags;
+	struct bounce_buffer bbstate;
 
 	memset(desc, 0, sizeof(struct mxs_dma_desc));
 	desc->address = (dma_addr_t)desc;
@@ -115,19 +115,9 @@ static int mxsmmc_send_cmd_dma(struct mxsmmc_priv *priv, struct mmc_data *data)
 		flags = GEN_BB_READ;
 	}
 
-	bounce_buffer_start(&addr, data_count, &backup, flags);
+	bounce_buffer_start(&bbstate, addr, data_count, flags);
 
-	priv->desc->cmd.address = (dma_addr_t)addr;
-
-	if (data->flags & MMC_DATA_WRITE) {
-		/* Flush data to DRAM so DMA can pick them up */
-		flush_dcache_range((uint32_t)addr,
-			(uint32_t)(addr) + cache_data_count);
-	}
-
-	/* Invalidate the area, so no writeback into the RAM races with DMA */
-	invalidate_dcache_range((uint32_t)priv->desc->cmd.address,
-			(uint32_t)(priv->desc->cmd.address + cache_data_count));
+	priv->desc->cmd.address = (dma_addr_t)bbstate.bounce_buffer;
 
 	priv->desc->cmd.data |= MXS_DMA_DESC_IRQ | MXS_DMA_DESC_DEC_SEM |
 				(data_count << MXS_DMA_DESC_BYTES_OFFSET);
@@ -135,17 +125,11 @@ static int mxsmmc_send_cmd_dma(struct mxsmmc_priv *priv, struct mmc_data *data)
 	dmach = MXS_DMA_CHANNEL_AHB_APBH_SSP0 + priv->id;
 	mxs_dma_desc_append(dmach, priv->desc);
 	if (mxs_dma_go(dmach)) {
-		bounce_buffer_stop(&addr, data_count, &backup, flags);
+		bounce_buffer_stop(&bbstate);
 		return COMM_ERR;
 	}
 
-	/* The data arrived into DRAM, invalidate cache over them */
-	if (data->flags & MMC_DATA_READ) {
-		invalidate_dcache_range((uint32_t)addr,
-			(uint32_t)(addr) + cache_data_count);
-	}
-
-	bounce_buffer_stop(&addr, data_count, &backup, flags);
+	bounce_buffer_stop(&bbstate);
 
 	return 0;
 }
diff --git a/include/bouncebuf.h b/include/bouncebuf.h
index aa2278c8f94ad05355b1d29bbee1104459e9e064..707b588c733c06d936a0a2fa517b9c4fad47095a 100644
--- a/include/bouncebuf.h
+++ b/include/bouncebuf.h
@@ -25,6 +25,8 @@
 #ifndef __INCLUDE_BOUNCEBUF_H__
 #define __INCLUDE_BOUNCEBUF_H__
 
+#include <linux/types.h>
+
 /*
  * GEN_BB_READ -- Data are read from the buffer eg. by DMA hardware.
  * The source buffer is copied into the bounce buffer (if unaligned, otherwise
@@ -51,23 +53,36 @@
  */
 #define GEN_BB_RW	(GEN_BB_READ | GEN_BB_WRITE)
 
+struct bounce_buffer {
+	/* Copy of data parameter passed to start() */
+	void *user_buffer;
+	/*
+	 * DMA-aligned buffer. This field is always set to the value that
+	 * should be used for DMA; either equal to .user_buffer, or to a
+	 * freshly allocated aligned buffer.
+	 */
+	void *bounce_buffer;
+	/* Copy of len parameter passed to start() */
+	size_t len;
+	/* DMA-aligned buffer length */
+	size_t len_aligned;
+	/* Copy of flags parameter passed to start() */
+	unsigned int flags;
+};
+
 /**
  * bounce_buffer_start() -- Start the bounce buffer session
+ * state:	stores state passed between bounce_buffer_{start,stop}
  * data:	pointer to buffer to be aligned
  * len:		length of the buffer
- * backup:	pointer to backup buffer (the original value is stored here if
- *              needed
  * flags:	flags describing the transaction, see above.
  */
-int bounce_buffer_start(void **data, size_t len, void **backup, uint8_t flags);
+int bounce_buffer_start(struct bounce_buffer *state, void *data,
+			size_t len, unsigned int flags);
 /**
  * bounce_buffer_stop() -- Finish the bounce buffer session
- * data:	pointer to buffer that was aligned
- * len:		length of the buffer
- * backup:	pointer to backup buffer (the original value is stored here if
- *              needed
- * flags:	flags describing the transaction, see above.
+ * state:	stores state passed between bounce_buffer_{start,stop}
  */
-int bounce_buffer_stop(void **data, size_t len, void **backup, uint8_t flags);
+int bounce_buffer_stop(struct bounce_buffer *state);
 
 #endif