diff --git a/README b/README
index a248ab5d39fc85438ba4adc6ffd0141a5dffd99c..7129df822c17c9c9bc5c7089ec54b0fed6381d01 100644
--- a/README
+++ b/README
@@ -1424,9 +1424,6 @@ The following options need to be configured:
 		CONFIG_USB_EHCI_TXFIFO_THRESH enables setting of the
 		txfilltuning field in the EHCI controller on reset.
 
-		CONFIG_USB_HUB_MIN_POWER_ON_DELAY defines the minimum
-		interval for usb hub power-on delay.(minimum 100msec)
-
 - USB Device:
 		Define the below if you wish to use the USB console.
 		Once firmware is rebuilt from a serial console issue the
diff --git a/common/usb_hub.c b/common/usb_hub.c
index ffac0e743cef38f2c98cb7a8c58d40acd5097516..2add4b97920fefedbad4d2cd04b888d294992841 100644
--- a/common/usb_hub.c
+++ b/common/usb_hub.c
@@ -35,10 +35,6 @@
 #include <asm/4xx_pci.h>
 #endif
 
-#ifndef CONFIG_USB_HUB_MIN_POWER_ON_DELAY
-#define CONFIG_USB_HUB_MIN_POWER_ON_DELAY	100
-#endif
-
 #define USB_BUFSIZ	512
 
 static struct usb_hub_device hub_dev[USB_MAX_HUB];
@@ -138,8 +134,11 @@ static void usb_hub_power_on(struct usb_hub_device *hub)
 		debug("port %d returns %lX\n", i + 1, dev->status);
 	}
 
-	/* Wait for power to become stable */
-	mdelay(max(pgood_delay, CONFIG_USB_HUB_MIN_POWER_ON_DELAY));
+	/*
+	 * Wait for power to become stable,
+	 * plus spec-defined max time for device to connect
+	 */
+	mdelay(pgood_delay + 1000);
 }
 
 void usb_hub_reset(void)
diff --git a/drivers/dfu/dfu.c b/drivers/dfu/dfu.c
index a93810934ac90ec670792b979163d9a693b97d92..dc09ff6466e6f284bbc4c4764097fa612406d8a2 100644
--- a/drivers/dfu/dfu.c
+++ b/drivers/dfu/dfu.c
@@ -13,6 +13,7 @@
 #include <mmc.h>
 #include <fat.h>
 #include <dfu.h>
+#include <hash.h>
 #include <linux/list.h>
 #include <linux/compiler.h>
 
@@ -20,6 +21,7 @@ static bool dfu_reset_request;
 static LIST_HEAD(dfu_list);
 static int dfu_alt_num;
 static int alt_num_cnt;
+static struct hash_algo *dfu_hash_algo;
 
 bool dfu_reset(void)
 {
@@ -99,6 +101,23 @@ unsigned char *dfu_get_buf(void)
 	return dfu_buf;
 }
 
+/* Return env "dfu_hash_algo" if it names a supported hash, else NULL */
+static char *dfu_get_hash_algo(void)
+{
+	char *name = getenv("dfu_hash_algo");
+
+	if (!name)
+		return NULL;
+
+	if (strcmp(name, "crc32") != 0) {
+		error("DFU hash method: %s not supported!\n", name);
+		return NULL;
+	}
+
+	debug("%s: DFU hash method: %s\n", __func__, name);
+	return name;
+}
+
 static int dfu_write_buffer_drain(struct dfu_entity *dfu)
 {
 	long w_size;
@@ -109,8 +128,9 @@ static int dfu_write_buffer_drain(struct dfu_entity *dfu)
 	if (w_size == 0)
 		return 0;
 
-	/* update CRC32 */
-	dfu->crc = crc32(dfu->crc, dfu->i_buf_start, w_size);
+	if (dfu_hash_algo)
+		dfu_hash_algo->hash_update(dfu_hash_algo, &dfu->crc,
+					   dfu->i_buf_start, w_size, 0);
 
 	ret = dfu->write_medium(dfu, dfu->offset, dfu->i_buf_start, &w_size);
 	if (ret)
@@ -138,7 +158,9 @@ int dfu_flush(struct dfu_entity *dfu, void *buf, int size, int blk_seq_num)
 	if (dfu->flush_medium)
 		ret = dfu->flush_medium(dfu);
 
-	printf("\nDFU complete CRC32: 0x%08x\n", dfu->crc);
+	if (dfu_hash_algo)
+		printf("\nDFU complete %s: 0x%08x\n", dfu_hash_algo->name,
+		       dfu->crc);
 
 	/* clear everything */
 	dfu_free_buf();
@@ -238,7 +260,11 @@ static int dfu_read_buffer_fill(struct dfu_entity *dfu, void *buf, int size)
 		/* consume */
 		if (chunk > 0) {
 			memcpy(buf, dfu->i_buf, chunk);
-			dfu->crc = crc32(dfu->crc, buf, chunk);
+			if (dfu_hash_algo)
+				dfu_hash_algo->hash_update(dfu_hash_algo,
+							   &dfu->crc, buf,
+							   chunk, 0);
+
 			dfu->i_buf += chunk;
 			dfu->b_left -= chunk;
 			dfu->r_left -= chunk;
@@ -322,7 +348,9 @@ int dfu_read(struct dfu_entity *dfu, void *buf, int size, int blk_seq_num)
 	}
 
 	if (ret < size) {
-		debug("%s: %s CRC32: 0x%x\n", __func__, dfu->name, dfu->crc);
+		if (dfu_hash_algo)
+			debug("%s: %s %s: 0x%x\n", __func__, dfu->name,
+			      dfu_hash_algo->name, dfu->crc);
 		puts("\nUPLOAD ... done\nCtrl+C to exit ...\n");
 
 		dfu_free_buf();
@@ -397,6 +425,14 @@ int dfu_config_entities(char *env, char *interface, int num)
 	dfu_alt_num = dfu_find_alt_num(env);
 	debug("%s: dfu_alt_num=%d\n", __func__, dfu_alt_num);
 
+	dfu_hash_algo = NULL;
+	s = dfu_get_hash_algo();
+	if (s) {
+		ret = hash_lookup_algo(s, &dfu_hash_algo);
+		if (ret)
+			error("Hash algorithm %s not supported\n", s);
+	}
+
 	dfu = calloc(sizeof(*dfu), dfu_alt_num);
 	if (!dfu)
 		return -1;
diff --git a/drivers/usb/gadget/ci_udc.c b/drivers/usb/gadget/ci_udc.c
index 9cd003636a4499ba57637f37f9e8fdeed709340f..b18bee43ad894ed886076ac126a23fa42a6761e5 100644
--- a/drivers/usb/gadget/ci_udc.c
+++ b/drivers/usb/gadget/ci_udc.c
@@ -56,14 +56,7 @@ static const char *reqname(unsigned r)
 }
 #endif
 
-static struct usb_endpoint_descriptor ep0_out_desc = {
-	.bLength = sizeof(struct usb_endpoint_descriptor),
-	.bDescriptorType = USB_DT_ENDPOINT,
-	.bEndpointAddress = 0,
-	.bmAttributes =	USB_ENDPOINT_XFER_CONTROL,
-};
-
-static struct usb_endpoint_descriptor ep0_in_desc = {
+static struct usb_endpoint_descriptor ep0_desc = {
 	.bLength = sizeof(struct usb_endpoint_descriptor),
 	.bDescriptorType = USB_DT_ENDPOINT,
 	.bEndpointAddress = USB_DIR_IN,
@@ -205,8 +198,14 @@ static void ci_invalidate_qtd(int ep_num)
 static struct usb_request *
 ci_ep_alloc_request(struct usb_ep *ep, unsigned int gfp_flags)
 {
+	struct ci_ep *ci_ep = container_of(ep, struct ci_ep, ep);
+	int num;
 	struct ci_req *ci_req;
 
+	num = ci_ep->desc->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK;
+	if (num == 0 && controller.ep0_req)
+		return &controller.ep0_req->req;
+
 	ci_req = memalign(ARCH_DMA_MINALIGN, sizeof(*ci_req));
 	if (!ci_req)
 		return NULL;
@@ -214,14 +213,22 @@ ci_ep_alloc_request(struct usb_ep *ep, unsigned int gfp_flags)
 	INIT_LIST_HEAD(&ci_req->queue);
 	ci_req->b_buf = 0;
 
+	if (num == 0)
+		controller.ep0_req = ci_req;
+
 	return &ci_req->req;
 }
 
 static void ci_ep_free_request(struct usb_ep *ep, struct usb_request *req)
 {
-	struct ci_req *ci_req;
+	struct ci_ep *ci_ep = container_of(ep, struct ci_ep, ep);
+	struct ci_req *ci_req = container_of(req, struct ci_req, req);
+	int num;
+
+	num = ci_ep->desc->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK;
+	if (num == 0)
+		controller.ep0_req = 0;
 
-	ci_req = container_of(req, struct ci_req, req);
 	if (ci_req->b_buf)
 		free(ci_req->b_buf);
 	free(ci_req);
@@ -362,18 +369,50 @@ static void ci_ep_submit_next_request(struct ci_ep *ci_ep)
 	ci_req = list_first_entry(&ci_ep->queue, struct ci_req, queue);
 	len = ci_req->req.length;
 
-	item->next = TERMINATE;
-	item->info = INFO_BYTES(len) | INFO_IOC | INFO_ACTIVE;
+	item->info = INFO_BYTES(len) | INFO_ACTIVE;
 	item->page0 = (uint32_t)ci_req->hw_buf;
 	item->page1 = ((uint32_t)ci_req->hw_buf & 0xfffff000) + 0x1000;
 	item->page2 = ((uint32_t)ci_req->hw_buf & 0xfffff000) + 0x2000;
 	item->page3 = ((uint32_t)ci_req->hw_buf & 0xfffff000) + 0x3000;
 	item->page4 = ((uint32_t)ci_req->hw_buf & 0xfffff000) + 0x4000;
-	ci_flush_qtd(num);
 
 	head->next = (unsigned) item;
 	head->info = 0;
 
+	/*
+	 * When sending the data for an IN transaction, the attached host
+	 * knows that all data for the IN is sent when one of the following
+	 * occurs:
+	 * a) A zero-length packet is transmitted.
+	 * b) A packet with length that isn't an exact multiple of the ep's
+	 *    maxpacket is transmitted.
+	 * c) Enough data is sent to exactly fill the host's maximum expected
+	 *    IN transaction size.
+	 *
+	 * One of these conditions MUST apply at the end of an IN transaction,
+	 * or the transaction will not be considered complete by the host. If
+	 * none of (a)..(c) already applies, then we must force (a) to apply
+	 * by explicitly sending an extra zero-length packet.
+	 */
+	/*  IN    !a     !b                              !c */
+	if (in && len && !(len % ci_ep->ep.maxpacket) && ci_req->req.zero) {
+		/*
+		 * Each endpoint has 2 items allocated, even though typically
+		 * only 1 is used at a time since either an IN or an OUT but
+		 * not both is queued. For an IN transaction, item currently
+		 * points at the second of these items, so we know that we
+		 * can use (item - 1) to transmit the extra zero-length packet
+		 */
+		item->next = (unsigned)(item - 1);
+		item--;
+		item->info = INFO_ACTIVE;
+	}
+
+	item->next = TERMINATE;
+	item->info |= INFO_IOC;
+
+	ci_flush_qtd(num);
+
 	DBG("ept%d %s queue len %x, req %p, buffer %p\n",
 	    num, in ? "in" : "out", len, ci_req, ci_req->hw_buf);
 	ci_flush_qh(num);
@@ -397,6 +436,21 @@ static int ci_ep_queue(struct usb_ep *ep,
 	num = ci_ep->desc->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK;
 	in = (ci_ep->desc->bEndpointAddress & USB_DIR_IN) != 0;
 
+	if (!num && ci_ep->req_primed) {
+		/*
+		 * The flipping of ep0 between IN and OUT relies on
+		 * ci_ep_queue consuming the current IN/OUT setting
+		 * immediately. If this is deferred to a later point when the
+		 * req is pulled out of ci_req->queue, then the IN/OUT setting
+		 * may have been changed since the req was queued, and state
+		 * will get out of sync. This condition doesn't occur today,
+		 * but could if bugs were introduced later, and this error
+		 * check will save a lot of debugging time.
+		 */
+		printf("%s: ep0 transaction already in progress\n", __func__);
+		return -EPROTO;
+	}
+
 	ret = ci_bounce(ci_req, in);
 	if (ret)
 		return ret;
@@ -411,6 +465,17 @@ static int ci_ep_queue(struct usb_ep *ep,
 	return 0;
 }
 
+/*
+ * Toggle the shared ep0 descriptor between IN and OUT.
+ */
+static void flip_ep0_direction(void)
+{
+	bool to_out = ep0_desc.bEndpointAddress == USB_DIR_IN;
+
+	DBG("%s: Flipping ep0 to %s\n", __func__, to_out ? "OUT" : "IN");
+	ep0_desc.bEndpointAddress = to_out ? 0 : USB_DIR_IN;
+}
+
 static void handle_ep_complete(struct ci_ep *ep)
 {
 	struct ept_queue_item *item;
@@ -419,8 +484,6 @@ static void handle_ep_complete(struct ci_ep *ep)
 
 	num = ep->desc->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK;
 	in = (ep->desc->bEndpointAddress & USB_DIR_IN) != 0;
-	if (num == 0)
-		ep->desc = &ep0_out_desc;
 	item = ci_get_qtd(num, in);
 	ci_invalidate_qtd(num);
 
@@ -441,11 +504,18 @@ static void handle_ep_complete(struct ci_ep *ep)
 
 	DBG("ept%d %s req %p, complete %x\n",
 	    num, in ? "in" : "out", ci_req, len);
-	ci_req->req.complete(&ep->ep, &ci_req->req);
-	if (num == 0) {
+	if (num != 0 || controller.ep0_data_phase)
+		ci_req->req.complete(&ep->ep, &ci_req->req);
+	if (num == 0 && controller.ep0_data_phase) {
+		/*
+		 * Data Stage is complete, so flip ep0 dir for Status Stage,
+		 * which always transfers a packet in the opposite direction.
+		 */
+		DBG("%s: flip ep0 dir for Status Stage\n", __func__);
+		flip_ep0_direction();
+		controller.ep0_data_phase = false;
 		ci_req->req.length = 0;
 		usb_ep_queue(&ep->ep, &ci_req->req, 0);
-		ep->desc = &ep0_in_desc;
 	}
 }
 
@@ -463,7 +533,7 @@ static void handle_setup(void)
 	int num, in, _num, _in, i;
 	char *buf;
 
-	ci_req = list_first_entry(&ci_ep->queue, struct ci_req, queue);
+	ci_req = controller.ep0_req;
 	req = &ci_req->req;
 	head = ci_get_qh(0, 0);	/* EP0 OUT */
 
@@ -474,8 +544,26 @@ static void handle_setup(void)
 #else
 	writel(EPT_RX(0), &udc->epstat);
 #endif
-	DBG("handle setup %s, %x, %x index %x value %x\n", reqname(r.bRequest),
-	    r.bRequestType, r.bRequest, r.wIndex, r.wValue);
+	DBG("handle setup %s, %x, %x index %x value %x length %x\n",
+	    reqname(r.bRequest), r.bRequestType, r.bRequest, r.wIndex,
+	    r.wValue, r.wLength);
+
+	/* Set EP0 dir for Data Stage based on Setup Stage data */
+	if (r.bRequestType & USB_DIR_IN) {
+		DBG("%s: Set ep0 to IN for Data Stage\n", __func__);
+		ep0_desc.bEndpointAddress = USB_DIR_IN;
+	} else {
+		DBG("%s: Set ep0 to OUT for Data Stage\n", __func__);
+		ep0_desc.bEndpointAddress = 0;
+	}
+	if (r.wLength) {
+		controller.ep0_data_phase = true;
+	} else {
+		/* 0 length -> no Data Stage. Flip dir for Status Stage */
+		DBG("%s: 0 length: flip ep0 dir for Status Stage\n", __func__);
+		flip_ep0_direction();
+		controller.ep0_data_phase = false;
+	}
 
 	list_del_init(&ci_req->queue);
 	ci_ep->req_primed = false;
@@ -646,6 +734,17 @@ int usb_gadget_handle_interrupts(void)
 	return value;
 }
 
+void udc_disconnect(void)
+{
+	struct ci_udc *udc = (struct ci_udc *)controller.ctrl->hcor;
+	/* disable pullup; shared by ci_pullup() and driver unregister */
+	stop_activity();
+	writel(USBCMD_FS2, &udc->usbcmd);
+	udelay(800);
+	if (controller.driver)
+		controller.driver->disconnect(&controller.gadget);
+}
+
 static int ci_pullup(struct usb_gadget *gadget, int is_on)
 {
 	struct ci_udc *udc = (struct ci_udc *)controller.ctrl->hcor;
@@ -664,27 +763,12 @@ static int ci_pullup(struct usb_gadget *gadget, int is_on)
 		/* Turn on the USB connection by enabling the pullup resistor */
 		writel(USBCMD_ITC(MICRO_8FRAME) | USBCMD_RUN, &udc->usbcmd);
 	} else {
-		stop_activity();
-		writel(USBCMD_FS2, &udc->usbcmd);
-		udelay(800);
-		if (controller.driver)
-			controller.driver->disconnect(gadget);
+		udc_disconnect();
 	}
 
 	return 0;
 }
 
-void udc_disconnect(void)
-{
-	struct ci_udc *udc = (struct ci_udc *)controller.ctrl->hcor;
-	/* disable pullup */
-	stop_activity();
-	writel(USBCMD_FS2, &udc->usbcmd);
-	udelay(800);
-	if (controller.driver)
-		controller.driver->disconnect(&controller.gadget);
-}
-
 static int ci_udc_probe(void)
 {
 	struct ept_queue_head *head;
@@ -756,7 +840,7 @@ static int ci_udc_probe(void)
 
 	/* Init EP 0 */
 	memcpy(&controller.ep[0].ep, &ci_ep_init[0], sizeof(*ci_ep_init));
-	controller.ep[0].desc = &ep0_in_desc;
+	controller.ep[0].desc = &ep0_desc;
 	INIT_LIST_HEAD(&controller.ep[0].queue);
 	controller.ep[0].req_primed = false;
 	controller.gadget.ep0 = &controller.ep[0].ep;
@@ -772,6 +856,13 @@ static int ci_udc_probe(void)
 			      &controller.gadget.ep_list);
 	}
 
+	ci_ep_alloc_request(&controller.ep[0].ep, 0);
+	if (!controller.ep0_req) {
+		free(controller.items_mem);
+		free(controller.epts);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
@@ -816,5 +907,22 @@ int usb_gadget_register_driver(struct usb_gadget_driver *driver)
 
+/*
+ * Tear down the UDC: disconnect from the host, then release the ep0
+ * request and the queue-head/item memory that ci_udc_probe() set up.
+ * The freed pointers are cleared so a repeated call does not free twice.
+ */
 int usb_gadget_unregister_driver(struct usb_gadget_driver *driver)
 {
+	udc_disconnect();
+
+	/* ep0_req is NULL if the gadget already freed it via the ep ops */
+	if (controller.ep0_req)
+		ci_ep_free_request(&controller.ep[0].ep,
+				   &controller.ep0_req->req);
+
+	free(controller.items_mem);
+	controller.items_mem = NULL;
+	free(controller.epts);
+	controller.epts = NULL;
+
 	return 0;
 }
diff --git a/drivers/usb/gadget/ci_udc.h b/drivers/usb/gadget/ci_udc.h
index 23cff56d7ec92d7653ece4611db27cfa8c5ce7a7..c2144021e6750c56badd300e31e01814a92e42e8 100644
--- a/drivers/usb/gadget/ci_udc.h
+++ b/drivers/usb/gadget/ci_udc.h
@@ -97,6 +97,8 @@ struct ci_ep {
 
 struct ci_drv {
 	struct usb_gadget		gadget;
+	struct ci_req			*ep0_req;
+	bool				ep0_data_phase;
 	struct usb_gadget_driver	*driver;
 	struct ehci_ctrl		*ctrl;
 	struct ept_queue_head		*epts;
diff --git a/include/configs/cm_t35.h b/include/configs/cm_t35.h
index 8c60e22c1dd053e5e98c1deff068b470c6066469..5c484ef07882510cc02dd839b12c1e9530fee5f9 100644
--- a/include/configs/cm_t35.h
+++ b/include/configs/cm_t35.h
@@ -104,8 +104,6 @@
 #define CONFIG_USB_DEVICE
 #define CONFIG_USB_TTY
 #define CONFIG_SYS_CONSOLE_IS_IN_ENV
-/* This delay is really for slow-to-power-on USB sticks, not the hub */
-#define CONFIG_USB_HUB_MIN_POWER_ON_DELAY 500
 
 /* commands to include */
 #include <config_cmd_default.h>
diff --git a/include/configs/gw_ventana.h b/include/configs/gw_ventana.h
index cd554957dd89148dbe33c2b38160bbd9714c1656..f41c96ea7fbcd789ec4cae0b83aea3d56542defa 100644
--- a/include/configs/gw_ventana.h
+++ b/include/configs/gw_ventana.h
@@ -192,7 +192,6 @@
 #define CONFIG_USB_ETH_CDC
 #define CONFIG_NETCONSOLE
 #define CONFIG_SYS_USB_EVENT_POLL_VIA_CONTROL_EP
-#define CONFIG_USB_HUB_MIN_POWER_ON_DELAY 1200
 
 /* Framebuffer and LCD */
 #define CONFIG_VIDEO