diff --git a/CHANGELOG b/CHANGELOG
index 4cd62f66e658e44a0377010c12b6452abfc32532..82d977ee2d20cae0af61a9daf29030f8b9b1590e 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,16 @@
 Changes since U-Boot 1.1.1:
 ======================================================================
 
+* Patch by Markus Pietrek, 04 May 2004:
+  Fix clear_bss code for ARM systems (all except s3c44b0 which
+  doesn't clear BSS at all?)
+
+* Fix "ping" problem on INCA-IP board. Strange problem:
+  Sometimes the store word instruction hangs while writing to one of
+  the Switch registers, but only if the next instruction is 16-byte
+  aligned. Moving the instruction into a separate function somehow
+  makes the problem go away.
+
 * Patch by Rishi Bhattacharya, 08 May 2004:
   Add support for TI OMAP5912 OSK Board
 
diff --git a/cpu/arm720t/start.S b/cpu/arm720t/start.S
index 791049a2f546dc2d1a686773f95b67b49046bf8b..f6ae9d67a4a576e8ca65f682b8dc542aaf74e053 100644
--- a/cpu/arm720t/start.S
+++ b/cpu/arm720t/start.S
@@ -154,7 +154,6 @@ stack_setup:
 
 clear_bss:
 	ldr	r0, _bss_start		/* find start of bss segment        */
-	add	r0, r0, #4		/* start at first byte of bss       */
 	ldr	r1, _bss_end		/* stop here                        */
 	mov 	r2, #0x00000000		/* clear                            */
 
diff --git a/cpu/arm920t/start.S b/cpu/arm920t/start.S
index 49264da99277325485fb24832e7485b190dbc712..0e372d0e4ceb5ebcef10ddeff9355eaf9b16cd69 100644
--- a/cpu/arm920t/start.S
+++ b/cpu/arm920t/start.S
@@ -191,7 +191,6 @@ stack_setup:
 
 clear_bss:
 	ldr	r0, _bss_start		/* find start of bss segment        */
-	add	r0, r0, #4		/* start at first byte of bss       */
 	ldr	r1, _bss_end		/* stop here                        */
 	mov 	r2, #0x00000000		/* clear                            */
 
diff --git a/cpu/arm925t/start.S b/cpu/arm925t/start.S
index da84de19bd37832cec28ab25acdc56f3e15e9ed2..134a57639e25b24e3011205fb22956f294ba6f23 100644
--- a/cpu/arm925t/start.S
+++ b/cpu/arm925t/start.S
@@ -197,7 +197,6 @@ stack_setup:
 
 clear_bss:
 	ldr	r0, _bss_start		/* find start of bss segment        */
-	add	r0, r0, #4		/* start at first byte of bss       */
 	ldr	r1, _bss_end		/* stop here                        */
 	mov 	r2, #0x00000000		/* clear                            */
 
diff --git a/cpu/arm926ejs/start.S b/cpu/arm926ejs/start.S
index ad5d84770e5651e4915e933635ef7ad00d2b706b..70be4de628909217eb887a0f6def5ff80239b75a 100644
--- a/cpu/arm926ejs/start.S
+++ b/cpu/arm926ejs/start.S
@@ -172,7 +172,6 @@ stack_setup:
 
 clear_bss:
 	ldr	r0, _bss_start		/* find start of bss segment        */
-	add	r0, r0, #4		/* start at first byte of bss       */
 	ldr	r1, _bss_end		/* stop here                        */
 	mov 	r2, #0x00000000		/* clear                            */
 
diff --git a/cpu/at91rm9200/start.S b/cpu/at91rm9200/start.S
index b9b889ab8cd75d9686b358dc9a27d221d843508a..d73af20dfb1416f23658c93809eae52ce37285f1 100644
--- a/cpu/at91rm9200/start.S
+++ b/cpu/at91rm9200/start.S
@@ -147,7 +147,6 @@ stack_setup:
 
 clear_bss:
 	ldr	r0, _bss_start		/* find start of bss segment        */
-	add	r0, r0, #4		/* start at first byte of bss       */
 	ldr	r1, _bss_end		/* stop here                        */
 	mov 	r2, #0x00000000		/* clear                            */
 
diff --git a/cpu/ixp/start.S b/cpu/ixp/start.S
index 09ecc73a00c8a9ce22869ea30eda54dff9b95b39..9240b5cd3e656df20fdaaba09d146c9208a089f3 100644
--- a/cpu/ixp/start.S
+++ b/cpu/ixp/start.S
@@ -289,7 +289,6 @@ stack_setup:
 
 clear_bss:
 	ldr	r0, _bss_start		/* find start of bss segment        */
-	add	r0, r0, #4		/* start at first byte of bss       */
 	ldr	r1, _bss_end		/* stop here                        */
 	mov 	r2, #0x00000000		/* clear                            */
 
diff --git a/cpu/pxa/start.S b/cpu/pxa/start.S
index de2a084aadf1e37a2e86fd0f6b12c0d31be6e93f..b1f6e7e615b9836382b4790ecf3352556a8aeb29 100644
--- a/cpu/pxa/start.S
+++ b/cpu/pxa/start.S
@@ -141,7 +141,6 @@ stack_setup:
 
 clear_bss:
 	ldr	r0, _bss_start		/* find start of bss segment        */
-	add	r0, r0, #4		/* start at first byte of bss       */
 	ldr	r1, _bss_end		/* stop here                        */
 	mov 	r2, #0x00000000		/* clear                            */
 
diff --git a/cpu/sa1100/start.S b/cpu/sa1100/start.S
index fe1316cbe7624085b0984bba9ee0fad26e91d380..52f2c5d36e3ba40d1de957a31fe83954e6398057 100644
--- a/cpu/sa1100/start.S
+++ b/cpu/sa1100/start.S
@@ -154,7 +154,6 @@ stack_setup:
 
 clear_bss:
 	ldr	r0, _bss_start		/* find start of bss segment        */
-	add	r0, r0, #4		/* start at first byte of bss       */
 	ldr	r1, _bss_end		/* stop here                        */
 	mov 	r2, #0x00000000		/* clear                            */
 
diff --git a/drivers/inca-ip_sw.c b/drivers/inca-ip_sw.c
index f8fe52ea26db9def4535bcce7eac9272b2b59598..ab22b4d5385417d5563bc73029157a6e3a129fcb 100644
--- a/drivers/inca-ip_sw.c
+++ b/drivers/inca-ip_sw.c
@@ -41,13 +41,21 @@
 
 
 #define DELAY	udelay(10000)
+  /* Workaround: a store word to one of the Switch registers sometimes
+   * hangs, but only if the next instruction is 16-byte aligned. Doing
+   * the store in this separate function somehow avoids the problem.
+   */
+static void SWORD(volatile u32 * reg, u32 value)
+{
+	*reg = value;	/* the single 32-bit store being isolated */
+}
 
 #define DMA_WRITE_REG(reg, value) *((volatile u32 *)reg) = (u32)value;
 #define DMA_READ_REG(reg, value)    value = (u32)*((volatile u32*)reg)
 #define SW_WRITE_REG(reg, value)   \
-	*((volatile u32*)reg) = (u32)value;\
+	SWORD(reg, value);\
 	DELAY;\
-	*((volatile u32*)reg) = (u32)value;
+	SWORD(reg, value);
 
 #define SW_READ_REG(reg, value)	   \
 	value = (u32)*((volatile u32*)reg);\