
Compare revisions

Changes are shown as if the source revision were being merged into the target revision.

Showing changes with 666 additions and 938 deletions
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#ifndef __ASM_ARC_U_BOOT_H__
......
# SPDX-License-Identifier: GPL-2.0+
#
# Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
#
# SPDX-License-Identifier: GPL-2.0+
#
extra-y = start.o
head-y := start.o
@@ -10,13 +8,6 @@ obj-y += cache.o
obj-y += cpu.o
obj-y += interrupts.o
obj-y += relocate.o
obj-y += strchr-700.o
obj-y += strcmp.o
obj-y += strcpy-700.o
obj-y += strlen.o
obj-y += memcmp.o
obj-y += memcpy-700.o
obj-y += memset.o
obj-y += reset.o
obj-y += ints_low.o
obj-y += init_helpers.o
......
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* Copyright (C) 1995, 1997, 2007-2013 Free Software Foundation, Inc.
*
* SPDX-License-Identifier: GPL-2.0+
*/
/* ANSI concatenation macros. */
......
// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#include <asm/cache.h>
#include <common.h>
DECLARE_GLOBAL_DATA_PTR;
@@ -37,62 +37,67 @@ void arch_lmb_reserve(struct lmb *lmb)
lmb_reserve(lmb, sp, (CONFIG_SYS_SDRAM_BASE + gd->ram_size - sp));
}
int arch_fixup_fdt(void *blob)
{
return 0;
}
static int cleanup_before_linux(void)
{
disable_interrupts();
flush_dcache_all();
invalidate_icache_all();
sync_n_cleanup_cache_all();
return 0;
}
__weak int board_prep_linux(bootm_headers_t *images) { return 0; }
/* Subcommand: PREP */
static void boot_prep_linux(bootm_headers_t *images)
static int boot_prep_linux(bootm_headers_t *images)
{
if (image_setup_linux(images))
hang();
int ret;
ret = image_setup_linux(images);
if (ret)
return ret;
return board_prep_linux(images);
}
__weak void smp_set_core_boot_addr(unsigned long addr, int corenr) {}
__weak void smp_kick_all_cpus(void) {}
/* Generic implementation for single core CPU */
__weak void board_jump_and_run(ulong entry, int zero, int arch, uint params)
{
void (*kernel_entry)(int zero, int arch, uint params);
kernel_entry = (void (*)(int, int, uint))entry;
kernel_entry(zero, arch, params);
}
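Because board_jump_and_run() above is declared __weak, platform code may provide its own version; the start.S comment later in this diff notes that non-master cores may be kicked by platform code right before control passes to the Linux kernel. A minimal sketch of such an override, illustrative only: board_wake_secondary_core() is a made-up helper, not a real U-Boot API.
/* Hypothetical board file: wake the other core(s) just before the jump. */
void board_jump_and_run(ulong entry, int zero, int arch, uint params)
{
        void (*kernel_entry)(int zero, int arch, uint params);

        kernel_entry = (void (*)(int, int, uint))entry;

        /* made-up platform helper: tell secondary cores where to start */
        board_wake_secondary_core(entry);

        /* the boot CPU enters the kernel last */
        kernel_entry(zero, arch, params);
}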
/* Subcommand: GO */
static void boot_jump_linux(bootm_headers_t *images, int flag)
{
void (*kernel_entry)(int zero, int arch, uint params);
ulong kernel_entry;
unsigned int r0, r2;
int fake = (flag & BOOTM_STATE_OS_FAKE_GO);
kernel_entry = (void (*)(int, int, uint))images->ep;
kernel_entry = images->ep;
debug("## Transferring control to Linux (at address %08lx)...\n",
(ulong) kernel_entry);
kernel_entry);
bootstage_mark(BOOTSTAGE_ID_RUN_OS);
printf("\nStarting kernel ...%s\n\n", fake ?
"(fake run for tracing)" : "");
bootstage_mark_name(BOOTSTAGE_ID_BOOTM_HANDOFF, "start_kernel");
cleanup_before_linux();
if (IMAGE_ENABLE_OF_LIBFDT && images->ft_len) {
r0 = 2;
r2 = (unsigned int)images->ft_addr;
} else {
r0 = 1;
r2 = (unsigned int)getenv("bootargs");
r2 = (unsigned int)env_get("bootargs");
}
smp_set_core_boot_addr((unsigned long)kernel_entry, -1);
smp_kick_all_cpus();
cleanup_before_linux();
if (!fake)
kernel_entry(r0, 0, r2);
board_jump_and_run(kernel_entry, r0, 0, r2);
}
int do_bootm_linux(int flag, int argc, char *argv[], bootm_headers_t *images)
@@ -101,17 +106,13 @@ int do_bootm_linux(int flag, int argc, char *argv[], bootm_headers_t *images)
if ((flag & BOOTM_STATE_OS_BD_T) || (flag & BOOTM_STATE_OS_CMDLINE))
return -1;
if (flag & BOOTM_STATE_OS_PREP) {
boot_prep_linux(images);
return 0;
}
if (flag & BOOTM_STATE_OS_PREP)
return boot_prep_linux(images);
if (flag & (BOOTM_STATE_OS_GO | BOOTM_STATE_OS_FAKE_GO)) {
boot_jump_linux(images, flag);
return 0;
}
boot_prep_linux(images);
boot_jump_linux(images, flag);
return 0;
return -1;
}
This diff is collapsed.
// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#include <common.h>
......
// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright (C) 2013-2015 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#include <asm/cache.h>
#include <common.h>
DECLARE_GLOBAL_DATA_PTR;
int init_cache_f_r(void)
{
#ifndef CONFIG_SYS_DCACHE_OFF
flush_dcache_all();
#endif
sync_n_cleanup_cache_all();
return 0;
}
// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#include <common.h>
......
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* Copyright (C) 2013-2015 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#include <linux/linkage.h>
......
// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright (C) 1989-2013 Free Software Foundation, Inc.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#include "libgcc2.h"
......
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* Copyright (C) 1989-2013 Free Software Foundation, Inc.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#ifndef __ASM_LIBGCC_H
......
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#ifdef __LITTLE_ENDIAN__
#define WORD2 r2
#define SHIFT r3
#else /* __BIG_ENDIAN__ */
#define WORD2 r3
#define SHIFT r2
#endif /* _ENDIAN__ */
.global memcmp
.align 4
memcmp:
or %r12, %r0, %r1
asl_s %r12, %r12, 30
sub %r3, %r2, 1
brls %r2, %r12, .Lbytewise
ld %r4, [%r0, 0]
ld %r5, [%r1, 0]
lsr.f %lp_count, %r3, 3
lpne .Loop_end
ld_s WORD2, [%r0, 4]
ld_s %r12, [%r1, 4]
brne %r4, %r5, .Leven
ld.a %r4, [%r0, 8]
ld.a %r5, [%r1, 8]
brne WORD2, %r12, .Lodd
nop
.Loop_end:
asl_s SHIFT, SHIFT, 3
bhs_s .Last_cmp
brne %r4, %r5, .Leven
ld %r4, [%r0, 4]
ld %r5, [%r1, 4]
#ifdef __LITTLE_ENDIAN__
nop_s
/* one more load latency cycle */
.Last_cmp:
xor %r0, %r4, %r5
bset %r0, %r0, SHIFT
sub_s %r1, %r0, 1
bic_s %r1, %r1, %r0
norm %r1, %r1
b.d .Leven_cmp
and %r1, %r1, 24
.Leven:
xor %r0, %r4, %r5
sub_s %r1, %r0, 1
bic_s %r1, %r1, %r0
norm %r1, %r1
/* slow track insn */
and %r1, %r1, 24
.Leven_cmp:
asl %r2, %r4, %r1
asl %r12, %r5, %r1
lsr_s %r2, %r2, 1
lsr_s %r12, %r12, 1
j_s.d [%blink]
sub %r0, %r2, %r12
.balign 4
.Lodd:
xor %r0, WORD2, %r12
sub_s %r1, %r0, 1
bic_s %r1, %r1, %r0
norm %r1, %r1
/* slow track insn */
and %r1, %r1, 24
asl_s %r2, %r2, %r1
asl_s %r12, %r12, %r1
lsr_s %r2, %r2, 1
lsr_s %r12, %r12, 1
j_s.d [%blink]
sub %r0, %r2, %r12
#else /* __BIG_ENDIAN__ */
.Last_cmp:
neg_s SHIFT, SHIFT
lsr %r4, %r4, SHIFT
lsr %r5, %r5, SHIFT
/* slow track insn */
.Leven:
sub.f %r0, %r4, %r5
mov.ne %r0, 1
j_s.d [%blink]
bset.cs %r0, %r0, 31
.Lodd:
cmp_s WORD2, %r12
mov_s %r0, 1
j_s.d [%blink]
bset.cs %r0, %r0, 31
#endif /* _ENDIAN__ */
.balign 4
.Lbytewise:
breq %r2, 0, .Lnil
ldb %r4, [%r0, 0]
ldb %r5, [%r1, 0]
lsr.f %lp_count, %r3
lpne .Lbyte_end
ldb_s %r3, [%r0, 1]
ldb %r12, [%r1, 1]
brne %r4, %r5, .Lbyte_even
ldb.a %r4, [%r0, 2]
ldb.a %r5, [%r1, 2]
brne %r3, %r12, .Lbyte_odd
nop
.Lbyte_end:
bcc .Lbyte_even
brne %r4, %r5, .Lbyte_even
ldb_s %r3, [%r0, 1]
ldb_s %r12, [%r1, 1]
.Lbyte_odd:
j_s.d [%blink]
sub %r0, %r3, %r12
.Lbyte_even:
j_s.d [%blink]
sub %r0, %r4, %r5
.Lnil:
j_s.d [%blink]
mov %r0, 0
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
.global memcpy
.align 4
memcpy:
or %r3, %r0, %r1
asl_s %r3, %r3, 30
mov_s %r5, %r0
brls.d %r2, %r3, .Lcopy_bytewise
sub.f %r3, %r2, 1
ld_s %r12, [%r1, 0]
asr.f %lp_count, %r3, 3
bbit0.d %r3, 2, .Lnox4
bmsk_s %r2, %r2, 1
st.ab %r12, [%r5, 4]
ld.a %r12, [%r1, 4]
.Lnox4:
lppnz .Lendloop
ld_s %r3, [%r1, 4]
st.ab %r12, [%r5, 4]
ld.a %r12, [%r1, 8]
st.ab %r3, [%r5, 4]
.Lendloop:
breq %r2, 0, .Last_store
ld %r3, [%r5, 0]
#ifdef __LITTLE_ENDIAN__
add3 %r2, -1, %r2
/* uses long immediate */
xor_s %r12, %r12, %r3
bmsk %r12, %r12, %r2
xor_s %r12, %r12, %r3
#else /* __BIG_ENDIAN__ */
sub3 %r2, 31, %r2
/* uses long immediate */
xor_s %r3, %r3, %r12
bmsk %r3, %r3, %r2
xor_s %r12, %r12, %r3
#endif /* _ENDIAN__ */
.Last_store:
j_s.d [%blink]
st %r12, [%r5, 0]
.balign 4
.Lcopy_bytewise:
jcs [%blink]
ldb_s %r12, [%r1, 0]
lsr.f %lp_count, %r3
bhs_s .Lnox1
stb.ab %r12, [%r5, 1]
ldb.a %r12, [%r1, 1]
.Lnox1:
lppnz .Lendbloop
ldb_s %r3, [%r1, 1]
stb.ab %r12, [%r5, 1]
ldb.a %r12, [%r1, 2]
stb.ab %r3, [%r5, 1]
.Lendbloop:
j_s.d [%blink]
stb %r12, [%r5, 0]
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */
.global memset
.align 4
memset:
mov_s %r4, %r0
or %r12, %r0, %r2
bmsk.f %r12, %r12, 1
extb_s %r1, %r1
asl %r3, %r1, 8
beq.d .Laligned
or_s %r1, %r1, %r3
brls %r2, SMALL, .Ltiny
add %r3, %r2, %r0
stb %r1, [%r3, -1]
bclr_s %r3, %r3, 0
stw %r1, [%r3, -2]
bmsk.f %r12, %r0, 1
add_s %r2, %r2, %r12
sub.ne %r2, %r2, 4
stb.ab %r1, [%r4, 1]
and %r4, %r4, -2
stw.ab %r1, [%r4, 2]
and %r4, %r4, -4
.balign 4
.Laligned:
asl %r3, %r1, 16
lsr.f %lp_count, %r2, 2
or_s %r1, %r1, %r3
lpne .Loop_end
st.ab %r1, [%r4, 4]
.Loop_end:
j_s [%blink]
.balign 4
.Ltiny:
mov.f %lp_count, %r2
lpne .Ltiny_end
stb.ab %r1, [%r4, 1]
.Ltiny_end:
j_s [%blink]
/*
* memzero: @r0 = mem, @r1 = size_t
* memset: @r0 = mem, @r1 = char, @r2 = size_t
*/
.global memzero
.align 4
memzero:
/* adjust bzero args to memset args */
mov %r2, %r1
mov %r1, 0
/* tail call so need to tinker with blink */
b memset
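For readers who find the argument shuffling easier to follow in C, the memzero wrapper above is equivalent to the following sketch (illustrative only, not code from this patch):
#include <stddef.h>
#include <string.h>

/* bzero-style (pointer, length) arguments rearranged into memset's
 * (pointer, value, length) order; the branch above acts as a tail call. */
void memzero_sketch(void *mem, size_t n)
{
        memset(mem, 0, n);
}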
// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#include <common.h>
@@ -9,7 +8,9 @@
#include <asm-generic/sections.h>
extern ulong __image_copy_start;
extern ulong __ivt_start;
extern ulong __ivt_end;
extern ulong __text_end;
DECLARE_GLOBAL_DATA_PTR;
@@ -17,6 +18,9 @@ int copy_uboot_to_ram(void)
{
size_t len = (size_t)&__image_copy_end - (size_t)&__image_copy_start;
if (gd->flags & GD_FLG_SKIP_RELOC)
return 0;
memcpy((void *)gd->relocaddr, (void *)&__image_copy_start, len);
return 0;
@@ -40,10 +44,13 @@ int do_elf_reloc_fixups(void)
Elf32_Rela *re_src = (Elf32_Rela *)(&__rel_dyn_start);
Elf32_Rela *re_end = (Elf32_Rela *)(&__rel_dyn_end);
if (gd->flags & GD_FLG_SKIP_RELOC)
return 0;
debug("Section .rela.dyn is located at %08x-%08x\n",
(unsigned int)re_src, (unsigned int)re_end);
Elf32_Addr *offset_ptr_rom, *last_offset = NULL;
Elf32_Addr *offset_ptr_rom;
Elf32_Addr *offset_ptr_ram;
do {
@@ -52,15 +59,28 @@ int do_elf_reloc_fixups(void)
/* Check that the location of the relocation is in .text */
if (offset_ptr_rom >= (Elf32_Addr *)&__image_copy_start &&
offset_ptr_rom > last_offset) {
unsigned int val;
offset_ptr_rom < (Elf32_Addr *)&__image_copy_end) {
unsigned int val, do_swap = 0;
/* Switch to the in-RAM version */
offset_ptr_ram = (Elf32_Addr *)((ulong)offset_ptr_rom +
gd->reloc_off);
debug("Patching value @ %08x (relocated to %08x)\n",
#ifdef __LITTLE_ENDIAN__
/* If location in ".text" section swap value */
if (((u32)offset_ptr_rom >= (u32)&__text_start &&
(u32)offset_ptr_rom <= (u32)&__text_end)
#if defined(__ARC700__) || defined(__ARC600__)
|| ((u32)offset_ptr_rom >= (u32)&__ivt_start &&
(u32)offset_ptr_rom <= (u32)&__ivt_end)
#endif
)
do_swap = 1;
#endif
debug("Patching value @ %08x (relocated to %08x)%s\n",
(unsigned int)offset_ptr_rom,
(unsigned int)offset_ptr_ram);
(unsigned int)offset_ptr_ram,
do_swap ? ", middle-endian encoded" : "");
/*
* Use "memcpy" because target location might be
@@ -70,28 +90,45 @@ int do_elf_reloc_fixups(void)
*/
memcpy(&val, offset_ptr_ram, sizeof(int));
#ifdef __LITTLE_ENDIAN__
/* If location in ".text" section swap value */
if ((unsigned int)offset_ptr_rom <
(unsigned int)&__ivt_end)
if (do_swap)
val = (val << 16) | (val >> 16);
#endif
/* Check that the target points into executable */
if (val >= (unsigned int)&__image_copy_start && val <=
(unsigned int)&__image_copy_end) {
val += gd->reloc_off;
#ifdef __LITTLE_ENDIAN__
/* If location in ".text" section swap value */
if ((unsigned int)offset_ptr_rom <
(unsigned int)&__ivt_end)
val = (val << 16) | (val >> 16);
#endif
memcpy(offset_ptr_ram, &val, sizeof(int));
if (val < (unsigned int)&__image_copy_start ||
val > (unsigned int)&__image_copy_end) {
/* TODO: Use panic() instead of debug()
*
* For some reason GCC might generate
* a fake relocation even for an LD/ST of a constant
* accessed indirectly. See the example below:
* ----------------------->8--------------------
* static int setup_mon_len(void)
* {
* gd->mon_len = (ulong)&__bss_end - CONFIG_SYS_MONITOR_BASE;
* return 0;
* }
* ----------------------->8--------------------
*
* And that's what we get in the binary:
* ----------------------->8--------------------
* 10005cb4 <setup_mon_len>:
* 10005cb4: 193c 3f80 0003 2f80 st 0x32f80,[r25,60]
* 10005cb8: R_ARC_RELATIVE *ABS*-0x10000000
* 10005cbc: 7fe0 j_s.d [blink]
* 10005cbe: 700c mov_s r0,0
* ----------------------->8--------------------
*/
debug("Relocation target %08x points outside of image\n",
val);
}
}
last_offset = offset_ptr_rom;
val += gd->reloc_off;
if (do_swap)
val = (val << 16) | (val >> 16);
memcpy(offset_ptr_ram, &val, sizeof(int));
}
} while (++re_src < re_end);
return 0;
......
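The do_swap handling added in the hunks above is needed because little-endian ARC cores store the instruction stream middle-endian: each 32-bit word in .text is held as two swapped 16-bit halfwords (hence the "middle-endian encoded" debug message). A value patched inside .text is therefore halfword-swapped after it is read, adjusted by the relocation offset, and swapped back before it is written. A stand-alone sketch of that swap, purely for illustration:
#include <stdint.h>

/* Swap the 16-bit halves of a 32-bit word, converting between the CPU's
 * natural view of a value and its middle-endian layout in the ARC
 * instruction stream; applying it twice restores the original value. */
static inline uint32_t arc_me_swap(uint32_t v)
{
        return (v << 16) | (v >> 16);
}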
// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#include <command.h>
#include <common.h>
__weak void reset_cpu(ulong addr)
{
/* Stop debug session here */
__builtin_arc_brk();
}
int do_reset(cmd_tbl_t *cmdtp, int flag, int argc, char *const argv[])
{
printf("Put your restart handler here\n");
printf("Resetting the board...\n");
reset_cpu(0);
#ifdef DEBUG
/* Stop debug session here */
__asm__("brk");
#endif
return 0;
}
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#include <asm-offsets.h>
@@ -10,26 +9,6 @@
#include <asm/arcregs.h>
ENTRY(_start)
; ARCompact devices are not supposed to be SMP so master/slave check
; makes no sense.
#ifdef CONFIG_ISA_ARCV2
; Non-masters will be halted immediately, they might be kicked later
; by platform code right before passing control to the Linux kernel
; in bootm.c:boot_jump_linux().
lr r5, [identity]
lsr r5, r5, 8
bmsk r5, r5, 7
cmp r5, 0
mov.nz r0, r5
bz .Lmaster_proceed
flag 1
nop
nop
nop
.Lmaster_proceed:
#endif
/* Setup interrupt vector base that matches "__text_start" */
sr __ivt_start, [ARC_AUX_INTR_VEC_BASE]
@@ -44,6 +23,14 @@ ENTRY(_start)
#endif
sr r5, [ARC_AUX_IC_CTRL]
mov r5, 1
sr r5, [ARC_AUX_IC_IVIC]
; As per ARC HS databook (see chapter 5.3.3.2)
; it is required to add 3 NOPs after each write to IC_IVIC.
nop
nop
nop
1:
; Disable/enable D-cache according to configuration
lr r5, [ARC_BCR_DC_BUILD]
@@ -57,6 +44,10 @@ ENTRY(_start)
#endif
sr r5, [ARC_AUX_DC_CTRL]
mov r5, 1
sr r5, [ARC_AUX_DC_IVDC]
1:
#ifdef CONFIG_ISA_ARCV2
; Disable System-Level Cache (SLC)
@@ -84,9 +75,20 @@ ENTRY(_start)
/* Initialize reserved area - note: r0 already contains address */
bl board_init_f_init_reserve
#ifdef CONFIG_DEBUG_UART
/* Earliest point to set up early debug uart */
bl debug_uart_init
#endif
/* Zero the one and only argument of "board_init_f" */
mov_s %r0, 0
j board_init_f
bl board_init_f
/* We only get here if relocation is disabled by GD_FLG_SKIP_RELOC */
/* Make sure we don't lose GD overwritten by zero new GD */
mov %r0, %r25
mov %r1, 0
bl board_init_r
ENDPROC(_start)
/*
......
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
/*
* ARC700 has a relatively long pipeline and branch prediction, so we want
* to avoid branches that are hard to predict. On the other hand, the
* presence of the norm instruction makes it easier to operate on whole
* words branch-free.
*/
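The string routines in this diff (strchr, strcmp, strcpy) all build on the same word-at-a-time zero test, visible below as the sub/bic/and sequences against the 0x01010101 and 0x80808080 constants. A small C sketch of that test, for illustration only:
#include <stdint.h>

/* Nonzero iff some byte of w is 0x00: subtracting 1 from every byte and
 * masking with the inverted word leaves bit 7 set in each zero byte.
 * The borrow out of a genuine zero byte can also flag 0x01 bytes above
 * it, but the lowest flagged byte is always a real hit, and the
 * assembly compensates where the exact position matters. */
static int word_has_zero_byte(uint32_t w)
{
        return ((w - 0x01010101u) & ~w & 0x80808080u) != 0;
}

/* To search for a character c, as strchr does, XOR first so that
 * matching bytes become zero, then reuse the zero test. */
static int word_has_char(uint32_t w, uint8_t c)
{
        return word_has_zero_byte(w ^ (0x01010101u * c));
}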
.global strchr
.align 4
strchr:
extb_s %r1, %r1
asl %r5, %r1, 8
bmsk %r2, %r0, 1
or %r5, %r5, %r1
mov_s %r3, 0x01010101
breq.d %r2, %r0, .Laligned
asl %r4, %r5, 16
sub_s %r0, %r0, %r2
asl %r7, %r2, 3
ld_s %r2, [%r0]
#ifdef __LITTLE_ENDIAN__
asl %r7, %r3, %r7
#else /* __BIG_ENDIAN__ */
lsr %r7, %r3, %r7
#endif /* _ENDIAN__ */
or %r5, %r5, %r4
ror %r4, %r3
sub %r12, %r2, %r7
bic_s %r12, %r12, %r2
and %r12, %r12, %r4
brne.d %r12, 0, .Lfound0_ua
xor %r6, %r2, %r5
ld.a %r2, [%r0, 4]
sub %r12, %r6, %r7
bic %r12, %r12, %r6
#ifdef __LITTLE_ENDIAN__
and %r7, %r12, %r4
/* For speed, we want this branch to be unaligned. */
breq %r7, 0, .Loop
/* Likewise this one */
b .Lfound_char
#else /* __BIG_ENDIAN__ */
and %r12, %r12, %r4
/* For speed, we want this branch to be unaligned. */
breq %r12, 0, .Loop
lsr_s %r12, %r12, 7
bic %r2, %r7, %r6
b.d .Lfound_char_b
and_s %r2, %r2, %r12
#endif /* _ENDIAN__ */
/* We require this code address to be unaligned for speed... */
.Laligned:
ld_s %r2, [%r0]
or %r5, %r5, %r4
ror %r4, %r3
/* ... so that this code address is aligned, for itself and ... */
.Loop:
sub %r12, %r2, %r3
bic_s %r12, %r12, %r2
and %r12, %r12, %r4
brne.d %r12, 0, .Lfound0
xor %r6, %r2, %r5
ld.a %r2, [%r0, 4]
sub %r12, %r6, %r3
bic %r12, %r12, %r6
and %r7, %r12, %r4
breq %r7, 0, .Loop
/*
*... so that this branch is unaligned.
* Found searched-for character.
* r0 has already advanced to next word.
*/
#ifdef __LITTLE_ENDIAN__
/*
* We only need the information about the first matching byte
* (i.e. the least significant matching byte) to be exact,
* hence there is no problem with carry effects.
*/
.Lfound_char:
sub %r3, %r7, 1
bic %r3, %r3, %r7
norm %r2, %r3
sub_s %r0, %r0, 1
asr_s %r2, %r2, 3
j.d [%blink]
sub_s %r0, %r0, %r2
.balign 4
.Lfound0_ua:
mov %r3, %r7
.Lfound0:
sub %r3, %r6, %r3
bic %r3, %r3, %r6
and %r2, %r3, %r4
or_s %r12, %r12, %r2
sub_s %r3, %r12, 1
bic_s %r3, %r3, %r12
norm %r3, %r3
add_s %r0, %r0, 3
asr_s %r12, %r3, 3
asl.f 0, %r2, %r3
sub_s %r0, %r0, %r12
j_s.d [%blink]
mov.pl %r0, 0
#else /* __BIG_ENDIAN__ */
.Lfound_char:
lsr %r7, %r7, 7
bic %r2, %r7, %r6
.Lfound_char_b:
norm %r2, %r2
sub_s %r0, %r0, 4
asr_s %r2, %r2, 3
j.d [%blink]
add_s %r0, %r0, %r2
.Lfound0_ua:
mov_s %r3, %r7
.Lfound0:
asl_s %r2, %r2, 7
or %r7, %r6, %r4
bic_s %r12, %r12, %r2
sub %r2, %r7, %r3
or %r2, %r2, %r6
bic %r12, %r2, %r12
bic.f %r3, %r4, %r12
norm %r3, %r3
add.pl %r3, %r3, 1
asr_s %r12, %r3, 3
asl.f 0, %r2, %r3
add_s %r0, %r0, %r12
j_s.d [%blink]
mov.mi %r0, 0
#endif /* _ENDIAN__ */
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
/*
* This is optimized primarily for the ARC700.
* It would be possible to speed up the loops by one cycle / word
* respective one cycle / byte by forcing double source 1 alignment, unrolling
* by a factor of two, and speculatively loading the second word / byte of
* source 1; however, that would increase the overhead for loop setup / finish,
* and strcmp might often terminate early.
*/
.global strcmp
.align 4
strcmp:
or %r2, %r0, %r1
bmsk_s %r2, %r2, 1
brne %r2, 0, .Lcharloop
mov_s %r12, 0x01010101
ror %r5, %r12
.Lwordloop:
ld.ab %r2, [%r0, 4]
ld.ab %r3, [%r1, 4]
nop_s
sub %r4, %r2, %r12
bic %r4, %r4, %r2
and %r4, %r4, %r5
brne %r4, 0, .Lfound0
breq %r2, %r3, .Lwordloop
#ifdef __LITTLE_ENDIAN__
xor %r0, %r2, %r3 /* mask for difference */
sub_s %r1, %r0, 1
bic_s %r0, %r0, %r1 /* mask for least significant difference bit */
sub %r1, %r5, %r0
xor %r0, %r5, %r1 /* mask for least significant difference byte */
and_s %r2, %r2, %r0
and_s %r3, %r3, %r0
#endif /* _ENDIAN__ */
cmp_s %r2, %r3
mov_s %r0, 1
j_s.d [%blink]
bset.lo %r0, %r0, 31
.balign 4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
xor %r0, %r2, %r3 /* mask for difference */
or %r0, %r0, %r4 /* or in zero indicator */
sub_s %r1, %r0, 1
bic_s %r0, %r0, %r1 /* mask for least significant difference bit */
sub %r1, %r5, %r0
xor %r0, %r5, %r1 /* mask for least significant difference byte */
and_s %r2, %r2, %r0
and_s %r3, %r3, %r0
sub.f %r0, %r2, %r3
mov.hi %r0, 1
j_s.d [%blink]
bset.lo %r0, %r0, 31
#else /* __BIG_ENDIAN__ */
/*
* The zero-detection above can mis-detect 0x01 bytes as zeroes
* because of carry-propagation from a less significant zero byte.
* We can compensate for this by checking that bit0 is zero.
* This compensation is not necessary in the step where we
* get a low estimate for r2, because in any affected bytes
* we already have 0x00 or 0x01, which will remain unchanged
* when bit 7 is cleared.
*/
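A concrete illustration of the comment above (plain 32-bit arithmetic, not code from the patch): take a word whose bytes, from most to least significant, are 01 01 00 41.
w                                  = 0x01010041
w - 0x01010101                     = 0xffffff40
(w - 0x01010101) & ~w              = 0xfefeff00
(w - 0x01010101) & ~w & 0x80808080 = 0x80808000
Besides the genuine 0x00 byte, the two 0x01 bytes above it are flagged as well, because the borrow out of the zero byte ripples upward; checking that bit 0 of the suspect byte is clear, as the comment describes, rejects those false hits.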
.balign 4
.Lfound0:
lsr %r0, %r4, 8
lsr_s %r1, %r2
bic_s %r2, %r2, %r0 /* get low estimate for r2 and get ... */
bic_s %r0, %r0, %r1 /* <this is the adjusted mask for zeros> */
or_s %r3, %r3, %r0 /* ... high estimate r3 so that r2 > r3 will */
cmp_s %r3, %r2 /* ... be independent of trailing garbage */
or_s %r2, %r2, %r0 /* likewise for r3 > r2 */
bic_s %r3, %r3, %r0
rlc %r0, 0 /* r0 := r2 > r3 ? 1 : 0 */
cmp_s %r2, %r3
j_s.d [%blink]
bset.lo %r0, %r0, 31
#endif /* _ENDIAN__ */
.balign 4
.Lcharloop:
ldb.ab %r2,[%r0,1]
ldb.ab %r3,[%r1,1]
nop_s
breq %r2, 0, .Lcmpend
breq %r2, %r3, .Lcharloop
.Lcmpend:
j_s.d [%blink]
sub %r0, %r2, %r3
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
/*
* If dst and src are 4 byte aligned, copy 8 bytes at a time.
* If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
* it 8 byte aligned. Thus, we can do a little read-ahead, without
* dereferencing a cache line that we should not touch.
* Note that short and long instructions have been scheduled to avoid
* branch stalls.
* The beq_s to r3z could be made unaligned & long to avoid a stall
* there, but it is not likely to be taken often, and it would also be likely
* to cost an unaligned mispredict at the next call.
*/
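As a rough mental model for the fast path described above, the aligned case (both pointers 4-byte aligned, as the assembly checks before taking this path) behaves like the simplified C loop below. It is illustrative only: it copies 4 bytes at a time and ignores the 8-byte software pipelining, read-ahead and aliasing details of the real routine.
#include <stdint.h>

char *strcpy_sketch(char *dst, const char *src)
{
        char *ret = dst;
        uint32_t w;

        /* copy whole words while none of their bytes is the terminating NUL */
        while (w = *(const uint32_t *)src,
               !((w - 0x01010101u) & ~w & 0x80808080u)) {
                *(uint32_t *)dst = w;
                src += 4;
                dst += 4;
        }

        /* finish byte by byte, copying the NUL as well */
        while ((*dst++ = *src++) != '\0')
                ;

        return ret;
}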
.global strcpy
.align 4
strcpy:
or %r2, %r0, %r1
bmsk_s %r2, %r2, 1
brne.d %r2, 0, charloop
mov_s %r10, %r0
ld_s %r3, [%r1, 0]
mov %r8, 0x01010101
bbit0.d %r1, 2, loop_start
ror %r12, %r8
sub %r2, %r3, %r8
bic_s %r2, %r2, %r3
tst_s %r2,%r12
bne r3z
mov_s %r4,%r3
.balign 4
loop:
ld.a %r3, [%r1, 4]
st.ab %r4, [%r10, 4]
loop_start:
ld.a %r4, [%r1, 4]
sub %r2, %r3, %r8
bic_s %r2, %r2, %r3
tst_s %r2, %r12
bne_s r3z
st.ab %r3, [%r10, 4]
sub %r2, %r4, %r8
bic %r2, %r2, %r4
tst %r2, %r12
beq loop
mov_s %r3, %r4
#ifdef __LITTLE_ENDIAN__
r3z: bmsk.f %r1, %r3, 7
lsr_s %r3, %r3, 8
#else /* __BIG_ENDIAN__ */
r3z: lsr.f %r1, %r3, 24
asl_s %r3, %r3, 8
#endif /* _ENDIAN__ */
bne.d r3z
stb.ab %r1, [%r10, 1]
j_s [%blink]
.balign 4
charloop:
ldb.ab %r3, [%r1, 1]
brne.d %r3, 0, charloop
stb.ab %r3, [%r10, 1]
j [%blink]