Rewrote crt0, init, and chainloading code

start.s, init.c, linker.ld and linker.specs are meant
to be re-used by user applications, provided they remove the defines
from init.c and the .chainloader* sections from the linker script.
This commit is contained in:
TuxSH
2018-05-07 23:32:45 +02:00
parent 699ddfc043
commit e8306361f0
29 changed files with 848 additions and 185 deletions

View File

@@ -0,0 +1,34 @@
#include "chainloader.h"
/* Argument blob passed unchanged as the second argument of the chainloaded entrypoint. */
char g_chainloader_arg_data[CHAINLOADER_ARG_DATA_MAX_SIZE] = {0};
/* Relocation table; relocate_and_chainload_main walks it in index order. */
chainloader_entry_t g_chainloader_entries[CHAINLOADER_MAX_ENTRIES] = {0}; /* keep them sorted */
/* Number of leading entries of g_chainloader_entries that are processed. */
size_t g_chainloader_num_entries = 0;
/* Address that is called once every table entry has been copied. */
uintptr_t g_chainloader_entrypoint = 0;
#pragma GCC optimize (3)
/*
 * Overlap-safe byte copy with memmove semantics.
 *
 * Copies len bytes from src to dst and returns dst. When dst lies below src
 * the bytes are copied front-to-back; when dst lies above src they are copied
 * back-to-front, so a source that overlaps the destination is never
 * overwritten before it has been read. Nothing is copied when dst == src.
 *
 * NOTE(review): presumably hand-rolled so the relocation step does not rely on
 * libc code living in memory that may be overwritten - confirm.
 */
static void *xmemmove(void *dst, const void *src, size_t len)
{
    const uint8_t *src8 = (const uint8_t *)src;
    uint8_t *dst8 = (uint8_t *)dst;

    if (dst8 < src8) {
        /* Destination is lower: ascending copy is safe. */
        for (size_t i = 0; i < len; i++) {
            dst8[i] = src8[i];
        }
    } else if (dst8 > src8) {
        /* Destination is higher: copy from the last byte downwards. */
        for (size_t i = len; i > 0; i--) {
            dst8[i - 1] = src8[i - 1];
        }
    }
    return dst;
}
/*
 * Applies every relocation recorded in g_chainloader_entries, in index order,
 * then calls g_chainloader_entrypoint with (argc, g_chainloader_arg_data).
 *
 * argc is forwarded untouched to the chainloaded program.
 */
void relocate_and_chainload_main(int argc) {
    typedef void (*chainloader_target_fn)(int, void *);
    size_t idx = 0;

    /* The entry count is re-read on every pass, exactly like an index loop. */
    while (idx < g_chainloader_num_entries) {
        chainloader_entry_t *cur = &g_chainloader_entries[idx];

        xmemmove((void *)cur->load_address, (const void *)cur->src_address, cur->size);
        idx++;
    }

    chainloader_target_fn target = (chainloader_target_fn)g_chainloader_entrypoint;
    target(argc, g_chainloader_arg_data);
}

View File

@@ -0,0 +1,25 @@
#ifndef FUSEE_CHAINLOADER_H
#define FUSEE_CHAINLOADER_H
#include <stddef.h>
#include <stdint.h>
#define CHAINLOADER_ARG_DATA_MAX_SIZE 0x6200
#define CHAINLOADER_MAX_ENTRIES 128
/* One relocation record: a region to be copied before the entrypoint is called. */
typedef struct chainloader_entry_t {
/* Destination address of the copy. */
uintptr_t load_address;
/* Address the region currently resides at (source of the copy). */
uintptr_t src_address;
/* Number of bytes to copy. */
size_t size;
/* NOTE(review): not read by the relocation loop; looks like an ordinal/identifier - confirm. */
size_t num;
} chainloader_entry_t;
/* Relocation table, filled in by the loader before chainloading. */
extern chainloader_entry_t g_chainloader_entries[CHAINLOADER_MAX_ENTRIES]; /* keep them sorted */
/* Number of valid entries in g_chainloader_entries. */
extern size_t g_chainloader_num_entries;
/* Address called after all entries have been relocated. */
extern uintptr_t g_chainloader_entrypoint;
/* Raw argument data handed to the chainloaded program as its second argument. */
extern char g_chainloader_arg_data[CHAINLOADER_ARG_DATA_MAX_SIZE];
/*
 * Assembly trampoline: resets the stack pointer to __stack_top__ and branches
 * to relocate_and_chainload_main, passing argc through.
 */
void relocate_and_chainload(int argc);
#endif

View File

@@ -28,7 +28,7 @@ void clock_enable_fuse(u32 enable);
void display_color_screen(u32 color);
/*! Init display in full 1280x720 resolution (32bpp, line stride 768, framebuffer size = 1280*768*4 bytes). */
u32 *display_init_framebuffer();
u32 *display_init_framebuffer(void *address);
/*! Enable or disable the backlight. Should only be called when the screen is completely set up, to avoid flickering. */
void display_enable_backlight(bool on);

View File

@@ -188,9 +188,9 @@ void display_enable_backlight(bool on) {
}
u32 *display_init_framebuffer(void)
u32 *display_init_framebuffer(void *address)
{
u32 *lfb_addr = (u32 *)0xC0000000;
u32 *lfb_addr = (u32 *)address;
//This configures the framebuffer @ 0xC0000000 with a resolution of 1280x720 (line stride 768).
exec_cfg((u32 *)DISPLAY_A_BASE, cfg_display_framebuffer, 32);

View File

@@ -52,7 +52,7 @@ void display_end();
void display_color_screen(u32 color);
/*! Init display in full 1280x720 resolution (32bpp, line stride 768, framebuffer size = 1280*768*4 bytes). */
u32 *display_init_framebuffer(void);
u32 *display_init_framebuffer(void *address);
/*! Enable or disable the backlight. Should only be called when the screen is completely set up, to avoid flickering. */
void display_enable_backlight(bool on);

View File

@@ -0,0 +1,130 @@
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <malloc.h>
#include <sys/iosupport.h>
#include "utils.h"
/* Called at the end of __program_init and at the start of __program_exit respectively. */
void __libc_init_array(void);
void __libc_fini_array(void);
/* Section bounds; .bss is zero-filled by __program_init. */
extern uint8_t __bss_start__[], __bss_end__[];
/* Heap bounds, published through fake_heap_start/fake_heap_end. */
extern uint8_t __heap_start__[], __heap_end__[];
extern char *fake_heap_start;
extern char *fake_heap_end;
/* Argument count and vector produced by __program_parse_argc_argv. */
int __program_argc;
void **__program_argv;
void __attribute__((noreturn)) __program_exit(int rc);
/* Optional exit hook; when NULL, __program_exit falls back to generic_panic(). */
void __attribute__((noreturn)) (*__program_exit_callback)(int rc) = NULL;
static void __program_parse_argc_argv(int argc, char *argdata);
static void __program_cleanup_argv(void);
/*
 * Publishes the heap bounds through fake_heap_start/fake_heap_end.
 * NOTE(review): presumably these are the variables the libc allocator reads
 * its arena from - confirm against the toolchain's sbrk.
 */
static void __program_init_heap(void) {
    char *const heap_lo = (char *)__heap_start__;
    char *const heap_hi = (char *)__heap_end__;

    fake_heap_start = heap_lo;
    fake_heap_end = heap_hi;
}
/*
 * Installs this runtime's handlers into the __syscalls table so that the C
 * library's exit path ends up in __program_exit.
 * NOTE(review): __syscalls presumably comes from <sys/iosupport.h> - confirm.
 */
static void __program_init_newlib_hooks(void) {
__syscalls.exit = __program_exit; /* For exit, etc. */
}
/*
 * Stage 1 / stage 2 only: copies the chainloader section from its load
 * address (__chainloader_lma__) to __chainloader_start__ and zero-fills the
 * part from __chainloader_bss_start__ up to __chainloader_end__.
 * Compiles to nothing for other programs.
 */
static void __program_move_additional_sections(void) {
#if defined(FUSEE_STAGE1_SRC) || defined(FUSEE_STAGE2_SRC)
    extern uint8_t __chainloader_lma__[], __chainloader_start__[], __chainloader_bss_start__[], __chainloader_end__[];
    const size_t initialized_len = (size_t)(__chainloader_bss_start__ - __chainloader_start__);
    const size_t zeroed_len = (size_t)(__chainloader_end__ - __chainloader_bss_start__);

    memcpy(__chainloader_start__, __chainloader_lma__, initialized_len);
    memset(__chainloader_bss_start__, 0, zeroed_len);
#endif
}
/*
 * C-side runtime initialisation, invoked from the startup assembly
 * (`bl __program_init`) before main.
 *
 * argc    - argument count forwarded to the argument parser.
 * argdata - raw argument data forwarded to the argument parser.
 *
 * The call order matters: .bss is cleared first, the heap bounds are set up
 * before the argument parser (which calls malloc), and the chainloader
 * section is only moved after the arguments have been copied out.
 */
void __program_init(int argc, char *argdata) {
/* Zero-fill the .bss section */
memset(__bss_start__, 0, __bss_end__ - __bss_start__);
__program_init_heap();
__program_init_newlib_hooks();
/* Copies the arguments into heap storage (__program_argc/__program_argv). */
__program_parse_argc_argv(argc, argdata);
/* Once argv is parsed, we can discard the low IRAM region */
__program_move_additional_sections();
/* Run global constructors last, with heap and arguments available. */
__libc_init_array();
}
/*
 * Program termination path: runs the libc finalisers, releases the argument
 * vector, then hands rc to __program_exit_callback if one is installed or
 * calls generic_panic() otherwise. Spins forever if either of them returns.
 */
void __program_exit(int rc) {
    __libc_fini_array();
    __program_cleanup_argv();

    if (__program_exit_callback != NULL) {
        __program_exit_callback(rc);
    } else {
        /* Default callback */
        generic_panic();
    }

    while (1) {
        /* never return to the caller */
    }
}
#ifdef FUSEE_STAGE1_SRC
/*
 * Stage 1 variant: both parameters are ignored and the program is given an
 * empty argument list (argc = 0, argv = NULL).
 */
static void __program_parse_argc_argv(int argc, char *argdata) {
    (void)argc;
    (void)argdata;

    __program_argv = NULL;
    __program_argc = 0;
}
#elif defined(FUSEE_STAGE2_SRC)
#include "stage2.h"
/*
 * Stage 2 variant: unpacks the argument data into heap-allocated argv slots.
 *
 * argdata holds a NUL-terminated program path immediately followed by a
 * stage2_args_t. On return:
 *   __program_argv[0] -> private copy of the path string
 *   __program_argv[1] -> private copy of the stage2_args_t
 * Any further slots (argc > 2) are left NULL so that the cleanup code can
 * free them safely.
 *
 * Calls generic_panic() if argc is smaller than 2 or an allocation fails.
 */
static void __program_parse_argc_argv(int argc, char *argdata) {
    size_t path_len;

    /* Slots 0 and 1 are written unconditionally below. */
    if (argc < 2) {
        generic_panic();
    }

    __program_argc = argc;
    /* Zero-initialised so unused slots are NULL when they are freed at exit. */
    __program_argv = calloc((size_t)argc, sizeof(*__program_argv));
    if (__program_argv == NULL) {
        generic_panic();
    }

    path_len = strlen(argdata);
    __program_argv[0] = malloc(path_len + 1);
    if (__program_argv[0] == NULL) {
        generic_panic();
    }
    memcpy(__program_argv[0], argdata, path_len + 1);

    /* The buffer must hold the whole structure, not just path_len + 1 bytes. */
    __program_argv[1] = malloc(sizeof(stage2_args_t));
    if (__program_argv[1] == NULL) {
        generic_panic();
    }
    memcpy(__program_argv[1], argdata + path_len + 1, sizeof(stage2_args_t));
}
#else
/*
 * Generic variant: argdata is a sequence of argc NUL-terminated strings laid
 * out back to back. Each one is duplicated onto the heap and stored in
 * __program_argv. Calls generic_panic() when an allocation fails.
 */
static void __program_parse_argc_argv(int argc, char *argdata) {
    char *cursor = argdata;

    __program_argc = argc;
    __program_argv = malloc(argc * sizeof(void **));
    if (__program_argv == NULL) {
        generic_panic();
    }

    int idx = 0;
    while (idx < argc) {
        const size_t arg_len = strlen(cursor);

        __program_argv[idx] = malloc(arg_len + 1);
        if (__program_argv[idx] == NULL) {
            generic_panic();
        }
        strcpy((char *)__program_argv[idx], cursor);

        /* Skip past this string and its terminator. */
        cursor += arg_len + 1;
        idx++;
    }
}
#endif
/*
 * Frees every argv slot (clearing it to NULL) and then the argv array itself.
 * Does nothing in stage 1, where no argument vector is allocated.
 */
static void __program_cleanup_argv(void) {
#ifndef FUSEE_STAGE1_SRC
    int idx = 0;

    while (idx < __program_argc) {
        void **slot = &__program_argv[idx];

        free(*slot);
        *slot = NULL;
        idx++;
    }
    free(__program_argv);
#endif
}

View File

@@ -15,7 +15,7 @@
/ and optional writing functions as well. */
#define FF_FS_MINIMIZE 3
#define FF_FS_MINIMIZE 0
/* This option defines minimization level to remove some basic API functions.
/
/ 0: Basic functions are fully enabled.

View File

@@ -4,15 +4,13 @@
#include "se.h"
#include "sd_utils.h"
#include "stage2.h"
#include "chainloader.h"
#include "sdmmc.h"
#include "lib/fatfs/ff.h"
#include "lib/printk.h"
#include "display/video_fb.h"
/* TODO: Should we allow more than 32K for the BCT0? */
#define BCT0_LOAD_ADDRESS (uintptr_t)(0x40038000)
#define BCT0_LOAD_END_ADDRESS (uintptr_t)(0x4003F000)
#define MAGIC_BCT0 0x30544342
static char g_bct0_buffer[BCTO_MAX_SIZE];
#define DEFAULT_BCT0_FOR_DEBUG \
"BCT0\n"\
@@ -22,21 +20,21 @@
"stage2_entrypoint = 0xFFF00000\n"
const char *load_config(void) {
if (!read_sd_file((void *)BCT0_LOAD_ADDRESS, BCT0_LOAD_END_ADDRESS - BCT0_LOAD_ADDRESS, "BCT.ini")) {
if (!read_sd_file(g_bct0_buffer, BCTO_MAX_SIZE, "BCT.ini")) {
printk("Failed to read BCT0 from SD!\n");
printk("[DEBUG] Using default BCT0!\n");
memcpy((void *)BCT0_LOAD_ADDRESS, DEFAULT_BCT0_FOR_DEBUG, sizeof(DEFAULT_BCT0_FOR_DEBUG));
memcpy(g_bct0_buffer, DEFAULT_BCT0_FOR_DEBUG, sizeof(DEFAULT_BCT0_FOR_DEBUG));
/* TODO: Stop using default. */
/* printk("Error: Failed to load BCT.ini!\n");
* generic_panic(); */
}
if ((*((u32 *)(BCT0_LOAD_ADDRESS))) != MAGIC_BCT0) {
if (memcmp(g_bct0_buffer, "BCT0", 4) != 0) {
printk("Error: Unexpected magic in BCT.ini!\n");
generic_panic();
}
/* Return pointer to first line of the ini. */
const char *bct0 = (const char *)BCT0_LOAD_ADDRESS;
const char *bct0 = g_bct0_buffer;
while (*bct0 && *bct0 != '\n') {
bct0++;
}
@@ -60,12 +58,10 @@ void load_sbk(void) {
}
int main(void) {
stage2_entrypoint_t stage2_entrypoint;
void **stage2_argv = (void **)(BCT0_LOAD_END_ADDRESS);
const char *bct0;
u32 *lfb_base;
char buf[0x400];
memset(buf, 0xCC, 0x400);
const char *stage2_path;
stage2_args_t stage2_args = {0};
/* Initialize DRAM. */
/* TODO: What can be stripped out to make this minimal? */
@@ -75,7 +71,7 @@ int main(void) {
display_init();
/* Register the display as a printk provider. */
lfb_base = display_init_framebuffer();
lfb_base = display_init_framebuffer((void *)0xC0000000);
video_init(lfb_base);
/* Turn on the backlight after initializing the lfb */
@@ -102,23 +98,21 @@ int main(void) {
bct0 = load_config();
/* Load the loader payload into DRAM. */
stage2_entrypoint = load_stage2(bct0);
load_stage2(bct0);
/* Setup argv. */
memset(stage2_argv, 0, STAGE2_ARGC * sizeof(*stage2_argv));
stage2_argv[STAGE2_ARGV_PROGRAM_PATH] = (void *)stage2_get_program_path();
stage2_argv[STAGE2_ARGV_ARGUMENT_STRUCT] = &stage2_argv[STAGE2_ARGC];
stage2_args_t *args = (stage2_args_t *)stage2_argv[STAGE2_ARGV_ARGUMENT_STRUCT];
/* Setup arguments struct. */
args->version = 0;
args->bct0 = bct0;
args->lfb = (uint32_t *)lfb_base;
args->console_col = video_get_col();
args->console_row = video_get_row();
f_unmount("");
display_enable_backlight(false);
display_end();
/* Setup argument data. */
stage2_path = stage2_get_program_path();
stage2_args.version = 0;
strcpy(stage2_args.bct0, bct0);
strcpy(g_chainloader_arg_data, stage2_path);
memcpy(g_chainloader_arg_data + strlen(stage2_path) + 1, &stage2_args, sizeof(stage2_args_t));
/* Jump to Stage 2. */
stage2_entrypoint(STAGE2_ARGC, stage2_argv);
relocate_and_chainload(STAGE2_ARGC);
return 0;
}

View File

@@ -1,12 +1,15 @@
#include "utils.h"
#include <stdint.h>
#include "display/video_fb.h"
#include "sd_utils.h"
#include "stage2.h"
#include "chainloader.h"
#include "lib/printk.h"
#include "lib/vsprintf.h"
#include "lib/ini.h"
#include "lib/fatfs/ff.h"
char g_stage2_path[0x300] = {0};
char g_stage2_path[0x100] = {0};
const char *stage2_get_program_path(void) {
return g_stage2_path;
@@ -22,13 +25,13 @@ static int stage2_ini_handler(void *user, const char *section, const char *name,
/* Read in load address as a hex string. */
sscanf(value, "%x", &x);
config->load_address = x;
if (config->entrypoint == NULL) {
config->entrypoint = (stage2_entrypoint_t)config->load_address;
if (config->entrypoint == 0) {
config->entrypoint = config->load_address;
}
} else if (strcmp(name, STAGE2_ENTRYPOINT_KEY) == 0) {
/* Read in entrypoint as a hex string. */
sscanf(value, "%x", &x);
config->entrypoint = (stage2_entrypoint_t)x;
config->entrypoint = x;
} else {
return 0;
}
@@ -38,8 +41,11 @@ static int stage2_ini_handler(void *user, const char *section, const char *name,
return 1;
}
stage2_entrypoint_t load_stage2(const char *bct0) {
void load_stage2(const char *bct0) {
stage2_config_t config = {0};
FILINFO info;
size_t size;
uintptr_t tmp_addr;
if (ini_parse_string(bct0, stage2_ini_handler, &config) < 0) {
printk("Error: Failed to parse BCT.ini!\n");
@@ -51,17 +57,65 @@ stage2_entrypoint_t load_stage2(const char *bct0) {
generic_panic();
}
if (strlen(config.path) + 1 + sizeof(stage2_args_t) > CHAINLOADER_ARG_DATA_MAX_SIZE) {
printk("Error: Stage2's path name is too big!\n");
}
if (!check_32bit_address_loadable(config.entrypoint)) {
printk("Error: Stage2's entrypoint is invalid!\n");
generic_panic();
}
if (!check_32bit_address_loadable(config.load_address)) {
printk("Error: Stage2's load address is invalid!\n");
generic_panic();
}
printk("[DEBUG] Stage 2 Config:\n");
printk(" File Path: %s\n", config.path);
printk(" Load Address: 0x%08x\n", config.load_address);
printk(" Entrypoint: 0x%p\n", config.entrypoint);
if (!read_sd_file((void *)config.load_address, 0x100000, config.path)) {
if (f_stat(config.path, &info) != FR_OK) {
printk("Error: Failed to stat stage2 (%s)!\n", config.path);
generic_panic();
}
size = (size_t)info.fsize;
/* the LFB is located at 0xC0000000 atm */
if (size > 0xC0000000u - 0x80000000u) {
printk("Error: Stage2 is way too big!\n");
generic_panic();
}
if (!check_32bit_address_range_loadable(config.load_address, size)) {
printk("Error: Stage2 has an invalid load address & size combination!\n");
generic_panic();
}
if (config.entrypoint < config.load_address || config.entrypoint >= config.load_address + size) {
printk("Error: Stage2's entrypoint is outside Stage2!\n");
generic_panic();
}
if (check_32bit_address_range_in_program(config.load_address, size)) {
tmp_addr = 0x80000000u;
} else {
tmp_addr = config.load_address;
}
if (read_sd_file((void *)tmp_addr, size, config.path) != size) {
printk("Error: Failed to read stage2 (%s)!\n", config.path);
generic_panic();
}
strncpy(g_stage2_path, config.path, sizeof(g_stage2_path));
g_chainloader_num_entries = 1;
g_chainloader_entries[0].load_address = config.load_address;
g_chainloader_entries[0].src_address = tmp_addr;
g_chainloader_entries[0].size = size;
g_chainloader_entries[0].num = 0;
g_chainloader_entrypoint = config.entrypoint;
return config.entrypoint;
strncpy(g_stage2_path, config.path, sizeof(g_stage2_path));
}

View File

@@ -11,24 +11,23 @@
#define STAGE2_NAME_KEY "stage2_path"
#define STAGE2_ADDRESS_KEY "stage2_addr"
#define STAGE2_ENTRYPOINT_KEY "stage2_entrypoint"
typedef void (*stage2_entrypoint_t)(int argc, void **argv);
#define BCTO_MAX_SIZE 0x6000
typedef struct {
char path[0x300];
char path[0x100];
uintptr_t load_address;
stage2_entrypoint_t entrypoint;
uintptr_t entrypoint;
} stage2_config_t;
typedef struct {
uint32_t version;
const char *bct0;
uint32_t *lfb;
uint32_t console_row;
uint32_t console_col;
char bct0[BCTO_MAX_SIZE];
} stage2_args_t;
const char *stage2_get_program_path(void);
stage2_entrypoint_t load_stage2(const char *bct0);
void load_stage2(const char *bct0);
#endif

View File

@@ -2,30 +2,27 @@
mov r\@, #0
.endm
.section .text.start
.section .text.start, "ax", %progbits
.arm
.align 5
.global _start
.type _start, %function
_start:
/* Insert NOPs for convenience (i.e. to use Nintendo's BCTs, for example) */
.rept 16
nop
.endr
/* Switch to supervisor mode, mask all interrupts, clear all flags */
/* Switch to system mode, mask all interrupts, clear all flags */
msr cpsr_cxsf, #0xDF
/* Relocate ourselves if necessary */
ldr r0, =__start__
adr r1, _start
cmp r0, r1
ldr r2, =__start__
adr r3, _start
cmp r2, r3
bne _relocation_loop_end
ldr r2, =__bss_start__
sub r2, r2, r0 /* size >= 32, obviously */
ldr r4, =__bss_start__
sub r4, r4, r2 /* size >= 32, obviously, and we've declared 32-byte-alignment */
_relocation_loop:
ldmia r1!, {r3-r10}
stmia r0!, {r3-r10}
subs r2, #0x20
ldmia r3!, {r5-r12}
stmia r2!, {r5-r12}
subs r4, #0x20
bne _relocation_loop
ldr r12, =_relocation_loop_end
@@ -33,22 +30,25 @@ _start:
_relocation_loop_end:
/* Set the stack pointer */
ldr sp, =0x40008000
mov fp, #0
ldr sp, =__stack_top__
mov fp, #0
bl __program_init
/* Clear .bss */
ldr r0, =__bss_start__
mov r1, #0
ldr r2, =__bss_end__
sub r2, r2, r0
bl memset
/* Call global constructors */
bl __libc_init_array
/* Set r0 to r12 to 0 (because why not?) & call main */
/* Set r0 to r12 to 0 (for debugging) & call main */
.rept 13
CLEAR_GPR_REG_ITER
.endr
bl main
b .
ldr r0, =__program_argc
ldr r1, =__program_argv
ldr lr, =__program_exit
b main
/* No need to include this in normal programs: */
.section .chainloader.text.start, "ax", %progbits
.arm
.align 5
.global relocate_and_chainload
.type relocate_and_chainload, %function
relocate_and_chainload:
ldr sp, =__stack_top__
b relocate_and_chainload_main

View File

@@ -71,9 +71,38 @@ static inline bool check_32bit_additive_overflow(uint32_t a, uint32_t b) {
return __builtin_add_overflow_p(a, b, (uint32_t)0);
}
/*
 * Returns true when addr lies in one of the two windows code may be loaded
 * into: [0x40010000, 0x40040000) or anything at/above 0x80000000.
 */
static inline bool check_32bit_address_loadable(uintptr_t addr) {
    /* FWIW the bootROM forbids loading anything between 0x40000000 and 0x40010000, using it for itself... */
    return (addr >= 0x40010000u && addr < 0x40040000u) || addr >= 0x80000000u;
}

/*
 * Returns true when the range starting at addr and spanning size bytes can be
 * loaded: addr + size must not wrap around, both addr and addr + size must be
 * loadable addresses, and a range that starts in the low window
 * [0x40010000, 0x40040000) must not run past the top of that window.
 */
static inline bool check_32bit_address_range_loadable(uintptr_t addr, size_t size) {
    return
        !__builtin_add_overflow_p(addr, size, (uintptr_t)0) && /* the range doesn't overflow */
        check_32bit_address_loadable(addr) && check_32bit_address_loadable(addr + size) && /* bounds are valid */
        !(addr >= 0x40010000u && addr < 0x40040000u && addr + size >= 0x40040000u) /* the range doesn't cross MMIO */
    ;
}
bool overlaps(uint64_t as, uint64_t ae, uint64_t bs, uint64_t be);
/*
 * Pointer-typed convenience wrapper around overlaps(): converts the bounds of
 * range a (as, ae) and range b (bs, be) to 64-bit integers and forwards them.
 */
static inline bool overlaps_a(const void *as, const void *ae, const void *bs, const void *be) {
    const uint64_t a_begin = (uint64_t)(uintptr_t)as;
    const uint64_t a_end = (uint64_t)(uintptr_t)ae;
    const uint64_t b_begin = (uint64_t)(uintptr_t)bs;
    const uint64_t b_end = (uint64_t)(uintptr_t)be;

    return overlaps(a_begin, a_end, b_begin, b_end);
}
/*
 * Returns true when the range starting at addr and spanning size bytes
 * overlaps memory the running program occupies: the chainloader section, the
 * stack, the framebuffer window 0xC0000000-0xC03C0000, or the program image
 * (__start__ to __end__).
 */
static inline bool check_32bit_address_range_in_program(uintptr_t addr, size_t size) {
    extern uint8_t __chainloader_start__[], __chainloader_end__[];
    extern uint8_t __stack_bottom__[], __stack_top__[];
    extern uint8_t __start__[], __end__[];
    uint8_t *const range_begin = (uint8_t *)addr;
    uint8_t *const range_end = range_begin + size;

    if (overlaps_a(range_begin, range_end, __chainloader_start__, __chainloader_end__)) {
        return true;
    }
    if (overlaps_a(range_begin, range_end, __stack_bottom__, __stack_top__)) {
        return true;
    }
    /* framebuffer */
    if (overlaps_a(range_begin, range_end, (void *)0xC0000000, (void *)0xC03C0000)) {
        return true;
    }
    return overlaps_a(range_begin, range_end, __start__, __end__);
}
void panic(uint32_t code);
void generic_panic(void);
void panic_predefined(uint32_t which);
bool overlaps(uint64_t as, uint64_t ae, uint64_t bs, uint64_t be);
#endif