apps/plugins/SOURCES | 1 + apps/plugins/plugin.lds | 2 +- firmware/target/arm/as3525/app.lds | 2 +- firmware/target/arm/as3525/ata_sd_as3525.c | 67 ++++++++++++++++++++++----- 4 files changed, 57 insertions(+), 15 deletions(-) diff --git a/apps/plugins/SOURCES b/apps/plugins/SOURCES index 6edb734..9019997 100644 --- a/apps/plugins/SOURCES +++ b/apps/plugins/SOURCES @@ -1,3 +1,4 @@ +test_disk.c /* plugins common to all models */ chessclock.c credits.c diff --git a/apps/plugins/plugin.lds b/apps/plugins/plugin.lds index fcfbf3b..68c63cc 100644 --- a/apps/plugins/plugin.lds +++ b/apps/plugins/plugin.lds @@ -136,7 +136,7 @@ OUTPUT_FORMAT(elf32-littlemips) #define CODEC_ORIGIN (IRAM_ORIG + IRAM_SIZE - CODEC_SIZE) #define PLUGIN_ORIGIN (DRAM_ORIG + DRAMSIZE) #else -#define IRAMORIG (IRAM_ORIG + 0x20000) +#define IRAMORIG (IRAM_ORIG + 0x22000) #define IRAMSIZE (IRAM_ORIG + IRAM_SIZE - IRAMORIG) #endif diff --git a/firmware/target/arm/as3525/app.lds b/firmware/target/arm/as3525/app.lds index eb5b28f..df31df8 100644 --- a/firmware/target/arm/as3525/app.lds +++ b/firmware/target/arm/as3525/app.lds @@ -31,7 +31,7 @@ STARTUP(target/arm/crt0.o) #else #define DRAMSIZE (DRAM_SIZE - STUBOFFSET - PLUGINSIZE - CODECSIZE - TTB_SIZE) #define CODECORIG (ENDAUDIOADDR) -#define IRAMSIZE (0x20000) +#define IRAMSIZE (0x22000) #endif diff --git a/firmware/target/arm/as3525/ata_sd_as3525.c b/firmware/target/arm/as3525/ata_sd_as3525.c index 85b1daf..5757cd3 100644 --- a/firmware/target/arm/as3525/ata_sd_as3525.c +++ b/firmware/target/arm/as3525/ata_sd_as3525.c @@ -25,12 +25,12 @@ /* TODO: Find the real capacity of >2GB models (will be useful for USB) */ #include "config.h" /* for HAVE_MULTIDRIVE & AMS_OF_SIZE */ +#include "cpu.h" #include "fat.h" #include "thread.h" #include "led.h" #include "hotswap.h" #include "system.h" -#include "cpu.h" #include #include #include @@ -137,9 +137,22 @@ static struct wakeup transfer_completion_signal; static volatile unsigned int transfer_error[NUM_VOLUMES]; 
#define PL180_MAX_TRANSFER_ERRORS 10 -#define UNALIGNED_NUM_SECTORS 10 -static unsigned char aligned_buffer[UNALIGNED_NUM_SECTORS* SD_BLOCK_SIZE] __attribute__((aligned(32))); /* align on cache line size */ -static unsigned char *uncached_buffer = UNCACHED_ADDR(&aligned_buffer[0]); +#ifdef AMS_LOWMEM /* every 10 sectors cost 5k of RAM, so stay smaller on lowmem */ +#define UNALIGNED_NUM_SECTORS 32 +#define SDBUF_ORIG 0 +#define ALIGNEDBUF_ATTR +#else +#define UNALIGNED_NUM_SECTORS 64 +/* IBSS_ATTR doesn't give the physical address DMA needs; subtract this */ +#define SDBUF_ORIG IRAM_ORIG +/* enough space in IRAM for the larger buffer */ +#define ALIGNEDBUF_ATTR IBSS_ATTR +#endif +static unsigned char sd_buffer[UNALIGNED_NUM_SECTORS* SD_BLOCK_SIZE] + ALIGNEDBUF_ATTR __attribute__((aligned(32))); /* align on cache line size */ +static unsigned char *aligned_buffer = /* physical address of sd_buffer, for DMA */ + (unsigned char*)((intptr_t)sd_buffer - SDBUF_ORIG); +static unsigned char *uncached_buffer = UNCACHED_ADDR(&sd_buffer[0]); static inline void mci_delay(void) { @@ -690,7 +703,10 @@ static int sd_transfer_sectors(IF_MD2(int drive,) unsigned long start, #endif int ret = 0; unsigned loops = 0; + unsigned transfer = 0; + if (count <= 0) + return ret; /* skip SanDisk OF */ if (drive == INTERNAL_AS3525) start += AMS_OF_SIZE; @@ -712,11 +728,27 @@ static int sd_transfer_sectors(IF_MD2(int drive,) unsigned long start, dma_retain(); - while(count) +#ifdef AMS_LOWMEM + /* on lowmem we use a smaller (32*SD_BLOCK_SIZE) sd_buffer, and DMA directly + * to/from buf (after cache coherency calls) when buf is 4-byte aligned; + * that gives worse but still good performance compared to a + * 64*SD_BLOCK_SIZE buffer in IRAM, while saving memory */ + bool _buf_unaligned = (intptr_t)buf & 3; + if (!_buf_unaligned) + { + if (write) + invalidate_dcache_range(buf, count * SD_BLOCK_SIZE); + else + dump_dcache_range(buf, count * SD_BLOCK_SIZE); + } +#else +#define _buf_unaligned true /* always bounce through sd_buffer */ +#endif + + /* start with a small 4k (8 sector) chunk so that data is available early 
*/ + transfer = (count > 8)? 8:count; /* sectors */ + do { - /* 128 * 512 = 2^16, and doesn't fit in the 16 bits of DATA_LENGTH - * register, so we have to transfer maximum 127 sectors at a time. */ - unsigned int transfer = (count >= 128) ? 127 : count; /* sectors */ void *dma_buf; const int cmd = write ? SD_WRITE_MULTIPLE_BLOCK : SD_READ_MULTIPLE_BLOCK; @@ -750,12 +782,17 @@ static int sd_transfer_sectors(IF_MD2(int drive,) unsigned long start, if(!(card_info[drive].ocr & (1<<30))) /* not SDHC */ bank_start *= SD_BLOCK_SIZE; - dma_buf = aligned_buffer; if(transfer > UNALIGNED_NUM_SECTORS) transfer = UNALIGNED_NUM_SECTORS; - if(write) - memcpy(uncached_buffer, buf, transfer * SD_BLOCK_SIZE); + if (_buf_unaligned) + { + dma_buf = aligned_buffer; + if(write) + memcpy(uncached_buffer, buf, transfer * SD_BLOCK_SIZE); + } + else + dma_buf = buf; ret = sd_wait_for_state(drive, SD_TRAN); if (ret < 0) @@ -813,7 +850,7 @@ static int sd_transfer_sectors(IF_MD2(int drive,) unsigned long start, if(!transfer_error[drive]) { - if(!write) + if(!write && _buf_unaligned) memcpy(buf, uncached_buffer, transfer * SD_BLOCK_SIZE); buf += transfer * SD_BLOCK_SIZE; start += transfer; @@ -823,7 +860,11 @@ static int sd_transfer_sectors(IF_MD2(int drive,) unsigned long start, else if(loops++ > PL180_MAX_TRANSFER_ERRORS) panicf("SD Xfer %s err:0x%x Disk%d", (write? "write": "read"), transfer_error[drive], drive); - } + + /* 128 * 512 = 2^16 doesn't fit in the 16 bits of the DATA_LENGTH + * register, so request at most 127 sectors (clamped further above). */ + transfer = (count >= 128) ? 127 : count; + } while(count > 0); ret = 0; /* success */