ARM fixed-point multiply macros: use r12 as a scratch register.

libmad (PROD_O, PROD_A): coefficients are loaded alternately into r4 and
r12 so that each load is separated from the multiply that consumes it; the
asm is marked volatile and "r12" and "memory" are added to the clobbers.

libmusepack (MPC_MULTIPLY, MPC_MULTIPLY_EX, MPC_MULTIPLY_FRACT) and
libatrac (fixmul16, fixmul31): the separate 'high' temporary is replaced by
a clobbered r12 and the asm switches to named operands. MPC_MULTIPLY_EX
parenthesizes its Z argument in the 32-(Z) operand.

Note: this copy was re-wrapped from a whitespace-collapsed paste, so the
indentation of context lines is approximate; apply with `patch -p0 -l` if
the context does not match.

Index: apps/codecs/libmad/synth.c
===================================================================
--- apps/codecs/libmad/synth.c (revision 26255)
+++ apps/codecs/libmad/synth.c (working copy)
@@ -829,55 +829,57 @@
 #elif defined(FPM_ARM)
 
 #define PROD_O(hi, lo, f, ptr) \
-  ({ \
-    mad_fixed_t *__p = (f); \
-    asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
-        "ldr r4, [%3, #0]\n\t" \
-        "smull %0, %1, r0, r4\n\t" \
-        "ldr r4, [%3, #56]\n\t" \
-        "smlal %0, %1, r1, r4\n\t" \
-        "ldr r4, [%3, #48]\n\t" \
-        "smlal %0, %1, r2, r4\n\t" \
-        "ldr r4, [%3, #40]\n\t" \
-        "smlal %0, %1, r3, r4\n\t" \
-        "ldmia %2, {r0, r1, r2, r3}\n\t" \
-        "ldr r4, [%3, #32]\n\t" \
-        "smlal %0, %1, r0, r4\n\t" \
-        "ldr r4, [%3, #24]\n\t" \
-        "smlal %0, %1, r1, r4\n\t" \
-        "ldr r4, [%3, #16]\n\t" \
-        "smlal %0, %1, r2, r4\n\t" \
-        "ldr r4, [%3, #8]\n\t" \
-        "smlal %0, %1, r3, r4\n\t" \
+  ({ \
+    mad_fixed_t *__p = (f); \
+    asm volatile ( \
+        "ldmia %2!, {r0, r1, r2, r3} \n\t" \
+        "ldr r4 , [%3, #0] \n\t" \
+        "ldr r12, [%3, #56] \n\t" \
+        "smull %0, %1, r0, r4 \n\t" \
+        "ldr r4 , [%3, #48] \n\t" \
+        "smlal %0, %1, r1, r12 \n\t" \
+        "ldr r12, [%3, #40] \n\t" \
+        "smlal %0, %1, r2, r4 \n\t" \
+        "smlal %0, %1, r3, r12 \n\t" \
+        "ldmia %2, {r0, r1, r2, r3} \n\t" \
+        "ldr r4 , [%3, #32] \n\t" \
+        "ldr r12, [%3, #24] \n\t" \
+        "smlal %0, %1, r0, r4 \n\t" \
+        "ldr r4 , [%3, #16] \n\t" \
+        "smlal %0, %1, r1, r12 \n\t" \
+        "ldr r12, [%3, #8] \n\t" \
+        "smlal %0, %1, r2, r4 \n\t" \
+        "smlal %0, %1, r3, r12" \
         : "=&r" (lo), "=&r" (hi), "+r" (__p) \
-        : "r" (ptr) \
-        : "r0", "r1", "r2", "r3", "r4"); \
+        : "r" (ptr) \
+        : "r0", "r1", "r2", "r3", "r4", "r12", "memory"); \
   })
 
 #define PROD_A(hi, lo, f, ptr) \
-  ({ \
-    mad_fixed_t *__p = (f); \
-    asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
-        "ldr r4, [%3, #0]\n\t" \
-        "smlal %0, %1, r0, r4\n\t" \
-        "ldr r4, [%3, #56]\n\t" \
-        "smlal %0, %1, r1, r4\n\t" \
-        "ldr r4, [%3, #48]\n\t" \
-        "smlal %0, %1, r2, r4\n\t" \
-        "ldr r4, [%3, #40]\n\t" \
-        "smlal %0, %1, r3, r4\n\t" \
-        "ldmia %2, {r0, r1, r2, r3}\n\t" \
-        "ldr r4, [%3, #32]\n\t" \
-        "smlal %0, %1, r0, r4\n\t" \
-        "ldr r4, [%3, #24]\n\t" \
-        "smlal %0, %1, r1, r4\n\t" \
-        "ldr r4, [%3, #16]\n\t" \
-        "smlal %0, %1, r2, r4\n\t" \
-        "ldr r4, [%3, #8]\n\t" \
-        "smlal %0, %1, r3, r4\n\t" \
+  ({ \
+    mad_fixed_t *__p = (f); \
+    asm volatile ( \
+        "ldmia %2!, {r0, r1, r2, r3} \n\t" \
+        "ldr r4 , [%3, #0] \n\t" \
+        "ldr r12, [%3, #56] \n\t" \
+        "smlal %0, %1, r0, r4 \n\t" \
+        "ldr r4 , [%3, #48] \n\t" \
+        "smlal %0, %1, r1, r12 \n\t" \
+        "ldr r12, [%3, #40] \n\t" \
+        "smlal %0, %1, r2, r4 \n\t" \
+        "smlal %0, %1, r3, r12 \n\t" \
+        "ldmia %2, {r0, r1, r2, r3} \n\t" \
+        "ldr r4 , [%3, #32] \n\t" \
+        "ldr r12, [%3, #24] \n\t" \
+        "smlal %0, %1, r0, r4 \n\t" \
+        "ldr r4 , [%3, #16] \n\t" \
+        "smlal %0, %1, r1, r12 \n\t" \
+        "ldr r12, [%3, #8] \n\t" \
+        "smlal %0, %1, r2, r4 \n\t" \
+        "smlal %0, %1, r3, r12" \
         : "+r" (lo), "+r" (hi), "+r" (__p) \
-        : "r" (ptr) \
-        : "r0", "r1", "r2", "r3", "r4"); \
+        : "r" (ptr) \
+        : "r0", "r1", "r2", "r3", "r4", "r12", "memory"); \
   })
 
 void synth_full_odd_sbsample (mad_fixed_t *pcm,
Index: apps/codecs/libmusepack/mpcdec_math.h
===================================================================
--- apps/codecs/libmusepack/mpcdec_math.h (revision 26255)
+++ apps/codecs/libmusepack/mpcdec_math.h (working copy)
@@ -115,32 +115,34 @@
         return t1;
     }
 #elif defined(CPU_ARM)
-    // borrowed and adapted from libMAD
+    /* Calculate: result = (X*Y)>>14 */
+    /* Use scratch register r12 */
     #define MPC_MULTIPLY(X,Y) \
     ({ \
-        MPC_SAMPLE_FORMAT low; \
-        MPC_SAMPLE_FORMAT high; \
-        asm volatile ( /* will calculate: result = (X*Y)>>14 */ \
-            "smull %0,%1,%2,%3 \n\t" /* multiply with result %0 [0..31], %1 [32..63] */ \
-            "mov %0, %0, lsr #14 \n\t" /* %0 = %0 >> 14 */ \
-            "orr %0, %0, %1, lsl #18 \n\t"/* result = %0 OR (%1 << 18) */ \
-            : "=&r"(low), "=&r" (high) \
-            : "r"(X),"r"(Y)); \
-        low; \
+        MPC_SAMPLE_FORMAT res; \
+        asm volatile ( \
+            "smull %[res], r12, %[x], %[y] \n\t" /* multiply */ \
+            "mov %[res], %[res], lsr #14 \n\t" /* %[res] >>= 14 */ \
+            "orr %[res], %[res], r12, lsl #18" /* %[res] = %[res] OR (r12 << 18) */ \
+            : [res]"=&r"(res) \
+            : [x]"r"(X), [y]"r"(Y) \
+            : "r12" ); \
+        res; \
     })
 
-    // borrowed and adapted from libMAD
+    /* Calculate: result = (X*Y)>>Z */
+    /* Use scratch register r12 */
     #define MPC_MULTIPLY_EX(X,Y,Z) \
     ({ \
-        MPC_SAMPLE_FORMAT low; \
-        MPC_SAMPLE_FORMAT high; \
-        asm volatile ( /* will calculate: result = (X*Y)>>Z */ \
-            "smull %0,%1,%2,%3 \n\t" /* multiply with result %0 [0..31], %1 [32..63] */ \
-            "mov %0, %0, lsr %4 \n\t" /* %0 = %0 >> Z */ \
-            "orr %0, %0, %1, lsl %5 \n\t" /* result = %0 OR (%1 << (32-Z)) */ \
-            : "=&r"(low), "=&r" (high) \
-            : "r"(X),"r"(Y),"r"(Z),"r"(32-Z)); \
-        low; \
+        MPC_SAMPLE_FORMAT res; \
+        asm volatile ( \
+            "smull %[res], r12, %[x], %[y] \n\t" /* multiply */ \
+            "mov %[res], %[res], lsr %[shr] \n\t" /* %[res] >>= Z */ \
+            "orr %[res], %[res], r12, lsl %[shl]" /* %[res] = %[res] OR (r12 << (32-Z)) */ \
+            : [res]"=&r"(res) \
+            : [x]"r"(X), [y]"r"(Y), [shr]"r"(Z), [shl]"r"(32-(Z)) \
+            : "r12" ); \
+        res; \
     })
 #else /* libmusepack standard */
 
@@ -188,16 +190,17 @@
         t; \
     })
 #elif defined(CPU_ARM)
-    // borrowed and adapted from libMAD
+    /* Calculate: result = (X*Y)>>32, without need for >>32 */
+    /* Use scratch register r12 */
     #define MPC_MULTIPLY_FRACT(X,Y) \
     ({ \
-        MPC_SAMPLE_FORMAT low; \
-        MPC_SAMPLE_FORMAT high; \
-        asm volatile ( /* will calculate: result = (X*Y)>>32 */ \
-            "smull %0,%1,%2,%3 \n\t" /* multiply with result %0 [0..31], %1 [32..63] */ \
-            : "=&r"(low), "=&r" (high) /* result = %1 [32..63], saves the >>32 */ \
-            : "r"(X),"r"(Y)); \
-        high; \
+        MPC_SAMPLE_FORMAT res; \
+        asm volatile ( \
+            "smull r12, %[res], %[x], %[y]" /* %[res] = higher bits of X*Y result */ \
+            : [res]"=&r"(res) \
+            : [x]"r"(X), [y]"r"(Y) \
+            : "r12" ); \
+        res; \
     })
 #else
     #define MPC_MULTIPLY_FRACT(X,Y) MPC_MULTIPLY_EX(X,Y,32)
Index: apps/codecs/libatrac/fixp_math.h
===================================================================
--- apps/codecs/libatrac/fixp_math.h (revision 26255)
+++ apps/codecs/libatrac/fixp_math.h (working copy)
@@ -31,30 +31,34 @@
 /* Fixed point math routines for use in atrac3.c */
 
 #if defined(CPU_ARM)
+    /* Calculates: result = (X*Y)>>16 */
+    /* Use scratch register r12 */
     #define fixmul16(X,Y) \
     ({ \
-        int32_t low; \
-        int32_t high; \
-        asm volatile ( /* calculates: result = (X*Y)>>16 */ \
-            "smull %0,%1,%2,%3 \n\t" /* 64 = 32x32 multiply */ \
-            "mov %0, %0, lsr #16 \n\t" /* %0 = %0 >> 16 */ \
-            "orr %0, %0, %1, lsl #16 \n\t"/* result = %0 OR (%1 << 16) */ \
-            : "=&r"(low), "=&r" (high) \
-            : "r"(X),"r"(Y)); \
-        low; \
+        int32_t res; \
+        asm volatile ( \
+            "smull %[res], r12, %[x], %[y] \n\t" /* multiply */ \
+            "mov %[res], %[res], lsr #16 \n\t" /* %[res] >>= 16 */ \
+            "orr %[res], %[res], r12, lsl #16" /* %[res] = %[res] OR (r12 << 16) */ \
+            : [res]"=&r"(res) \
+            : [x]"r"(X), [y]"r"(Y) \
+            : "r12" ); \
+        res; \
     })
 
+    /* Calculates: result = (X*Y)>>31 */
+    /* Use scratch register r12 */
     #define fixmul31(X,Y) \
     ({ \
-        int32_t low; \
-        int32_t high; \
-        asm volatile ( /* calculates: result = (X*Y)>>31 */ \
-            "smull %0,%1,%2,%3 \n\t" /* 64 = 32x32 multiply */ \
-            "mov %0, %0, lsr #31 \n\t" /* %0 = %0 >> 31 */ \
-            "orr %0, %0, %1, lsl #1 \n\t" /* result = %0 OR (%1 << 1) */ \
-            : "=&r"(low), "=&r" (high) \
-            : "r"(X),"r"(Y)); \
-        low; \
+        int32_t res; \
+        asm volatile ( \
+            "smull %[res], r12, %[x], %[y] \n\t" /* multiply */ \
+            "mov %[res], %[res], lsr #31 \n\t" /* %[res] >>= 31 */ \
+            "orr %[res], %[res], r12, lsl #1" /* %[res] = %[res] OR (r12 << 1) */ \
+            : [res]"=&r"(res) \
+            : [x]"r"(X), [y]"r"(Y) \
+            : "r12" ); \
+        res; \
     })
 #elif defined(CPU_COLDFIRE)
     #define fixmul16(X,Y) \