diff --git a/apps/codecs/demac/libdemac/predictor-arm.S b/apps/codecs/demac/libdemac/predictor-arm.S
index 1d55876..cfd2800 100644
--- a/apps/codecs/demac/libdemac/predictor-arm.S
+++ b/apps/codecs/demac/libdemac/predictor-arm.S
@@ -505,7 +505,8 @@ loop:
 done:
     str     r14, [r12]              @ Save value of p->buf
     add     sp, sp, #12             @ Don't bother restoring r1-r3 
-    ldmia   sp!, {r4 - r11, pc}
+    ldmia   sp!, {r4 - r11, lr}
+    bx      lr
 
 move_hist:
     @ dest = r11 (p->historybuffer)
@@ -664,7 +665,8 @@ loopm:
 donem:
     str     r14, [r12]              @ Save value of p->buf
     add     sp, sp, #8              @ Don't bother restoring r1, r2
-    ldmia   sp!, {r4 - r11, pc}
+    ldmia   sp!, {r4 - r11, lr}
+    bx      lr
 
 move_histm:
     @ dest = r11 (p->historybuffer)
diff --git a/apps/codecs/lib/mdct_arm.S b/apps/codecs/lib/mdct_arm.S
index 7613983..126a30f 100644
--- a/apps/codecs/lib/mdct_arm.S
+++ b/apps/codecs/lib/mdct_arm.S
@@ -60,7 +60,7 @@ mdct_butterfly_8:
     add     r11, r12, r7                @ y7 = (x7 + x3) + (x5 + x1)
     stmia   r0!, {r1, r2, r3, r4, r5, r6, r10, r11}
 
-    mov     pc, lr
+    bx      lr
 
 mdct_butterfly_16:
 @ inputs: r0,r1   &lr
@@ -127,7 +127,8 @@ mdct_butterfly_16:
     @ mdct_butterfly_8 increments r0 by another #8*4 here
     @ at end, r0 has been incremented by #16*4
 
-    ldr     pc, [sp], #4
+    ldr     lr, [sp], #4
+    bx      lr
 
 mdct_butterfly_32:
     stmdb   sp!, {r4-r11, lr}
@@ -257,7 +258,8 @@ mdct_butterfly_32:
     @ and we wanted to advance by #16*4 anyway, so just call again
     bl      mdct_butterfly_16
 
-    ldmia   sp!, {r4-r11, pc}
+    ldmia   sp!, {r4-r11, lr}
+    bx      lr
 
     @ mdct_butterfly_generic_loop(x1, x2, T0, step, Ttop)
 mdct_butterfly_generic_loop:
@@ -433,5 +435,6 @@ mdct_butterfly_generic_loop:
     cmp     r2, r4
     bhi     1b
 
-    ldmia   sp!, {r4-r11, pc}
+    ldmia   sp!, {r4-r11, lr}
+    bx      lr
 
diff --git a/apps/codecs/libatrac/atrac3_arm.S b/apps/codecs/libatrac/atrac3_arm.S
index 0908d58..d8a739f 100644
--- a/apps/codecs/libatrac/atrac3_arm.S
+++ b/apps/codecs/libatrac/atrac3_arm.S
@@ -63,7 +63,8 @@ atrac3_iqmf_matrixing:
     subs r3, r3, #4                 /* counter -= 4 */
     bgt .iqmf_matrixing_loop
     
-    ldmfd   sp!, {r4-r9, pc}       /* restore registers */
+    ldmfd   sp!, {r4-r9, lr}       /* restore registers */
+    bx      lr
 
 .atrac3_iqmf_matrixing_end:
     .size   atrac3_iqmf_matrixing,.atrac3_iqmf_matrixing_end-atrac3_iqmf_matrixing
@@ -218,7 +219,8 @@ atrac3_iqmf_dewindowing:
     subs r3, r3, #1                 /* outer loop -= 1 */
     bgt .iqmf_dewindow_outer_loop
     
-    ldmfd   sp!, {r4-r9, pc}        /* restore registers */
+    ldmfd   sp!, {r4-r9, lr}        /* restore registers */
+    bx      lr
     
 .atrac3_iqmf_dewindowing_end:
     .size   atrac3_iqmf_dewindowing,.atrac3_iqmf_dewindowing_end-atrac3_iqmf_dewindowing
diff --git a/apps/codecs/libffmpegFLAC/arm.S b/apps/codecs/libffmpegFLAC/arm.S
index 2a2746e..7b10228 100644
--- a/apps/codecs/libffmpegFLAC/arm.S
+++ b/apps/codecs/libffmpegFLAC/arm.S
@@ -267,5 +267,6 @@ lpc_decode_arm:
     bne .default              @ no, prepare for next sample
 
 .exit:
-    ldmia sp!, { r4-r11, pc }
+    ldmia sp!, { r4-r11, lr }
+    bx lr
 
diff --git a/apps/codecs/libmad/dct32_arm.S b/apps/codecs/libmad/dct32_arm.S
index a4eda8a..fb48dd8 100644
--- a/apps/codecs/libmad/dct32_arm.S
+++ b/apps/codecs/libmad/dct32_arm.S
@@ -220,7 +220,8 @@ dct32:
     cmp      r0, #9
     bne      .l4
     add      sp, sp, #144
-    ldmia    sp!, {r4-r11, pc}
+    ldmia    sp!, {r4-r11, lr}
+    bx       lr
 bitrev:
     .word 0x0
     .word 0x2
diff --git a/apps/codecs/libmad/imdct_l_arm.S b/apps/codecs/libmad/imdct_l_arm.S
index b86ba11..12586f1 100644
--- a/apps/codecs/libmad/imdct_l_arm.S
+++ b/apps/codecs/libmad/imdct_l_arm.S
@@ -823,7 +823,8 @@ normal_block_x18_to_x35:
     @----
 
     add     sp, sp, #(21*4)             @ return stack frame
-    ldmia   sp!, { r4 - r11, pc }       @ restore callee saved regs, and return
+    ldmia   sp!, { r4 - r11, lr }       @ restore callee saved regs
+    bx      lr                          @ and return
 
     @----
 
@@ -992,7 +993,8 @@ start_block_x18_to_x35:
     @----
 
     add     sp, sp, #(21*4)             @ return stack frame
-    ldmia   sp!, { r4 - r11, pc }       @ restore callee saved regs, and return
+    ldmia   sp!, { r4 - r11, lr }       @ restore callee saved regs
+    bx      lr                          @ and return
 
     @----
     @END
diff --git a/apps/codecs/libmad/synth_full_arm.S b/apps/codecs/libmad/synth_full_arm.S
index e663b8f..3c3b479 100644
--- a/apps/codecs/libmad/synth_full_arm.S
+++ b/apps/codecs/libmad/synth_full_arm.S
@@ -135,7 +135,8 @@ synth_full_odd_sbsample:
 
     ldr     r5, =synth_full_sp
     ldr     sp, [r5]
-    ldmia   sp!, {r4-r11, pc}
+    ldmia   sp!, {r4-r11, lr}
+    bx      lr
 
 synth_full_even_sbsample:
     stmdb   sp!, {r4-r11, lr}
@@ -241,7 +242,8 @@ synth_full_even_sbsample:
 
     ldr     r5, =synth_full_sp
     ldr     sp, [r5]
-    ldmia   sp!, {r4-r11, pc}
+    ldmia   sp!, {r4-r11, lr}
+    bx      lr
 
     .global III_aliasreduce
 
@@ -289,7 +291,8 @@ III_aliasreduce:
     add     r0, r0, #72
     cmp     r0, r1
     blo     .arl1
-    ldmia   sp!, {r4-r11, pc}
+    ldmia   sp!, {r4-r11, lr}
+    bx      lr
 
 csa:
     .word +0x0db84a81
@@ -332,7 +335,8 @@ III_overlap:
     stmia r1!, {r4, r5, r6, r7, r12, lr}
     ldmia r0!, {r4, r5, r6, r7, r12, lr}
     stmia r1!, {r4, r5, r6, r7, r12, lr}
-    ldmia   sp!, {r4-r7, pc}
+    ldmia   sp!, {r4-r7, lr}
+    bx      lr
 
     .section    IBSS_SECTION_MPA_ARM,"aw",%nobits
 synth_full_sp:
diff --git a/apps/codecs/libmusepack/synth_filter_arm.S b/apps/codecs/libmusepack/synth_filter_arm.S
index 3f78469..0cc3348 100644
--- a/apps/codecs/libmusepack/synth_filter_arm.S
+++ b/apps/codecs/libmusepack/synth_filter_arm.S
@@ -89,7 +89,8 @@ mpc_decoder_windowing_D:
     subs lr, lr, #1
     bgt .loop32
     
-    ldmfd   sp!, {r4-r8, pc}
+    ldmfd   sp!, {r4-r8, lr}
+    bx      lr
 #else
 mpc_decoder_windowing_D:
     /* r0 = Data[] */
@@ -287,7 +288,8 @@ mpc_decoder_windowing_D:
     str r8, [r0], #4            /* store Data */
     add r1, r1, #4              /* V++ */
     
-    ldmfd   sp!, {r4-r11, pc}
+    ldmfd   sp!, {r4-r11, lr}
+    bx      lr
 #endif
 .mpc_dewindowing_end:
     .size   mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D
diff --git a/apps/codecs/libspeex/filters_arm4.S b/apps/codecs/libspeex/filters_arm4.S
index 109556f..340c397 100644
--- a/apps/codecs/libspeex/filters_arm4.S
+++ b/apps/codecs/libspeex/filters_arm4.S
@@ -49,7 +49,8 @@ iir_mem16:
     beq     .order_10
     cmp     r5, #8
     beq     .order_8
-    ldmia   sp!, { r4-r11, pc }     @ Non-supported order, return
+    ldmia   sp!, { r4-r11, lr }     @ Non-supported order, return
+    bx      lr
 
     @ TODO: try using direct form 1 filtering
 .order_8:
@@ -94,7 +95,8 @@ iir_mem16:
     bne     0b
     ldr     r4, [sp, #40]           @ r4 = mem
     stmia   r4, { r5-r12 }          @ Save back mem[]
-    ldmia   sp!, { r4-r11, pc }     @ Exit
+    ldmia   sp!, { r4-r11, lr }     @ Exit
+    bx      lr
 
 .order_10:
     ldmia   r4, { r5-r9 }           @ r5-r9 = mem[0..4]
@@ -154,7 +156,8 @@ iir_mem16:
     sub     r1, r1, #10*2
     subs    r3, r3, #1
     bne     .order_10
-    ldmia   sp!, { r4-r11, pc }     @ Exit
+    ldmia   sp!, { r4-r11, lr }     @ Exit
+    bx      lr
 
 
 /* void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word32_t *mem1, spx_word32_t *mem2, char *stack) */
@@ -300,7 +303,8 @@ qmf_synth:
     strh    r8, [r6], #4
     subs    r4, r4, #4
     bne     0b
-    ldmia   sp!, { r4-r11, pc }     @ Exit
+    ldmia   sp!, { r4-r11, lr }     @ Exit
+    bx      lr
 
 
 /* void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len) */
@@ -325,5 +329,6 @@ signal_mul:
     subs    r3, r3, #4                      @ Are we done?
     bne     0b
 
-    ldmia   sp!, { r4-r8, pc }              @ Exit
+    ldmia   sp!, { r4-r8, lr }              @ Exit
+    bx      lr
 
diff --git a/apps/codecs/libtta/filter_arm.S b/apps/codecs/libtta/filter_arm.S
index 37c515d..51cb39d 100644
--- a/apps/codecs/libtta/filter_arm.S
+++ b/apps/codecs/libtta/filter_arm.S
@@ -172,7 +172,8 @@ hybrid_filter:
     @ set to the memory: *pA, *(pA-1), *(pA-2), *(pA-3), *pM, *(pM-1), *(pM-2), *(pM-3)
     stmda    r2,  {r10, r11, r12, lr}
     stmda    r3,  {r5,  r6,  r7,  r8}
-    ldmfd    sp!, {r4-r12, pc}                   @ hybrid_filter end (when fs->index != 0)
+    ldmfd    sp!, {r4-r12, lr}                   @ hybrid_filter end (when fs->index != 0)
+    bx       lr
 
 .hf_memshl:
     @ memshl (fs->dl)
@@ -197,7 +198,8 @@ hybrid_filter:
     ldmia    r9,  {r1, r2, r3, r4}
     sub      r9,  r9,  #64                       @ r9 = fs->dx
     stmia    r9,  {r1 - r8}
-    ldmfd    sp!, {r4 - r12, pc}                 @ hybrid_filter end (when fs->index == 0)
+    ldmfd    sp!, {r4 - r12, lr}                 @ hybrid_filter end (when fs->index == 0)
+    bx       lr
 
 hybrid_filter_end:
     .size    hybrid_filter, hybrid_filter_end - hybrid_filter
diff --git a/apps/codecs/libwavpack/arm.S b/apps/codecs/libwavpack/arm.S
index 90dfd46..21184d1 100644
--- a/apps/codecs/libwavpack/arm.S
+++ b/apps/codecs/libwavpack/arm.S
@@ -470,5 +470,6 @@ term_minus_3_loop:
 common_exit:
         strh    r4, [r5, #4]
         strh    r0, [r5, #6]
-        ldmfd   sp!, {r4 - r8, r10, r11, pc}
+        ldmfd   sp!, {r4 - r8, r10, r11, lr}
+        bx      lr
 
diff --git a/apps/codecs/libwavpack/arml.S b/apps/codecs/libwavpack/arml.S
index 5745c81..daf5789 100644
--- a/apps/codecs/libwavpack/arml.S
+++ b/apps/codecs/libwavpack/arml.S
@@ -500,5 +500,6 @@ common_exit:
         mov     r4, r4, asr #18
         strh    r4, [r5, #4]
         strh    r0, [r5, #6]
-        ldmfd   sp!, {r4 - r8, r10, r11, pc}
+        ldmfd   sp!, {r4 - r8, r10, r11, lr}
+        bx      lr
 
diff --git a/apps/dsp_arm.S b/apps/dsp_arm.S
index 2150ff0..e1838ec 100644
--- a/apps/dsp_arm.S
+++ b/apps/dsp_arm.S
@@ -47,7 +47,8 @@ channels_process_sound_chan_mono:
     stmia   r2!, { r12, r14 }          @ store Mo0, Mo1
     bgt     .monoloop                  @
                                        @
-    ldmltfd sp!, { r4, pc }            @ if count was even, we're done
+    ldmltfd sp!, { r4, lr }            @ if count was even, we're done
+    bxlt    lr
                                        @
 .mono_singlesample:                    @
     ldr     r3, [r1]                   @ r3 = Ls
@@ -57,7 +58,8 @@ channels_process_sound_chan_mono:
     str     r12, [r1]                  @ store Mo
     str     r12, [r2]                  @ store Mo
                                        @
-    ldmfd   sp!, { r4, pc }            @
+    ldmfd   sp!, { r4, lr }            @
+    bx      lr
     .size   channels_process_sound_chan_mono, \
                 .-channels_process_sound_chan_mono
 
@@ -112,7 +114,8 @@ channels_process_sound_chan_custom:
 
     bgt     .custom_loop
     
-    ldmltfd sp!, { r4-r10, pc }        @ < 0? even count
+    ldmltfd sp!, { r4-r10, lr }        @ < 0? even count
+    bxlt    lr
     
 .custom_single_sample:
     ldr     r5, [r1]                   @ handle odd sample
@@ -131,7 +134,8 @@ channels_process_sound_chan_custom:
     str     r5, [r1]                   @ Store Lc0
     str     r7, [r2]                   @ Store Rc0
 
-    ldmfd   sp!, { r4-r10, pc }
+    ldmfd   sp!, { r4-r10, lr }
+    bx      lr
     .size   channels_process_sound_chan_custom, \
                 .-channels_process_sound_chan_custom
 
@@ -164,7 +168,8 @@ channels_process_sound_chan_karaoke:
     stmia   r2!, { r12, r14 }          @ store Ro0, Ro1
     bgt     .karaokeloop               @
                                        @
-    ldmltfd sp!, { r4, pc }            @ if count was even, we're done
+    ldmltfd sp!, { r4, lr }            @ if count was even, we're done
+    bxlt    lr
                                        @
 .karaoke_singlesample:                 @
     ldr     r3, [r1]                   @ r3 = Li
@@ -175,7 +180,8 @@ channels_process_sound_chan_karaoke:
     str     r3, [r1]                   @ store Lo
     str     r12, [r2]                  @ store Ro
                                        @
-    ldmfd   sp!, { r4, pc }            @
+    ldmfd   sp!, { r4, lr }            @
+    bx      lr
     .size   channels_process_sound_chan_karaoke, \
                 .-channels_process_sound_chan_karaoke
 
@@ -225,7 +231,8 @@ sample_output_mono:
     subs    r0, r0, #2
     bgt     .somloop     
        
-    ldmltfd sp!, { r4-r6, pc }         @ even 'count'? return
+    ldmltfd sp!, { r4-r6, lr }         @ even 'count'? return
+    bxlt    lr
 
 .som_singlesample:
     ldr     r5, [r2]                   @ do odd sample
@@ -239,7 +246,8 @@ sample_output_mono:
     orr     r5, r5, r5, lsl #16
     str     r5, [r3]
 
-    ldmfd   sp!, { r4-r6, pc }
+    ldmfd   sp!, { r4-r6, lr }
+    bx      lr
     .size   sample_output_mono, .-sample_output_mono
     
 /****************************************************************************
@@ -302,7 +310,8 @@ sample_output_stereo:
     subs    r0, r0, #2
     bgt     .sosloop
 
-    ldmltfd sp!, { r4-r9, pc }         @ even 'count'? return
+    ldmltfd sp!, { r4-r9, lr }         @ even 'count'? return
+    bxlt    lr
 
 .sos_singlesample:    
     ldr     r6, [r2]                   @ left odd sample
@@ -324,7 +333,8 @@ sample_output_stereo:
 
     str     r8, [r3]
 
-    ldmfd   sp!, { r4-r9, pc }
+    ldmfd   sp!, { r4-r9, lr }
+    bx      lr
     .size   sample_output_stereo, .-sample_output_stereo
 #endif /* ARM_ARCH < 6 */    
 
@@ -387,7 +397,8 @@ apply_crossfeed:
     stmia   r12, { r8-r11 }            @ save filter history
     str     r0, [r12, #30*4]           @ save delay line index
     add     sp, sp, #8                 @ remove temp variables from stack
-    ldmia   sp!, { r4-r11, pc }
+    ldmia   sp!, { r4-r11, lr }
+    bx      lr
     .size   apply_crossfeed, .-apply_crossfeed
 
 /****************************************************************************
@@ -444,7 +455,8 @@ dsp_downsample:
     ldr     r1, [r3]                @ r1 = &dst[0]
     sub     r8, r8, r1              @ dst - &dst[0]
     mov     r0, r8, lsr #2          @ convert bytes->samples
-    ldmia   sp!, { r4-r11, pc }     @ ... and we're out
+    ldmia   sp!, { r4-r11, lr }     @ ... and we're out
+    bx      lr
     .size   dsp_downsample, .-dsp_downsample
 
 /****************************************************************************
@@ -507,7 +519,8 @@ dsp_upsample:
     sub     r8, r8, r2              @ dst - &dst[0]
     mov     r0, r8, lsr #2          @ convert bytes->samples
     add     sp, sp, #8              @ adjust stack for temp variables
-    ldmfd   sp!, { r4-r11, pc }     @ ... and we're out
+    ldmfd   sp!, { r4-r11, lr }     @ ... and we're out
+    bx      lr
     .size       dsp_upsample, .-dsp_upsample
 
 /****************************************************************************
@@ -554,5 +567,6 @@ dsp_apply_gain:
     subs    r3, r3, #1
     bgt     .dag_outerloop          @ end of outer loop
                
-    ldmfd   sp!, { r4-r8, pc }
+    ldmfd   sp!, { r4-r8, lr }
+    bx      lr
     .size   dsp_apply_gain, .-dsp_apply_gain
diff --git a/apps/eq_arm.S b/apps/eq_arm.S
index 92446e3..0c0a222 100644
--- a/apps/eq_arm.S
+++ b/apps/eq_arm.S
@@ -85,5 +85,6 @@ eq_filter:
     bne .filterloop
 
     add sp, sp, #16            @ compensate for temp storage
-    ldmia sp!, { r4-r11, pc }
+    ldmia sp!, { r4-r11, lr }
+    bx lr
 
diff --git a/apps/plugins/SOURCES b/apps/plugins/SOURCES
index dbb5843..133a596 100644
--- a/apps/plugins/SOURCES
+++ b/apps/plugins/SOURCES
@@ -244,3 +244,4 @@ test_touchscreen.c
 #endif
 test_viewports.c
 #endif
+
diff --git a/apps/plugins/mpegplayer/idct_arm.S b/apps/plugins/mpegplayer/idct_arm.S
index 7253d89..c10ba27 100644
--- a/apps/plugins/mpegplayer/idct_arm.S
+++ b/apps/plugins/mpegplayer/idct_arm.S
@@ -313,7 +313,8 @@ mpeg2_idct_copy:
     add    r1, r1, r2
     cmp    r0, r12
     blo    1b
-    ldmfd  sp!, { r4-r11, pc }
+    ldmfd  sp!, { r4-r11, lr }
+    bx     lr
 
 mpeg2_idct_add:
     cmp    r0, #129
@@ -385,7 +386,8 @@ mpeg2_idct_add:
     add    r1, r1, r2
     cmp    r0, r12
     blo    2b
-    ldmfd  sp!, { r4-r11, pc }
+    ldmfd  sp!, { r4-r11, lr }
+    bx     lr
 3:
     stmfd  sp!, { r4-r5, lr }
     ldrsh  r1, [r0, #0]           /* r1 = block[0] */
@@ -438,4 +440,5 @@ mpeg2_idct_add:
     add    r2, r2, r3
     cmp    r2, r0
     blo    4b
-    ldmfd  sp!, { r4-r5, pc }
+    ldmfd  sp!, { r4-r5, lr }
+    bx     lr
diff --git a/apps/plugins/mpegplayer/motion_comp_arm_s.S b/apps/plugins/mpegplayer/motion_comp_arm_s.S
index fb29d59..bf2f749 100644
--- a/apps/plugins/mpegplayer/motion_comp_arm_s.S
+++ b/apps/plugins/mpegplayer/motion_comp_arm_s.S
@@ -47,7 +47,8 @@ MC_put_o_16_align0:
         subs r3, r3, #1
         add r0, r0, r2
         bne MC_put_o_16_align0
-        ldmfd sp!, {r4-r7, pc} @@ update PC with LR content.
+        ldmfd sp!, {r4-r7, lr} @@ restore saved registers and LR.
+        bx lr @@ update PC with LR content.
 
 .macro  ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4
         mov \R0, \R0, lsr #(\shift)
@@ -71,7 +72,8 @@ MC_put_o_16_align1:
         subs r3, r3, #1
         add r0, r0, r2
         bne 1b
-        ldmfd sp!, {r4-r7, pc} @@ update PC with LR content.
+        ldmfd sp!, {r4-r7, lr} @@ restore saved registers and LR.
+        bx lr @@ update PC with LR content.
 
 MC_put_o_16_align2:
         and r1, r1, #0xFFFFFFFC
@@ -83,7 +85,8 @@ MC_put_o_16_align2:
         subs r3, r3, #1
         add r0, r0, r2
         bne 1b
-        ldmfd sp!, {r4-r7, pc} @@ update PC with LR content.
+        ldmfd sp!, {r4-r7, lr} @@ restore saved registers and LR.
+        bx lr @@ update PC with LR content.
 
 MC_put_o_16_align3:
         and r1, r1, #0xFFFFFFFC
@@ -95,7 +98,8 @@ MC_put_o_16_align3:
         subs r3, r3, #1
         add r0, r0, r2
         bne 1b
-        ldmfd sp!, {r4-r7, pc} @@ update PC with LR content.
+        ldmfd sp!, {r4-r7, lr} @@ restore saved registers and LR.
+        bx lr @@ update PC with LR content.
 
 @ ----------------------------------------------------------------
         .align
@@ -120,7 +124,8 @@ MC_put_o_8_align0:
         add r0, r0, r2
         subs r3, r3, #1
         bne MC_put_o_8_align0
-        ldmfd sp!, {r4, r5, pc} @@ update PC with LR content.
+        ldmfd sp!, {r4, r5, lr} @@ restore saved registers and LR.
+        bx lr @@ update PC with LR content.
 
 .macro  ADJ_ALIGN_DW shift, R0, R1, R2
         mov \R0, \R0, lsr #(\shift)
@@ -140,7 +145,8 @@ MC_put_o_8_align1:
         subs r3, r3, #1
         add r0, r0, r2
         bne 1b
-        ldmfd sp!, {r4, r5, pc} @@ update PC with LR content.
+        ldmfd sp!, {r4, r5, lr} @@ restore saved registers and LR.
+        bx lr @@ update PC with LR content.
 
 MC_put_o_8_align2:
         and r1, r1, #0xFFFFFFFC
@@ -152,7 +158,8 @@ MC_put_o_8_align2:
         subs r3, r3, #1
         add r0, r0, r2
         bne 1b
-        ldmfd sp!, {r4, r5, pc} @@ update PC with LR content.
+        ldmfd sp!, {r4, r5, lr} @@ restore saved registers and LR.
+        bx lr @@ update PC with LR content.
 
 MC_put_o_8_align3:
         and r1, r1, #0xFFFFFFFC
@@ -164,7 +171,8 @@ MC_put_o_8_align3:
         subs r3, r3, #1
         add r0, r0, r2
         bne 1b
-        ldmfd sp!, {r4, r5, pc} @@ update PC with LR content.
+        ldmfd sp!, {r4, r5, lr} @@ restore saved registers and LR.
+        bx lr @@ update PC with LR content.
 
 @ ----------------------------------------------------------------
 .macro  AVG_PW rW1, rW2
@@ -218,7 +226,8 @@ MC_put_x_16_align0:
         subs r3, r3, #1
         add r0, r0, r2
         bne MC_put_x_16_align0
-        ldmfd sp!, {r4-r8, HIGH_REGS, pc} @@ update PC with LR content.
+        ldmfd sp!, {r4-r8, HIGH_REGS, lr} @@ restore saved registers and LR.
+        bx lr @@ update PC with LR content.
 
 MC_put_x_16_align1:
         and r1, r1, #0xFFFFFFFC
@@ -234,7 +243,8 @@ MC_put_x_16_align1:
         subs r3, r3, #1
         add r0, r0, r2
         bne 1b
-        ldmfd sp!, {r4-r8, HIGH_REGS, pc} @@ update PC with LR content.
+        ldmfd sp!, {r4-r8, HIGH_REGS, lr} @@ restore saved registers and LR.
+        bx lr @@ update PC with LR content.
 
 MC_put_x_16_align2:
         and r1, r1, #0xFFFFFFFC
@@ -250,7 +260,8 @@ MC_put_x_16_align2:
         subs r3, r3, #1
         add r0, r0, r2
         bne 1b
-        ldmfd sp!, {r4-r8, HIGH_REGS, pc} @@ update PC with LR content.
+        ldmfd sp!, {r4-r8, HIGH_REGS, lr} @@ restore saved registers and LR.
+        bx lr @@ update PC with LR content.
 
 MC_put_x_16_align3:
         and r1, r1, #0xFFFFFFFC
@@ -266,7 +277,8 @@ MC_put_x_16_align3:
         subs r3, r3, #1
         add r0, r0, r2
         bne 1b
-        ldmfd sp!, {r4-r8, HIGH_REGS, pc} @@ update PC with LR content.
+        ldmfd sp!, {r4-r8, HIGH_REGS, lr} @@ restore saved registers and LR.
+        bx lr @@ update PC with LR content.
 
 @ ----------------------------------------------------------------
         .align
@@ -297,7 +309,8 @@ MC_put_x_8_align0:
         subs r3, r3, #1
         add r0, r0, r2
         bne MC_put_x_8_align0
-        ldmfd sp!, {r4-r6, HIGH_REGS, pc} @@ update PC with LR content.
+        ldmfd sp!, {r4-r6, HIGH_REGS, lr} @@ restore saved registers and LR.
+        bx lr @@ update PC with LR content.
 
 MC_put_x_8_align1:
         and r1, r1, #0xFFFFFFFC
@@ -311,7 +324,8 @@ MC_put_x_8_align1:
         subs r3, r3, #1
         add r0, r0, r2
         bne 1b
-        ldmfd sp!, {r4-r6, HIGH_REGS, pc} @@ update PC with LR content.
+        ldmfd sp!, {r4-r6, HIGH_REGS, lr} @@ restore saved registers and LR.
+        bx lr @@ update PC with LR content.
 
 MC_put_x_8_align2:
         and r1, r1, #0xFFFFFFFC
@@ -325,7 +339,8 @@ MC_put_x_8_align2:
         subs r3, r3, #1
         add r0, r0, r2
         bne 1b
-        ldmfd sp!, {r4-r6, HIGH_REGS, pc} @@ update PC with LR content.
+        ldmfd sp!, {r4-r6, HIGH_REGS, lr} @@ restore saved registers and LR.
+        bx lr @@ update PC with LR content.
 
 MC_put_x_8_align3:
         and r1, r1, #0xFFFFFFFC
@@ -339,4 +354,5 @@ MC_put_x_8_align3:
         subs r3, r3, #1
         add r0, r0, r2
         bne 1b
-        ldmfd sp!, {r4-r6, HIGH_REGS, pc} @@ update PC with LR content.
+        ldmfd sp!, {r4-r6, HIGH_REGS, lr} @@ restore saved registers and LR.
+        bx lr @@ update PC with LR content.
diff --git a/apps/plugins/pacbox/pacbox_arm.S b/apps/plugins/pacbox/pacbox_arm.S
index 32cf2d4..f7eea69 100644
--- a/apps/plugins/pacbox/pacbox_arm.S
+++ b/apps/plugins/pacbox/pacbox_arm.S
@@ -120,7 +120,8 @@ loop_x:
 /* end of y loop */
          add     r1, r1, #224*3           @ vbuf += 224*3
          subs    lr, lr, #4               @ y-=4
-         ldmeqia sp!, {r4-r11, pc}
+         ldmeqia sp!, {r4-r11, lr}
+         bxeq    lr
          b       loop_y
 #endif
 #endif
diff --git a/apps/plugins/test_codec.c b/apps/plugins/test_codec.c
index 1ecf225..2e216bb 100644
--- a/apps/plugins/test_codec.c
+++ b/apps/plugins/test_codec.c
@@ -775,7 +775,6 @@ enum plugin_status plugin_start(const void* parameter)
         "Quit",
     );
 
-show_menu:
     rb->lcd_clear_display();
 
     result = rb->do_menu(&menu, &selection, NULL, false);
@@ -860,7 +859,6 @@ show_menu:
         }
         while (rb->button_get(true) != TESTCODEC_EXITBUTTON);
     }
-    goto show_menu;
 
 exit:
     log_close();
diff --git a/apps/recorder/jpeg_idct_arm.S b/apps/recorder/jpeg_idct_arm.S
index 4739600..f174b3f 100644
--- a/apps/recorder/jpeg_idct_arm.S
+++ b/apps/recorder/jpeg_idct_arm.S
@@ -89,7 +89,8 @@ jpeg_idct2v:
     add    r0,  r0,  #4
     cmp    r0,  r1
     bcc    1b
-    ldmia  sp!, { r4, pc }
+    ldmia  sp!, { r4, lr }
+    bx     lr
 #else
 /* ARMv6 offers partitioned adds and subtracts, used here to unroll the loop
    to two columns.
@@ -137,7 +138,8 @@ jpeg_idct2h:
     add    r1,  r1,  r3
     cmp    r0,  r2
     bcc    1b
-    ldmia  sp!, { r4-r5, pc }
+    ldmia  sp!, { r4-r5, lr }
+    bx     lr
 #else
     stmdb  sp!, { r4, lr }
     ldrsh  r14, .Lpool4+2
@@ -190,7 +192,8 @@ jpeg_idct4v:
     add    r0,  r0,  #2
     cmp    r0,  r1
     bcc    1b
-    ldmia  sp!, { r4-r7, pc }
+    ldmia  sp!, { r4-r7, lr }
+    bx     lr
 #elif ARM_ARCH < 6
     stmdb sp!, { r4-r8, lr }
     mov    r8,  #1024
@@ -325,7 +328,8 @@ jpeg_idct4h:
     add    r1,  r1,  r3
     cmp    r0,  r2
     bcc    1b
-    ldmia sp!, { r4-r10, pc }
+    ldmia sp!, { r4-r10, lr }
+    bx     lr
 #elif ARM_ARCH < 6
     stmdb  sp!, { r4-r9, lr }
     ldr    r4,  .Lpool4
@@ -424,7 +428,8 @@ jpeg_idct8v:
     cmp    r0,  r1
     add    r2,  r2,  #2
     bcc    1b
-    ldmia  sp!, { r4-r11, pc }
+    ldmia  sp!, { r4-r11, lr }
+    bx     lr
 2:
     ldr    r14, =4433
     ldr    r12, =-15137
@@ -586,7 +591,8 @@ jpeg_idct8v:
     cmp    r0,  r1
     add    r2,  r2,  #2
     bcc    1b
-    ldmia  sp!, { r4-r11, pc }
+    ldmia  sp!, { r4-r11, lr }
+    bx     lr
     .size jpeg_idct8v, .-jpeg_idct8v
 
 #if ARM_ARCH > 4
@@ -631,7 +637,8 @@ jpeg_idct8h:
     add    r1,  r1,  r3
     cmp    r0,  r2
     bcc    1b
-    ldmia  sp!, { r4-r11, pc }
+    ldmia  sp!, { r4-r11, lr }
+    bx     lr
 2:
     ldr    r14, =4433
     ldr    r12, =-15137
@@ -826,7 +833,8 @@ jpeg_idct8h:
     add    r1,  r1,  r3
     cmp    r0,  r2
     bcc    1b
-    ldmia  sp!, { r4-r11, pc }
+    ldmia  sp!, { r4-r11, lr }
+    bx     lr
     .size jpeg_idct8h, .-jpeg_idct8h
 #else
 jpeg_idct8v:
diff --git a/firmware/rolo.c b/firmware/rolo.c
index 6916678..50d9577 100644
--- a/firmware/rolo.c
+++ b/firmware/rolo.c
@@ -91,7 +91,7 @@ void rolo_restart_cop(void)
 
     asm volatile(
         "mov   r0, %0   \n"
-        "mov   pc, r0   \n"
+        "bx    r0       \n"
         : : "I"(DRAM_START)
     );
 }
@@ -168,8 +168,9 @@ void rolo_restart(const unsigned char* source, unsigned char* dest,
 #endif
 
     asm volatile(
-        "mov   pc, %0   \n"
-        : : "r"(DRAM_START)
+        "mov   r12, %0  \n"
+        "bx    r12      \n"
+        : : "r"(DRAM_START) : "r12"
     );
 
 #elif defined(CPU_ARM)
@@ -178,8 +179,9 @@ void rolo_restart(const unsigned char* source, unsigned char* dest,
     cpucache_invalidate();
 #endif
     asm volatile(
-        "mov   pc, %0            \n"
-        : : "r"(dest)
+        "mov   r12, %0  \n"
+        "bx    r12      \n"
+        : : "r"(dest) : "r12"
     );
 #elif defined(CPU_MIPS)
     __dcache_writeback_all();
diff --git a/firmware/target/arm/as3525/debug-as3525.c b/firmware/target/arm/as3525/debug-as3525.c
index 2528b1a..f4dd754 100644
--- a/firmware/target/arm/as3525/debug-as3525.c
+++ b/firmware/target/arm/as3525/debug-as3525.c
@@ -70,6 +70,14 @@ extern bool sd_enabled;
 #include "dbop-as3525.h"
 #endif
 
+static inline unsigned read_cache (void) /* returns the CP15 Cache Type Register */
+{
+    unsigned cp15_value;
+    asm volatile (
+        "mrc p15, 0, %0, c0, c0, 1   @ read cache type reg\n" : "=r"(cp15_value));
+    return (cp15_value);
+}
+
 static inline unsigned read_cp15 (void)
 {
     unsigned cp15_value;
@@ -367,6 +375,7 @@ bool __dbg_hw_info(void)
 
         lcd_putsf(0, line++, "MMU :  %s CVDDP:%4d", (read_cp15() & CP15_MMU) ?
                                         " on" : "off", adc_read(ADC_CVDD) * 25);
+        lcd_putsf(0, line++, "CACHE :   0x%8x", read_cache());
         lcd_putsf(0, line++, "Icache:%s Dcache:%s",
                                       (read_cp15() & CP15_IC)  ? " on" : "off",
                                       (read_cp15() & CP15_DC)  ? " on" : "off");
diff --git a/firmware/target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S b/firmware/target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S
index f71216c..55fa374 100644
--- a/firmware/target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S
+++ b/firmware/target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S
@@ -241,7 +241,8 @@ lcd_write_yuv420_lines:
     tst         r7, #DBOP_BUSY          @ fifo not empty?
     beq         1b                      @
 
-    ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
+    ldmfd       sp!, { r4-r11, lr }     @ restore registers
+    bx          lr                      @ and return
     .ltorg                              @ dump constant pool
     .size   lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
 
@@ -545,6 +546,7 @@ lcd_write_yuv420_lines_odither:
     tst         r7, #DBOP_BUSY          @ fifo not empty?
     beq         1b                      @
 
-    ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
+    ldmfd       sp!, { r4-r11, lr }     @ restore registers
+    bx          lr                      @ and return
     .ltorg                              @ dump constant pool
     .size   lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither
diff --git a/firmware/target/arm/as3525/sansa-clip/lcd-as-clip.S b/firmware/target/arm/as3525/sansa-clip/lcd-as-clip.S
index 5a52eb0..a10680b 100644
--- a/firmware/target/arm/as3525/sansa-clip/lcd-as-clip.S
+++ b/firmware/target/arm/as3525/sansa-clip/lcd-as-clip.S
@@ -102,5 +102,6 @@ lcd_grey_data:
     ands    r5, r5, #(1<<10)    @ wait until push fifo empties
     beq     1b
 
-    ldmfd   sp!, {r4-r7, pc}
+    ldmfd   sp!, {r4-r7, lr}
+    bx      lr
     .size   lcd_grey_data,.-lcd_grey_data
diff --git a/firmware/target/arm/ata-as-arm.S b/firmware/target/arm/ata-as-arm.S
index b1cafc2..2f2310b 100644
--- a/firmware/target/arm/ata-as-arm.S
+++ b/firmware/target/arm/ata-as-arm.S
@@ -146,7 +146,8 @@ copy_read_sectors:
 
     strb    r3, [r0], #1        /* store final byte */
 
-    ldmfd   sp!, {r4, r5, pc}
+    ldmfd   sp!, {r4, r5, lr}
+    bx      lr
 
     /* 16-bit aligned */
 .r_aligned:
@@ -195,7 +196,8 @@ copy_read_sectors:
     ldrneh  r3, [r2]
     strneh  r3, [r0], #2
 
-    ldmfd   sp!, {r4, r5, pc}
+    ldmfd   sp!, {r4, r5, lr}
+    bx      lr
 
 .r_end:
     .size   copy_read_sectors,.r_end-copy_read_sectors
@@ -300,7 +302,8 @@ copy_write_sectors:
     orr     r3, r3, r4, lsl #8
     strh    r3, [r2]            /* write final halfword */
 
-    ldmfd   sp!, {r4, r5, pc}
+    ldmfd   sp!, {r4, r5, lr}
+    bx      lr
 
     /* 16-bit aligned */
 .w_aligned:
@@ -349,7 +352,8 @@ copy_write_sectors:
     ldrneh  r3, [r0], #2
     strneh  r3, [r2]
 
-    ldmfd   sp!, {r4, r5, pc}
+    ldmfd   sp!, {r4, r5, lr}
+    bx      lr
 
 .w_end:
     .size   copy_write_sectors,.w_end-copy_write_sectors
diff --git a/firmware/target/arm/ipod/lcd-as-gray.S b/firmware/target/arm/ipod/lcd-as-gray.S
index b6878a1..9e0cc11 100644
--- a/firmware/target/arm/ipod/lcd-as-gray.S
+++ b/firmware/target/arm/ipod/lcd-as-gray.S
@@ -97,7 +97,8 @@ lcd_write_data_shifted:
     subs    r1, r1, #1
     bne     .sloop
 
-    ldmfd   sp!, {r4, pc}
+    ldmfd   sp!, {r4, lr}
+    bx      lr
     .size   lcd_write_data_shifted,.-lcd_write_data_shifted
     
 #elif defined IPOD_MINI
@@ -132,7 +133,8 @@ lcd_write_data_shifted:
     subs    r1, r1, #1
     bne     .sloop
 
-    ldr     pc, [sp], #4
+    ldr     lr, [sp], #4
+    bx      lr
     .size   lcd_write_data_shifted,.-lcd_write_data_shifted
 
 #endif
@@ -179,7 +181,8 @@ lcd_mono_data:
     subs    r1, r1, #1
     bne     .mloop
 
-    ldmfd   sp!, {r4, pc}
+    ldmfd   sp!, {r4, lr}
+    bx      lr
 
 .dibits:
     .byte   0x00, 0x03, 0x0C, 0x0F, 0x30, 0x33, 0x3C, 0x3F
@@ -278,6 +281,7 @@ lcd_grey_data:
     subs    r2, r2, #1
     bne     .greyloop
 
-    ldmfd   sp!, {r4-r7, pc}
+    ldmfd   sp!, {r4-r7, lr}
+    bx      lr
     .size   lcd_grey_data,.-lcd_grey_data
 
diff --git a/firmware/target/arm/ipod/video/lcd-as-video.S b/firmware/target/arm/ipod/video/lcd-as-video.S
index fa88dbc..0b454a1 100644
--- a/firmware/target/arm/ipod/video/lcd-as-video.S
+++ b/firmware/target/arm/ipod/video/lcd-as-video.S
@@ -60,7 +60,8 @@ lcd_write_data:                       /* r1 = pixel count, must be even */
     ldrne   r3, [r0], #4
     strne   r3, [lr]
 
-    ldmfd   sp!, {r4, pc}
+    ldmfd   sp!, {r4, lr}
+    bx      lr
 
 /****************************************************************************
  * extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
@@ -294,7 +295,8 @@ lcd_write_yuv420_lines:
 
     ldr         r3, [sp, #12]
     add         sp, sp, r3            /* deallocate buffer */
-    ldmfd       sp!, { r4-r10, pc }   /* restore registers */
+    ldmfd       sp!, { r4-r10, lr }   /* restore registers */
+    bx          lr                    /* return */
 
     .ltorg
     .size   lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
diff --git a/firmware/target/arm/iriver/h10/lcd-as-h10.S b/firmware/target/arm/iriver/h10/lcd-as-h10.S
index b3f12e4..08bccb7 100644
--- a/firmware/target/arm/iriver/h10/lcd-as-h10.S
+++ b/firmware/target/arm/iriver/h10/lcd-as-h10.S
@@ -232,7 +232,8 @@ lcd_write_yuv420_lines:
     subs        r1, r1, #2              @ subtract block from width
     bgt         10b @ loop line         @
                                         @
-    ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
+    ldmfd       sp!, { r4-r11, lr }     @ restore registers
+    bx          lr                      @ return
     .ltorg                              @ dump constant pool
     .size   lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
 
@@ -533,6 +534,7 @@ lcd_write_yuv420_lines_odither:
     subs        r1, r1, #2              @ subtract block from width
     bgt         10b @ loop line         @
                                         @
-    ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
+    ldmfd       sp!, { r4-r11, lr }     @ restore registers
+    bx          lr                      @ return
     .ltorg                              @ dump constant pool
     .size   lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither
diff --git a/firmware/target/arm/lcd-as-memframe.S b/firmware/target/arm/lcd-as-memframe.S
index 4532bab..4dc445c 100644
--- a/firmware/target/arm/lcd-as-memframe.S
+++ b/firmware/target/arm/lcd-as-memframe.S
@@ -101,7 +101,8 @@ lcd_copy_buffer_rect:                   @
     add     r0, r0, r4, lsl #1          @
     subs    r3, r3, #1                  @ next line
     bgt     10b @ copy line             @
-    ldmfd   sp!, { r4-r11, pc }         @ restore regs and return
+    ldmfd   sp!, { r4-r11, lr }         @ restore regs
+    bx      lr                          @ return
     .ltorg                              @ dump constant pool
     .size   lcd_copy_buffer_rect, .-lcd_copy_buffer_rect
 
@@ -344,7 +345,8 @@ lcd_write_yuv420_lines:
     subs        r2, r2, #2              @ subtract block from width
     bgt         10b @ loop line         @
                                         @
-    ldmfd       sp!, { r4-r10, pc }     @ restore registers and return
+    ldmfd       sp!, { r4-r10, lr }     @ restore registers
+    bx          lr                      @ return
     .ltorg                              @ dump constant pool
     .size   lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
 
@@ -691,6 +693,7 @@ lcd_write_yuv420_lines_odither:
     subs        r2, r2, #2              @ subtract block from width
     bgt         10b @ loop line         @
                                         @
-    ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
+    ldmfd       sp!, { r4-r11, lr }     @ restore registers
+    bx          lr                      @ return
     .ltorg                              @ dump constant pool
     .size   lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither
diff --git a/firmware/target/arm/memcpy-arm.S b/firmware/target/arm/memcpy-arm.S
index d17d659..47bab82 100644
--- a/firmware/target/arm/memcpy-arm.S
+++ b/firmware/target/arm/memcpy-arm.S
@@ -98,7 +98,8 @@ memcpy:
         strcsb  r4, [r0], #1
         strcsb  ip, [r0]
 
-        ldmfd   sp!, {r0, r4, pc}
+        ldmfd   sp!, {r0, r4, lr}       @ restore r0, r4; pop return address into lr
+        bx      lr                      @ return
 
 9:      rsb ip, ip, #4
         cmp ip, #2
diff --git a/firmware/target/arm/memmove-arm.S b/firmware/target/arm/memmove-arm.S
index ce056d9..fe7a204 100644
--- a/firmware/target/arm/memmove-arm.S
+++ b/firmware/target/arm/memmove-arm.S
@@ -112,7 +112,8 @@ memmove:
         strneb  r3, [r0, #-1]!
         strcsb  r4, [r0, #-1]!
         strcsb  ip, [r0, #-1]
-        ldmfd   sp!, {r0, r4, pc}
+        ldmfd   sp!, {r0, r4, lr}       @ restore r0, r4; pop return address into lr
+        bx      lr                      @ return
 
 9:      cmp ip, #2
         ldrgtb  r3, [r1, #-1]!
diff --git a/firmware/target/arm/memset-arm.S b/firmware/target/arm/memset-arm.S
index 4d46b6f..0f83a5b 100644
--- a/firmware/target/arm/memset-arm.S
+++ b/firmware/target/arm/memset-arm.S
@@ -69,7 +69,8 @@ memset:
         stmgedb r0!, {r1, r3, ip, lr}
         stmgedb r0!, {r1, r3, ip, lr}
         bgt     3b
-        ldmeqfd sp!, {pc}               @ Now <64 bytes to go.
+        ldmeqfd sp!, {lr}               @ Now <64 bytes to go.
+        bxeq    lr                      @ EQ: return through the lr just popped
 /*
  * No need to correct the count; we're only testing bits from now on
  */
diff --git a/firmware/target/arm/memset16-arm.S b/firmware/target/arm/memset16-arm.S
index 47034ab..c48740c 100644
--- a/firmware/target/arm/memset16-arm.S
+++ b/firmware/target/arm/memset16-arm.S
@@ -59,7 +59,8 @@ memset16:
         stmgeia r0!, {r1, r3, ip, lr}
         stmgeia r0!, {r1, r3, ip, lr}
         bgt     2b
-        ldmeqfd sp!, {pc}               @ Now <64 bytes to go.
+        ldmeqfd sp!, {lr}               @ Now <64 bytes to go.
+        bxeq    lr                      @ EQ: return through the lr just popped
 /*
  * No need to correct the count; we're only testing bits from now on
  */
diff --git a/firmware/target/arm/olympus/mrobe-100/lcd-as-mr100.S b/firmware/target/arm/olympus/mrobe-100/lcd-as-mr100.S
index 2aede6d..ab762c5 100644
--- a/firmware/target/arm/olympus/mrobe-100/lcd-as-mr100.S
+++ b/firmware/target/arm/olympus/mrobe-100/lcd-as-mr100.S
@@ -111,6 +111,7 @@ lcd_grey_data:
     subs    r2, r2, #1
     bne     .greyloop
 
-    ldmfd   sp!, {r4-r7, pc}
+    ldmfd   sp!, {r4-r7, lr}    @ restore r4-r7; pop return address into lr
+    bx      lr                  @ return
     .size   lcd_grey_data,.-lcd_grey_data
 
diff --git a/firmware/target/arm/pbell/vibe500/lcd-as-vibe500.S b/firmware/target/arm/pbell/vibe500/lcd-as-vibe500.S
index d5d5157..aff277c 100644
--- a/firmware/target/arm/pbell/vibe500/lcd-as-vibe500.S
+++ b/firmware/target/arm/pbell/vibe500/lcd-as-vibe500.S
@@ -243,7 +243,8 @@ lcd_write_yuv420_lines:
     subs        r1, r1, #2              @ subtract block from width
     bgt         10b @ loop line         @
                                         @
-    ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
+    ldmfd       sp!, { r4-r11, lr }     @ restore registers
+    bx          lr                      @ return
     .ltorg                              @ dump constant pool
     .size   lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
 
@@ -551,6 +552,7 @@ lcd_write_yuv420_lines_odither:
     subs        r1, r1, #2              @ subtract block from width
     bgt         10b @ loop line         @
                                         @
-    ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
+    ldmfd       sp!, { r4-r11, lr }     @ restore registers
+    bx          lr                      @ return
     .ltorg                              @ dump constant pool
     .size   lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither
diff --git a/firmware/target/arm/philips/hdd1630/lcd-as-hdd1630.S b/firmware/target/arm/philips/hdd1630/lcd-as-hdd1630.S
index 73ad84a..4c84e94 100644
--- a/firmware/target/arm/philips/hdd1630/lcd-as-hdd1630.S
+++ b/firmware/target/arm/philips/hdd1630/lcd-as-hdd1630.S
@@ -248,7 +248,8 @@ lcd_write_yuv420_lines:
     subs        r1, r1, #2              @ subtract block from width
     bgt         10b @ loop line         @
                                         @
-    ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
+    ldmfd       sp!, { r4-r11, lr }     @ restore registers
+    bx          lr                      @ return
     .ltorg                              @ dump constant pool
     .size   lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
 
@@ -565,6 +566,7 @@ lcd_write_yuv420_lines_odither:
     subs        r1, r1, #2              @ subtract block from width
     bgt         10b @ loop line         @
                                         @
-    ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
+    ldmfd       sp!, { r4-r11, lr }     @ restore registers
+    bx          lr                      @ return
     .ltorg                              @ dump constant pool
     .size   lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither
diff --git a/firmware/target/arm/samsung/yh820/lcd-as-yh820.S b/firmware/target/arm/samsung/yh820/lcd-as-yh820.S
index 581a4f3..0e03044 100644
--- a/firmware/target/arm/samsung/yh820/lcd-as-yh820.S
+++ b/firmware/target/arm/samsung/yh820/lcd-as-yh820.S
@@ -238,7 +238,8 @@ lcd_write_yuv420_lines:
     subs        r1, r1, #2              @ subtract block from width
     bgt         10b @ loop line         @
                                         @
-    ldmfd       sp!, { r4-r10, pc }     @ restore registers and return
+    ldmfd       sp!, { r4-r10, lr }     @ restore registers
+    bx          lr                      @ return
     .ltorg                              @ dump constant pool
     .size   lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
 
@@ -545,6 +546,7 @@ lcd_write_yuv420_lines_odither:
     subs        r1, r1, #2              @ subtract block from width
     bgt         10b @ loop line         @
                                         @
-    ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
+    ldmfd       sp!, { r4-r11, lr }     @ restore registers
+    bx          lr                      @ return
     .ltorg                              @ dump constant pool
     .size   lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither
diff --git a/firmware/target/arm/samsung/yh920/lcd-as-yh920.S b/firmware/target/arm/samsung/yh920/lcd-as-yh920.S
index 2c7f511..b2f721d 100644
--- a/firmware/target/arm/samsung/yh920/lcd-as-yh920.S
+++ b/firmware/target/arm/samsung/yh920/lcd-as-yh920.S
@@ -113,6 +113,7 @@ lcd_grey_data:
     subs    r2, r2, #1
     bne     .greyloop
 
-    ldmfd   sp!, {r4-r5, pc}
+    ldmfd   sp!, {r4-r5, lr}    @ restore r4-r5; pop return address into lr
+    bx      lr                  @ return
     .size   lcd_grey_data,.-lcd_grey_data
 
diff --git a/firmware/target/arm/samsung/yh925/lcd-as-yh925.S b/firmware/target/arm/samsung/yh925/lcd-as-yh925.S
index b3f12e4..08bccb7 100644
--- a/firmware/target/arm/samsung/yh925/lcd-as-yh925.S
+++ b/firmware/target/arm/samsung/yh925/lcd-as-yh925.S
@@ -232,7 +232,8 @@ lcd_write_yuv420_lines:
     subs        r1, r1, #2              @ subtract block from width
     bgt         10b @ loop line         @
                                         @
-    ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
+    ldmfd       sp!, { r4-r11, lr }     @ restore registers
+    bx          lr                      @ return
     .ltorg                              @ dump constant pool
     .size   lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
 
@@ -533,6 +534,7 @@ lcd_write_yuv420_lines_odither:
     subs        r1, r1, #2              @ subtract block from width
     bgt         10b @ loop line         @
                                         @
-    ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
+    ldmfd       sp!, { r4-r11, lr }     @ restore registers
+    bx          lr                      @ return
     .ltorg                              @ dump constant pool
     .size   lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither
diff --git a/firmware/target/arm/sandisk/sansa-c200/lcd-as-c200.S b/firmware/target/arm/sandisk/sansa-c200/lcd-as-c200.S
index 581a4f3..0e03044 100644
--- a/firmware/target/arm/sandisk/sansa-c200/lcd-as-c200.S
+++ b/firmware/target/arm/sandisk/sansa-c200/lcd-as-c200.S
@@ -238,7 +238,8 @@ lcd_write_yuv420_lines:
     subs        r1, r1, #2              @ subtract block from width
     bgt         10b @ loop line         @
                                         @
-    ldmfd       sp!, { r4-r10, pc }     @ restore registers and return
+    ldmfd       sp!, { r4-r10, lr }     @ restore registers
+    bx          lr                      @ return
     .ltorg                              @ dump constant pool
     .size   lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
 
@@ -545,6 +546,7 @@ lcd_write_yuv420_lines_odither:
     subs        r1, r1, #2              @ subtract block from width
     bgt         10b @ loop line         @
                                         @
-    ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
+    ldmfd       sp!, { r4-r11, lr }     @ restore registers
+    bx          lr                      @ return
     .ltorg                              @ dump constant pool
     .size   lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither
diff --git a/firmware/thread.c b/firmware/thread.c
index 54d966f..da879ad 100644
--- a/firmware/thread.c
+++ b/firmware/thread.c
@@ -224,9 +224,10 @@ static inline void load_context(const void* addr)
     asm volatile(
         "ldr     r0, [%0, #40]          \n" /* Load start pointer */
         "cmp     r0, #0                 \n" /* Check for NULL */
-        "ldmneia %0, { r0, pc }         \n" /* If not already running, jump to start */ 
+        "ldmneia %0, { r0, r12 }        \n" /* If not already running, load r0 and the start address (into r12) */
+        "bxne    r12                    \n" /* ...and jump to start */
         "ldmia   %0, { r4-r11, sp, lr } \n" /* Load regs r4 to r14 from context */
-        : : "r" (addr) : "r0" /* only! */
+        : : "r" (addr) : "r0", "r12" /* r12 is scratch for the start address used by bxne */
     );
 }
 
@@ -712,7 +713,8 @@ static void __attribute__((naked))
         "ldr    r0, =cpucache_invalidate \n" /* Invalidate new core's cache */
         "mov    lr, pc                   \n"
         "bx     r0                       \n"
-        "ldmfd  sp!, { r4-r11, pc }      \n" /* Restore non-volatile context to new core and return */
+        "ldmfd  sp!, { r4-r11, lr }      \n" /* Restore non-volatile context to new core */
+        "bx     lr                       \n" /* Return through the restored lr */
         ".ltorg                          \n" /* Dump constant pool */
         : : "i"(IDLE_STACK_WORDS)
     );
