|
Rockbox mail archive
Subject: Re: Segfault with Faster MDCT patch and -fPIC
Re: Segfault with Faster MDCT patch and -fPIC
From: Dave Hooper <dave_at_beermex.com>
Date: Mon, 20 Sep 2010 13:12:17 +0100 (You can see from this 'new' segfault that gcc clearly hasn't realised we've trashed r11 during the immediately-preceding calculations - the add/sub/stm stuff) > Yep, same problem as far as I can tell. We've just moved the problem from > uses-of-r10 to uses-of-r11. The asm code doesn't detect or avoid using the > pic register, so if you change the pic register something else will happen. > The fix requires telling gcc "I want to use r10 here, so if you are using it > you need to stack and preserve it". This ought to be possible through the > appropriate output/clobbers in the asm but my gut feeling is that what I've > tried to do to reuse code through macros makes this quite hard. So rather > than trying to define a register in one macro, use it in a couple of places, > and then stop using it later on and assume gcc knows to reload the original > value, we just need to rip that all out and use a proper standalone (inline) > function for each TRANSFORM type without trying to be clever about > register-reuse-across-macros. >> 2010/9/20 Rafaël Carré <rafael.carre_at_gmail.com>: >>> On Mon, 20 Sep 2010 11:29:59 +0100 >>> Dave Hooper <dave_at_beermex.com> wrote: >>> >>>> I suppose gcc doesn't actually know whether I want to use the value >>>> of pic or not, when I explicitly say register("r10"). So I think it's >>>> assuming I do want the value of pic and that 'i know what I'm doing'. >>>> If I unrolled those macros somewhat so that I don't need to keep >>>> registers live across macros it would probably make it easier + >>>> cleaner. >>> >>> This patch should use r11 instead >>> >>> -- >>> Rafaël Carré >>> >> >> Thanks. This makes Tremor segfault somewhere else. Here is gdb output: >> >> Program received signal SIGSEGV, Segmentation fault. 
>> fft32 (z=0x0) at fft-ffmpeg.c:354 >> 354 DECL_FFT(32,16,8) >> (gdb) bt >> #0 fft32 (z=0x0) at fft-ffmpeg.c:354 >> #1 0x0003c0b4 in fft64 (z=0x0) at fft-ffmpeg.c:355 >> #2 0x0003c0e4 in fft128 (z=0x0) at fft-ffmpeg.c:356 >> #3 0x0003c114 in fft256 (z=0x0) at fft-ffmpeg.c:357 >> #4 0x0003b960 in ff_fft_calc_c (nbits=<value optimized out>, z=0x0) >> at fft-ffmpeg.c:370 >> #5 0x0003ad78 in ff_imdct_half (nbits=28199932, output=0x1b100c8, >> input=<value optimized out>) at mdct-ffmpeg.c:112 >> #6 0x0003b178 in ff_imdct_calc (nbits=0, output=0x1afa5c8, input=0x0) >> at mdct-ffmpeg.c:348 >> #7 0x0003a5ac in mapping0_inverse (vb=0xbeed3670, l=0x1b0b538) at >> mapping0.c:290 >> #8 0x00033270 in vorbis_synthesis (vb=0xbeed3670, op=<value optimized >> out>, decodep=1) at synthesis.c:75 >> ... >> (gdb) info all-registers >> r0 0x0 0 >> r1 0x0 0 >> r2 0x0 0 >> r3 0x5a82799a 1518500250 >> r4 0x0 0 >> r5 0x0 0 >> r6 0x0 0 >> r7 0x0 0 >> r8 0x0 0 >> r9 0x0 0 >> r10 0x1aeeb30 28240688 >> r11 0x0 0 >> r12 0x0 0 >> sp 0xbeed3480 0xbeed3480 >> lr 0xc0 192 >> pc 0x3bf30 0x3bf30 <fft32+372> >> f0 0 (raw 0x0003f8cc0000000000000000) >> f1 0 (raw 0x0003f8cc0000000000000000) >> f2 0 (raw 0x0003f8cc0000000000000000) >> f3 0 (raw 0x0003f8cc0000000000000000) >> f4 0 (raw 0x0003f8cc0000000000000000) >> f5 0 (raw 0x0003f8cc0000000000000000) >> f6 0 (raw 0x0003f8cc0000000000000000) >> f7 0 (raw 0x0003f8cc0000000000000000) >> fps 0x0 0 >> cpsr 0x60000010 1610612752 >> (gdb) disass $pc-64,$pc+64 >> Dump of assembler code from 0x3bef0 to 0x3bf70: >> 0x0003bef0 <fft32+308>: add r11, r6, r11 >> 0x0003bef4 <fft32+312>: sub r4, r9, r4, lsl #1 >> 0x0003bef8 <fft32+316>: sub r6, r11, r6, lsl #1 >> 0x0003befc <fft32+320>: stm lr, {r9, r11} >> 0x0003bf00 <fft32+324>: add lr, lr, #16 >> 0x0003bf04 <fft32+328>: ldm lr, {r9, r11} >> 0x0003bf08 <fft32+332>: add r9, r7, r9 >> 0x0003bf0c <fft32+336>: add r11, r0, r11 >> 0x0003bf10 <fft32+340>: sub r7, r9, r7, lsl #1 >> 0x0003bf14 <fft32+344>: sub r8, r11, r0, 
lsl #1 >> 0x0003bf18 <fft32+348>: stm lr, {r9, r11} >> 0x0003bf1c <fft32+352>: add lr, lr, #16 >> 0x0003bf20 <fft32+356>: stm lr, {r4, r6} >> 0x0003bf24 <fft32+360>: add lr, lr, #16 >> 0x0003bf28 <fft32+364>: stm lr, {r7, r8} >> 0x0003bf2c <fft32+368>: add lr, r11, #192 ; 0xc0 >> => 0x0003bf30 <fft32+372>: ldm lr, {r1, r2, r3, r4, r5, r6, r7, r8} >> 0x0003bf34 <fft32+376>: add r1, r1, r3 >> 0x0003bf38 <fft32+380>: sub r3, r1, r3, lsl #1 >> 0x0003bf3c <fft32+384>: sub r7, r7, r5 >> 0x0003bf40 <fft32+388>: add r5, r7, r5, lsl #1 >> 0x0003bf44 <fft32+392>: add r1, r1, r5 >> 0x0003bf48 <fft32+396>: sub r5, r1, r5, lsl #1 >> 0x0003bf4c <fft32+400>: add r2, r2, r4 >> 0x0003bf50 <fft32+404>: sub r4, r2, r4, lsl #1 >> 0x0003bf54 <fft32+408>: add r12, r6, r8 >> 0x0003bf58 <fft32+412>: sub r6, r6, r8 >> 0x0003bf5c <fft32+416>: sub r8, r4, r7 >> 0x0003bf60 <fft32+420>: add r4, r4, r7 >> 0x0003bf64 <fft32+424>: sub r7, r3, r6 >> 0x0003bf68 <fft32+428>: add r3, r3, r6 >> 0x0003bf6c <fft32+432>: sub r6, r2, r12 >> End of assembler dump. >> >> z=0x0 -- strange. Let's track it. >> >> >> (gdb) up 7 >> #7 0x0003a5ac in mapping0_inverse (vb=0xbeed3670, l=0x1b0b538) at >> mapping0.c:290 >> 290 ff_imdct_calc(ci->blocksizes_nbits[vb->W], pcm, pcm); >> (gdb) l >> 285 >> 286 /* transform the PCM data; takes PCM vector, vb; modifies PCM vector > */ >> 287 /* only MDCT right now.... 
*/ >> 288 for(i=0;i<vi->channels;i++){ >> 289 ogg_int32_t *pcm=vb->pcm[i]; >> 290 ff_imdct_calc(ci->blocksizes_nbits[vb->W], pcm, pcm); >> 291 } >> 292 >> 293 //for(j=0;j<vi->channels;j++) >> 294 //_analysis_output("imdct",seq+j,vb->pcm[j],-24,n,0,0); >> (gdb) p pcm >> $1 = (ogg_int32_t *) 0x0 >> (gdb) p i >> $2 = 1 >> (gdb) p vb->pcm[i] >> $3 = (ogg_int32_t *) 0x1af1b28 >> (gdb) p *vb >> $4 = {pcm = 0x1af1b50, opb = {headbit = 5, headptr = 0x1af2e80 >> "\016vorbis\"BCV\001", headend = 1, head = 0x1af1180, tail = >> 0x1af1180, >> count = 0}, lW = 1, W = 1, nW = 1, pcmend = 1024, mode = 1, >> eofflag = 0, granulepos = 0, sequence = -3, vd = 0x1af2134, >> localstore = 0x1b0f8c8, localtop = 4096, localalloc = 4096, totaluse >> = 8, reap = 0x1af1b30} >> >> pcm=0x0 but vb->pcm[i]=0x1af1b28 WTF? >> >> (gdb) disass $pc-64,$pc+64 >> Dump of assembler code from 0x3a56c to 0x3a5ec: >> 0x0003a56c <mapping0_inverse+868>: add r5, r5, #1 >> 0x0003a570 <mapping0_inverse+872>: ldr r3, [r0, #4] >> 0x0003a574 <mapping0_inverse+876>: add r4, r4, #4 >> 0x0003a578 <mapping0_inverse+880>: cmp r3, r5 >> 0x0003a57c <mapping0_inverse+884>: bgt 0x3a52c <mapping0_inverse+804> >> 0x0003a580 <mapping0_inverse+888>: cmp r3, #0 >> 0x0003a584 <mapping0_inverse+892>: ble 0x3a698 <mapping0_inverse+1168> >> 0x0003a588 <mapping0_inverse+896>: mov r4, #0 >> 0x0003a58c <mapping0_inverse+900>: ldr r3, [r9] >> 0x0003a590 <mapping0_inverse+904>: ldr r2, [r9, #32] >> 0x0003a594 <mapping0_inverse+908>: ldr r1, [r3, r4, lsl #2] >> 0x0003a598 <mapping0_inverse+912>: ldr r3, [r11, #-60] ; 0x3c >> 0x0003a59c <mapping0_inverse+916>: add r4, r4, #1 >> 0x0003a5a0 <mapping0_inverse+920>: ldr r0, [r3, r2, lsl #2] >> 0x0003a5a4 <mapping0_inverse+924>: mov r2, r1 >> 0x0003a5a8 <mapping0_inverse+928>: bl 0x3b148 <ff_imdct_calc> >> => 0x0003a5ac <mapping0_inverse+932>: ldr r5, [r11, #-64] ; 0x40 >> 0x0003a5b0 <mapping0_inverse+936>: ldr r12, [r5, #4] >> 0x0003a5b4 <mapping0_inverse+940>: cmp r12, r4 >> 0x0003a5b8 
<mapping0_inverse+944>: bgt 0x3a58c <mapping0_inverse+900> >> 0x0003a5bc <mapping0_inverse+948>: subs r1, r12, #0 >> 0x0003a5c0 <mapping0_inverse+952>: ble 0x3a628 <mapping0_inverse+1056> >> 0x0003a5c4 <mapping0_inverse+956>: mov r6, #0 >> 0x0003a5c8 <mapping0_inverse+960>: mov r5, r6 >> 0x0003a5cc <mapping0_inverse+964>: ldr lr, [r11, #-76] ; 0x4c >> 0x0003a5d0 <mapping0_inverse+968>: ldr r3, [r9] >> 0x0003a5d4 <mapping0_inverse+972>: ldr r2, [r5, lr] >> 0x0003a5d8 <mapping0_inverse+976>: ldr r0, [r3, r5] >> 0x0003a5dc <mapping0_inverse+980>: cmp r2, #0 >> 0x0003a5e0 <mapping0_inverse+984>: bne 0x3a668 <mapping0_inverse+1120> >> 0x0003a5e4 <mapping0_inverse+988>: ldr r1, [r11, #-52] ; 0x34 >> 0x0003a5e8 <mapping0_inverse+992>: cmp r1, #0 >> End of assembler dump. >> >> Please let me know if there is anything more I can do. Received on 2010-09-20 Page template was last modified "Tue Sep 7 00:00:02 2021" The Rockbox Crew -- Privacy Policy |