/*
 * Audio speed scaling
 *
 * Copyright (C) 2006-2007 by Nicolas Pitre
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Default samplerate.  Should be multiplied by 2 for stereo sound.
 */
#define SAMPLERATE	(44100 * 2)

/*
 * Number of frames processed by second.  If set too high then distortion
 * will occur in the presence of frequencies lower than this value in the
 * audio.  If set too low then echo artifacts in the slow cases, or some
 * signal loss in the fast cases, will become apparent.
 */
#define FRAME_FREQ	60

/*
 * Frame overlap shift precision.  Computation cost and quality drop with
 * bigger values.  This must be at least 1 for mono sound, at least 2 and
 * an even value for stereo sound.
 */
#define SHIFT_INC	2

/*
 * Auto-correlator precision.  Computation cost and quality drop with
 * bigger values.  This must be an odd value for stereo sound.
 */
#define CORR_INC	1

/*
 * Largest source step scale_init() accepts.  Keeping it well below INT_MAX
 * guarantees that the buffer size arithmetic in scale_init(), which adds a
 * handful of step-sized terms together, cannot overflow an int.
 */
#define SRC_STEP_MAX	(INT_MAX / 16)

/*
 * Per-stream scaling state, created by scale_init() and released by
 * scale_cleanup().  All sizes are expressed in samples (shorts).
 */
typedef struct {
	int crossfade_order;	/* power of two for crossfade_size */
	int shift_max;		/* maximum frame displacement for crossfade */
	int src_frame_sz;	/* required samples to process a frame */
	int src_step;		/* source window pace */
	int dst_step;		/* destination window pace */
	int ovl_shift;		/* overlap buffer frame shift */
	int ovl_size;		/* overlap buffer size */
	short *ovl_buff;	/* overlap buffer */
} scale_state;

/*
 * Release a state obtained from scale_init().  Passing NULL is allowed and
 * does nothing.
 */
void scale_cleanup(scale_state *st)
{
	if (!st)
		return;
	free(st->ovl_buff);
	free(st);
}

/*
 * Allocate and set up a scaling state.
 *
 * samplerate: samples per second (already doubled for stereo, see
 *             SAMPLERATE); values below 8000 are refused.
 * factor:     ratio between the source and the destination window pace
 *             (src_step = dst_step * factor).
 *
 * Returns the new state, or NULL when samplerate is too low, when factor
 * does not yield a usable source step (zero, negative, NaN, too small or
 * too large), or when memory cannot be allocated.
 *
 * The computed parameters are printed on stderr.
 */
scale_state *scale_init(int samplerate, float factor)
{
	scale_state *st;
	int crossfade_size, ovl_space;

	if (samplerate < 8000)
		return NULL;
	/* written this way so that NaN is refused as well */
	if (!(factor > 0.0f))
		return NULL;
	st = malloc(sizeof(*st));
	if (!st)
		return NULL;

#if 0
	/* soundtouch params */
	st->shift_max = samplerate * 28 / 1000;
	crossfade_size = samplerate * 12 / 1000;
	st->crossfade_order = 0;
	while (crossfade_size >>= 1)
		st->crossfade_order++;
	crossfade_size = 1 << st->crossfade_order;
	st->dst_step = (samplerate * 78 / 1000) & ~1;	/* even for stereo */
	st->src_step = st->dst_step * factor;
	st->src_step &= ~1;				/* even for stereo */
#else
	st->dst_step = samplerate / FRAME_FREQ;
	if (factor > 1)
		st->dst_step /= factor;
	/* round the destination step to a power of two */
	st->crossfade_order = 1;
	while (st->dst_step >>= 1)
		st->crossfade_order++;
	st->dst_step = crossfade_size = (1 << st->crossfade_order);

	/*
	 * A source step below 2 becomes 0 once made even, and every size
	 * computation below divides by it; an oversized one would overflow
	 * the conversion to int.  Refuse both.
	 */
	float scaled = st->dst_step * factor;
	if (!(scaled >= 2.0f && scaled <= (float)SRC_STEP_MAX)) {
		free(st);
		return NULL;
	}
	st->src_step = (int)scaled & ~1;		/* even for stereo */
	st->shift_max = (st->dst_step > st->src_step) ?
			st->dst_step : st->src_step;
#endif

	st->src_frame_sz = st->shift_max + st->dst_step;
	if (2*crossfade_size > st->src_step + st->dst_step)
		st->src_frame_sz += 2*crossfade_size -
				    (st->src_step + st->dst_step);

	/* largest incomplete frame */
	ovl_space = st->src_frame_sz - 1;
	/* round up to st->src_step */
	ovl_space = (ovl_space + st->src_step - 1) / st->src_step * st->src_step;
	/* if prev_frame might still point to ancient data then add another loop */
	if (st->src_step - crossfade_size > ovl_space - (st->src_frame_sz - 1))
		ovl_space += st->src_step;
	/* then account for the initial ovl_shift when it is positive */
	if (st->src_step - crossfade_size > 0)
		ovl_space += st->src_step - crossfade_size;
	/* then back a step and add a frame size */
	ovl_space = ovl_space + st->src_frame_sz - st->src_step;

	fprintf(stderr, "src_step\t%d\ndst_step\t%d\ncrossfade\t%d\n"
			"shift_max\t%d\nsrc_frame_sz\t%d\novl_space\t%d\n",
		st->src_step, st->dst_step, crossfade_size,
		st->shift_max, st->src_frame_sz, ovl_space);

	st->ovl_size = 0;
	st->ovl_shift = 0;
	st->ovl_buff = malloc(ovl_space * sizeof(short));
	if (st->ovl_buff)
		return st;

	scale_cleanup(st);
	return NULL;
}

/*
 * Scale data_len samples from buf_in into buf_out.
 *
 * last: 0 when more input will follow (data that cannot be processed yet
 *       is kept in the state's overlap buffer), non-zero on the final call
 *       (all remaining data is flushed to buf_out).  The value -1 is
 *       reserved for the recursive call made below on the overlap buffer;
 *       in that mode only the frame shift is remembered.
 *
 * Returns the number of samples written to buf_out.  main() sizes buf_out
 * with scale_next_required_space() called with the same data_len and last.
 */
int scale_apply(short *buf_out, short *buf_in, int data_len, int last, scale_state *st)
{
	short *next_frame, *prev_frame, *curr, *prev, *dest;
	int i, j, shift, crossfade_size = 1 << st->crossfade_order;

	/* deal with overlap data first, if any */
	if (st->ovl_size) {
		int have, need, copy;

		have = st->ovl_size;
		if (st->ovl_shift > 0)
			have -= st->ovl_shift;

		/* append just enough data to have all of the overlap buffer consumed */
		need = (have + st->src_step - 1) / st->src_step * st->src_step;
		if (st->src_step - crossfade_size > need - have)
			need += st->src_step; /* one more loop to move prev_frame out */
		copy = need - have + st->src_frame_sz - st->src_step;
		if (copy > data_len)
			copy = data_len;
		memcpy(st->ovl_buff + st->ovl_size, buf_in, copy * sizeof(short));
		if (!last && have + copy < st->src_frame_sz) {
			/* still not enough to process at least one frame */
			st->ovl_size += copy;
			return 0;
		}

		/* recursively call ourselves to process the overlap buffer */
		have = st->ovl_size;
		st->ovl_size = 0;
		if (copy == data_len)
			return scale_apply(buf_out, st->ovl_buff, have+copy, last, st);
		dest = buf_out + scale_apply(buf_out, st->ovl_buff, have+copy, -1, st);

		/* readjust pointers to account for data already consumed */
		next_frame = buf_in + copy - st->src_frame_sz + st->src_step;
		prev_frame = next_frame - st->ovl_shift;
	} else {
		dest = buf_out;
		next_frame = prev_frame = buf_in;
		if (st->ovl_shift > 0)
			next_frame += st->ovl_shift;
		else
			prev_frame += -st->ovl_shift;
	}
	st->ovl_shift = 0;

	/* process all complete frames */
	while (data_len - (next_frame - buf_in) >= st->src_frame_sz) {

		/* find best autocorrelation for frame overlap */
#if 0
		/* original version */
		long long min_delta = (~0ULL) >> 1; /* most positive */
		shift = 0;
		for (i = 0; i < st->shift_max; i += SHIFT_INC) {
			long long delta = 0;
			curr = next_frame + i;
			prev = prev_frame;
			for (j = 0; j < crossfade_size; j += CORR_INC) {
				int diff = curr[j] - prev[j];
				delta += (long long)diff * diff;
				if (delta >= min_delta)
					goto skip;
			}
			min_delta = delta;
			shift = i;
skip:;
		}
#elif 1
		/* soundtouch behavior (unoptimized) */
		int max_corr = ~(~0U >> 1); /* most negative */
		short prev_curved[crossfade_size];
		for (j = 0; j < crossfade_size; j += CORR_INC)
			prev_curved[j] = ((((prev_frame[j] * j) >> st->crossfade_order)
					  * (crossfade_size - j)) >> st->crossfade_order);
		shift = 0;
		for (i = 0; i < st->shift_max; i += SHIFT_INC) {
			int corr = 0;
			curr = next_frame + i;
			prev = prev_curved;
			for (j = 0; j < crossfade_size; j += CORR_INC)
				corr += (curr[j] * prev[j]) >> st->crossfade_order;
			/* strict comparison: the earliest best shift wins */
			if (corr > max_corr) {
				max_corr = corr;
				shift = i;
			}
		}
#else
		/* new version */
		long long min_delta = (~0ULL) >> 1; /* most positive */
		shift = 0;
		for (i = 0; i < st->shift_max; i += SHIFT_INC) {
			long long delta = 0;
			curr = next_frame + i + crossfade_size/2;
			prev = prev_frame + crossfade_size/2;
			for (j = 0; j < crossfade_size/2; j = j*3 + 1) {
				int diff;
				diff = curr[ j] - prev[ j];
				delta += (long long)diff * diff;
				diff = curr[-j] - prev[-j];
				delta += (long long)diff * diff;
				if (delta >= min_delta)
					goto skip;
			}
			min_delta = delta;
			shift = i;
skip:;
		}
#endif

		/* overlap fade-out of previous frame with fade-in of current frame */
		curr = next_frame + shift;
		prev = prev_frame;
		for (i = 0, j = crossfade_size; j; i++, j--)
			*dest++ = (*curr++ * i + *prev++ * j) >> st->crossfade_order;

		/* copy whatever part of the destination step follows the crossfade */
		if (st->dst_step - i > 0) {
			memcpy(dest, curr, (st->dst_step - i) * sizeof(short));
			dest += st->dst_step - i;
			curr += st->dst_step - i;
		}

		/* adjust pointers for next frame */
		prev_frame = curr;
		next_frame += st->src_step;
	}

	/* now deal with remaining partial frames */
	if (last == -1) {
		/* special overlap buffer processing: remember frame shift only */
		st->ovl_shift = next_frame - prev_frame;
	} else if (last != 0) {
		/* last call: purge all remaining data to output buffer */
		i = data_len - (prev_frame - buf_in);
		/* nothing left (or no output buffer at all) means nothing to copy */
		if (i > 0) {
			memcpy(dest, prev_frame, i * sizeof(short));
			dest += i;
		}
	} else {
		/* preserve remaining data + needed overlap data for next call */
		st->ovl_shift = next_frame - prev_frame;
		curr = (st->ovl_shift < 0) ? next_frame : prev_frame;
		st->ovl_size = data_len - (curr - buf_in);
		/*
		 * When called recursively buf_in is the overlap buffer itself,
		 * so source and destination may overlap: memmove is required.
		 */
		memmove(st->ovl_buff, curr, st->ovl_size * sizeof(short));
	}

	return dest - buf_out;
}

/*
 * Compute the output space, in samples, for the next scale_apply() call
 * made with the same data_len and last values.  main() uses the result to
 * size the output buffer.  Returns 0 when the input plus the buffered
 * overlap data is still shorter than one frame and last is not set.
 */
int scale_next_required_space(int data_len, int last, scale_state *st)
{
	int src_size, nb_frames, dst_space;

	src_size = data_len + st->ovl_size;
	if (st->ovl_shift > 0)
		src_size -= st->ovl_shift;

	if (src_size < st->src_frame_sz) {
		/* no complete frame: output only happens on the final flush */
		if (!last)
			return 0;
		dst_space = data_len + st->ovl_size;
		if (st->ovl_shift < 0)
			dst_space += st->ovl_shift;
	} else {
		nb_frames = (src_size - st->src_frame_sz) / st->src_step + 1;
		dst_space = nb_frames * st->dst_step;
		if (last) {
			/* room for the tail flushed after the last frame */
			dst_space += src_size - nb_frames * st->src_step;
			dst_space += st->src_step - st->dst_step;
		}
	}

	return dst_space;
}

/*
 * Command line driver.  Define SCALE_NO_MAIN to build only the scaling
 * routines above, e.g. when embedding this file in another program.
 */
#ifndef SCALE_NO_MAIN

/* Print the command line help on stderr; returns the failure exit status. */
static int usage(const char *prog)
{
	fprintf(stderr, "Usage: %s <factor> [<blocksize>]\n"
			"  (data read from stdin, written to stdout)\n", prog);
	return -1;
}

/*
 * Read 16-bit samples from stdin in blocks of <blocksize> samples (4096 by
 * default), scale them by <factor> and write the result to stdout.
 * Returns 0 on success and -1 on usage, allocation or I/O errors.
 */
int main(int argc, char *argv[])
{
	scale_state *st;
	int insz, outsz, blocksz, bufspace, last;
	short *in, *out = NULL;
	int status = -1;

	if (argc < 2 || argc > 3)
		return usage(argv[0]);

	/* a block size of zero would never reach end of input below */
	blocksz = (argc == 3) ? atoi(argv[2]) : 4096;
	if (blocksz <= 0)
		return usage(argv[0]);

	st = scale_init(SAMPLERATE, atof(argv[1]));
	if (!st)
		return usage(argv[0]);

	in = malloc(blocksz * sizeof(*in));
	if (!in) {
		perror("malloc()");
		goto done;
	}

	do {
		/* fread() cannot return a negative count: ask the stream */
		size_t got = fread(in, sizeof(*in), blocksz, stdin);
		if (got < (size_t)blocksz && ferror(stdin)) {
			perror("fread()");
			goto done;
		}
		insz = (int)got;

		/* a short read means end of input: flush on this call */
		last = (insz != blocksz);
		bufspace = scale_next_required_space(insz, last, st);

		/* always hand a real buffer to scale_apply() */
		out = malloc((bufspace > 0 ? bufspace : 1) * sizeof(*out));
		if (!out) {
			perror("malloc()");
			goto done;
		}
		outsz = scale_apply(out, in, insz, last, st);
		if (fwrite(out, sizeof(*out), outsz, stdout) != (size_t)outsz) {
			perror("fwrite()");
			goto done;
		}
		free(out);
		out = NULL;
	} while (!last);

	status = 0;
done:
	free(out);
	free(in);
	scale_cleanup(st);
	return status;
}

#endif /* SCALE_NO_MAIN */