感謝QQ群友詩諾比，他發現在JZ4755上使用double變數時，執行速度會嚴重拖慢。司徒找了一下資料，發現新增一些編譯選項可以優化double變數的執行速度。以下是由詩諾比提供的測試程式：
main.c
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Short integer aliases used by the rest of this file. */
typedef signed char s8;
typedef signed short s16;
/*
 * s32/u32 are based on int rather than long: long is 64 bits wide on LP64
 * hosts, which would make the "32" in the name wrong there.
 * NOTE(review): assumes int is 32 bits on every target this is built for.
 */
typedef signed int s32;
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;

/* Single-precision pi (note the 'f' suffix: this is a float constant). */
#define PI 3.14159265f
/*
 * Divisor that turns "sample index * note frequency" into a cycle count in
 * rtttl_init(), i.e. the sample rate in Hz.
 * The original note lists 22050 / 8000 / 11025 as candidate values.
 */
#define LGE_FREQ 8000
/* Number of samples stored per note (row length of snd_tab). */
#define SND_BUF_SIZE 512
/* 1 zero entry + 4 low entries + 48 further entries (4 rows of 12). */
#define RTTTL_NOTES_COUNT 53

/*
 * Frequency table, one entry per note index. Values are in Hz (they are
 * divided by LGE_FREQ when the waveforms are generated).
 * Presumably the last four rows are the twelve semitones of four consecutive
 * octaves -- TODO confirm against the RTTTL player that consumes this table.
 */
const s16 rtttl_notes[RTTTL_NOTES_COUNT] = {
    /* index 0: zero frequency */
    0,
    /* indices 1-4 */
    50, 100, 150, 200,
    /* indices 5-16 */
    262, 277, 294, 311, 330, 349, 370, 392, 415, 440, 466, 494,
    /* indices 17-28 */
    523, 554, 587, 622, 659, 698, 740, 784, 831, 880, 932, 988,
    /* indices 29-40 */
    1047, 1109, 1175, 1245, 1319, 1397, 1480, 1568, 1661, 1760, 1865, 1976,
    /* indices 41-52 */
    2093, 2217, 2349, 2489, 2637, 2794, 2960, 3136, 3322, 3520, 3729, 3951
};

/* Per-note length value, filled in by rtttl_init(). */
u16 snd_len[RTTTL_NOTES_COUNT];
/* Per-note signed 8-bit sample buffers, filled in by rtttl_init(). */
s8 snd_tab[RTTTL_NOTES_COUNT][SND_BUF_SIZE];
void rtttl_init(void)
{
int wsize, qsize, fsize, rsize;
int i, n, max_len = 0;
memset(snd_tab[0], 0, SND_BUF_SIZE);
snd_len[0] = 128;
for (i = 1; i < sizeof(rtttl_notes) / sizeof(rtttl_notes[0]); i++) {
s8* tab = snd_tab[i];
s16 freq = rtttl_notes[i];
u16 m = 0;
double d_max = 1;
s8 v = 0;
for (n = 0; n < SND_BUF_SIZE; n++) {
double d;
double r = n * freq / (double)LGE_FREQ;
d = r - (int)r;
if (r >= 1) {
if (d < d_max) {
d_max = d;
m = n;
}
}
tab[n] = 127 * sin(2 * PI * r);
}
snd_len[i] = m;
}
}
/*
 * Test driver: builds the tables with rtttl_init(), then prints the first
 * sample and the length value of note index 1.
 *
 * argc and argv are unused. Always returns 0.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    rtttl_init();
    /* The s8 and u16 arguments are promoted to int, which does not match
     * %ld where long is wider than int; cast them so the arguments agree
     * with the format string. */
    printf("%ld, %ld\n", (long)snd_tab[1][0], (long)snd_len[1]);
    return 0;
}
編譯、執行
$ gcc main.c -o main -lm
$ time ./main
real 0m8.247s
user 0m0.780s
sys 0m7.430s
新增編譯選項後，重新編譯、執行
$ gcc -mips32 -mtune=mips32 -G0 -fomit-frame-pointer -ffunction-sections -ffast-math -msoft-float -msingle-float -O3 main.c -o main -lm
$ time ./main
real 0m1.543s
user 0m0.260s
sys 0m1.280s