More progress on audio and rendering

2025-06-02 18:04:15 +02:00
parent 84805b92cb
commit 0c3e2aa730
14 changed files with 569 additions and 264 deletions
--- a/util/audio.c
+++ b/util/audio.c
@@ -6,6 +6,11 @@

 AudioData audioData;

+// Capacity of the midiEvents queue.
+#define MAX_MIDI_EVENTS 1024
+// Note events decoded by load_midi_file() and dispatched in order by audio_callback().
+MidiEvent midiEvents[MAX_MIDI_EVENTS];
+// Number of valid entries currently stored in midiEvents.
+int midiEventCount = 0;
+// Index of the next midiEvents entry that audio_callback() will dispatch.
+int nextMidiEvent = 0;
+
 uint16_t getAvailableChannel() {
    for (uint16_t i = 0; i < NUM_SYNTH_VOICES; i++) {
        if (audioData.synthVoices[i].volume == 0) {
@@ -39,93 +44,209 @@ static void compute_stereo_gains(float pan, float *outL, float *outR) {
    // e.g. *outL *= 0.7071f; *outR *= 0.7071f;
 }

-// This callback now writes stereo frames: interleaved L/R floats.
+// Audio callback: fills `stream` with `len` bytes of interleaved L/R float samples.
+//
+// On every call it
+//   1. dispatches each queued MIDI event whose timeSec has been reached, using
+//      the last four synth voices as the MIDI voice pool;
+//   2. mixes every audible voice into the buffer, panned and attenuated by its
+//      horizontal distance from the player rectangle, while ramping the voice
+//      amplitude toward its target (smooth fade in/out);
+//   3. divides the mix by the number of voices that contributed (anti-clipping).
+//
+// userdata: pointer to the AudioData being rendered.
+// stream:   output buffer, written as float[2 * frames].
+// len:      size of `stream` in bytes.
 void audio_callback(void *userdata, Uint8 *stream, int len) {
    AudioData *audio = (AudioData *) userdata;

-    // 'len' is total bytes; each sample‐frame is 2 floats (L+R), i.e. 2 * sizeof(float).
-    int frames = len / (2 * sizeof(float));
+    int frames = len / (2 * sizeof(float)); // Stereo frame count
+
+    // Divide in floating point so integer operands cannot truncate the playback
+    // clock to whole seconds (which would release MIDI events in bursts).
+    double elapsedSec = (double) audio->totalSamples / (double) SAMPLE_RATE;
+    audio->totalSamples += frames;
+
+    // Dispatch every MIDI event that is due at the start of this buffer.
+    while (nextMidiEvent < midiEventCount &&
+           midiEvents[nextMidiEvent].timeSec <= elapsedSec) {
+
+        MidiEvent *ev = &midiEvents[nextMidiEvent];
+
+        if (ev->type == 0 && ev->velocity > 0) {
+            // Note On: claim the first silent voice among the last four.
+            for (int i = NUM_SYNTH_VOICES - 4; i < NUM_SYNTH_VOICES; ++i) {
+                SynthVoice *v = &audio->synthVoices[i];
+                if (v->volume == 0) {
+                    float freq = 440.0f * powf(2.0f, (ev->note - 69) / 12.0f);
+                    v->frequency = (uint16_t) freq;
+                    v->volume = ev->velocity * 2;
+                    v->waveform = WAVE_SQUARE;
+                    v->smoothedAmp = 0;
+                    break;
+                }
+            }
+        } else {
+            // Note Off (or Note On with velocity 0): silence every MIDI voice
+            // currently playing this note's truncated frequency.
+            for (int i = NUM_SYNTH_VOICES - 4; i < NUM_SYNTH_VOICES; ++i) {
+                SynthVoice *v = &audio->synthVoices[i];
+                float freq = 440.0f * powf(2.0f, (ev->note - 69) / 12.0f);
+                if ((uint16_t)freq == v->frequency) {
+                    v->volume = 0;
+                }
+            }
+        }
+
+        nextMidiEvent++;
+    }

-    // Zero out the entire output buffer (silence)
-    // We’ll accumulate into it.
-    // Each float is 4 bytes, so total floats = 2 * frames.
    float *outBuf = (float *) stream;
    for (int i = 0; i < 2 * frames; ++i) {
        outBuf[i] = 0.0f;
    }

-    // Precompute the listener center
    float listenerCx = audio->playerRect->x + audio->playerRect->w * 0.5f;

-    // For each synth voice, mix into the stereo buffer
+    // Number of voices mixed into this buffer. Every mixed voice writes all
+    // frames, so one counter replaces a per-frame heap-allocated tally and
+    // keeps the callback free of allocation (and of an unchecked NULL).
+    int activeVoices = 0;
+
    for (int v = 0; v < NUM_SYNTH_VOICES; v++) {
        SynthVoice *voice = &audio->synthVoices[v];
-        if (voice->volume == 0 || voice->frequency == 0) {
-            continue; // skip silent or inactive voices
-        }

-        // Compute source center X
-        float sourceCx = voice->sourceRect.x + voice->sourceRect.w * 0.5f;
+        // Skip voices that are silent and fully faded out, or have no pitch.
+        if ((voice->volume == 0 && voice->smoothedAmp < 0.001f) || voice->frequency == 0)
+            continue;
+
+        float sourceCx = voice->sourceRect.x + TILE_SIZE * 0.5f;
        float dx = sourceCx - listenerCx;
-
-        // Normalize for pan. If |dx| >= maxPanDistance → full left or full right.
-        float pan = dx / audio->maxPanDistance;
-        if (pan < -1.0f) pan = -1.0f;
-        if (pan > +1.0f) pan = +1.0f;
+        float pan = fmaxf(-1.0f, fminf(+1.0f, dx / audio->maxPanDistance));

        float gainL, gainR;
        compute_stereo_gains(pan, &gainL, &gainR);
+        gainL *= 0.7071f;
+        gainR *= 0.7071f;

-        // Optional: You could also attenuate overall volume with distance
-        // float dist = fabsf(dx);
-        // float distanceAtten = 1.0f - fminf(dist / audio->maxPanDistance, 1.0f);
-        // float finalVolume = (voice->volume / 255.0f) * distanceAtten;
-        // But for now, we’ll just use voice->volume for amplitude.
+        // Linear falloff: full volume at the listener, silent at maxPanDistance.
+        float dist = fabsf(dx);
+        float distanceAtten = 1.0f - fminf(dist / audio->maxPanDistance, 1.0f);
+        float targetAmp = (voice->volume / 255.0f) * distanceAtten;

-        float amp = (voice->volume / 255.0f);
+        // Phase runs over [0, 256) per waveform cycle.
+        double phaseInc = ((double) voice->frequency * 256.0) / (double) SAMPLE_RATE;

-        // Phase increment per sample‐frame:
-        //   (freq * 256) / SAMPLE_RATE tells how many phase steps per mono-sample.
-        //   Because we’re writing stereo, we still advance phase once per frame.
-        uint8_t phaseInc = (uint8_t)((voice->frequency * 256) / SAMPLE_RATE);
-
-        // Mix into each frame
        for (int i = 0; i < frames; i++) {
-            float t = (float) voice->phase / 255.0f * 2.0f - 1.0f;
+            // Move the amplitude a fixed fraction of the way to its target.
+            voice->smoothedAmp += (targetAmp - voice->smoothedAmp) * SMOOTHING_FACTOR;
+            float amp = voice->smoothedAmp;
+
+            double norm = voice->phase / 256.0;
+            double t = norm * 2.0 - 1.0;
            float sample;
+
            switch (voice->waveform) {
-                default:
-                case WAVE_SINE:
-                    sample = sinf(voice->phase * 2.0f * M_PI / 256.0f);
-                    break;
                case WAVE_SQUARE:
-                    sample = (t >= 0.0f) ? 1.0f : -1.0f;
+                    sample = (t >= 0.0) ? 1.0f : -1.0f;
                    break;
                case WAVE_SAWTOOTH:
-                    sample = t;
+                    sample = (float) t;
                    break;
                case WAVE_TRIANGLE:
-                    sample = (t < 0.0f) ? -t : t;
+                    sample = (float) ((t < 0.0) ? -t : t);
                    break;
                case WAVE_NOISE:
-                    sample = ((float) rand() / RAND_MAX) * 2.0f - 1.0f;
+                    sample = ((float) rand() / (float) RAND_MAX) * 2.0f - 1.0f;
+                    break;
+                default:
+                    sample = (float) sin(norm * 2.0 * M_PI);
                    break;
            }

            voice->phase += phaseInc;
+            if (voice->phase >= 256.0) voice->phase -= 256.0;
+            else if (voice->phase < 0.0) voice->phase += 256.0;

-            // Interleaved index: left = 2*i, right = 2*i + 1
            int idxL = 2 * i;
            int idxR = 2 * i + 1;

-            // Accumulate into buffer
            outBuf[idxL] += sample * amp * gainL;
            outBuf[idxR] += sample * amp * gainR;
+        }
+        activeVoices++;
+    }
+
+    // Anti-clipping: scale the mix down by the number of contributing voices.
+    if (activeVoices > 1) {
+        for (int i = 0; i < 2 * frames; ++i) {
+            outBuf[i] /= (float) activeVoices;
+        }
+    }
+}
+
+
+
+// Decode a 32-bit big-endian unsigned integer from the four bytes at `data`.
+static uint32_t read_be_uint32(const uint8_t *data) {
+    // Widen each byte before shifting: a uint8_t promotes to signed int, and
+    // shifting a value >= 0x80 left by 24 overflows it (undefined behavior).
+    return ((uint32_t) data[0] << 24) |
+           ((uint32_t) data[1] << 16) |
+           ((uint32_t) data[2] << 8) |
+           (uint32_t) data[3];
+}
+
+// Decode a 16-bit big-endian unsigned integer from the two bytes at `data`.
+static uint16_t read_be_uint16(const uint8_t *data) {
+    const unsigned hi = data[0];
+    const unsigned lo = data[1];
+    return (uint16_t) ((hi << 8) | lo);
+}
+
+// Decode a MIDI variable-length quantity starting at *ptr and advance *ptr
+// past the bytes consumed.
+//
+// Each byte contributes its low 7 bits, most significant group first; a set
+// top bit means another byte follows. A valid quantity is at most four bytes
+// long, so decoding stops after four bytes even if the continuation bit is
+// still set. This keeps malformed input from walking arbitrarily far through
+// memory and from shifting bits out of the 32-bit result.
+static uint32_t read_vlq(const uint8_t **ptr) {
+    const uint8_t *p = *ptr;
+    uint32_t value = 0;
+    for (int i = 0; i < 4; i++) {
+        uint8_t byte = *p++;
+        value = (value << 7) | (byte & 0x7Fu);
+        if (!(byte & 0x80u)) {
+            break;
+        }
+    }
+    *ptr = p;
+    return value;
+}
+
+// Parse a Standard MIDI File and append its note events to midiEvents.
+//
+// Only format 0 files with a single track and a ticks-per-quarter-note time
+// division are accepted. Note On (0x9n) and Note Off (0x8n) messages are stored
+// with their absolute time in seconds, computed from the delta times, the
+// current tempo (default 500000 microseconds per quarter note) and the file's
+// division. Set Tempo meta events update the tempo; every other event is
+// skipped.
+//
+// Events are appended at midiEventCount; neither midiEventCount nor
+// nextMidiEvent is reset here, and parsing stops once MAX_MIDI_EVENTS is
+// reached. If the file cannot be read or its headers are not accepted, nothing
+// is added (a file that is not single-track format 0 also prints a message).
+// A truncated or malformed track stops parsing; events decoded up to that
+// point are kept.
+//
+// path: path of the MIDI file to read.
+void load_midi_file(const char *path) {
+    // Zeroed slack after the file contents, so a truncated variable-length
+    // quantity or event always terminates inside the allocation.
+    const size_t pad = 8;
+
+    FILE *f = fopen(path, "rb");
+    if (!f) return;
+
+    long size = -1;
+    if (fseek(f, 0, SEEK_END) == 0) {
+        size = ftell(f);
+    }
+    rewind(f);
+
+    // A 14-byte MThd chunk plus an 8-byte MTrk chunk header is the least
+    // amount of data the parser below reads unconditionally.
+    uint8_t *data = NULL;
+    if (size >= 22) {
+        data = calloc((size_t) size + pad, 1);
+    }
+    if (data && fread(data, 1, (size_t) size, f) != (size_t) size) {
+        free(data);
+        data = NULL;
+    }
+    fclose(f);
+    if (!data) return;
+
+    const uint8_t *fileEnd = data + size;
+    const uint8_t *ptr = data;
+    if (memcmp(ptr, "MThd", 4) != 0) goto cleanup;
+    ptr += 8; // skip chunk id and header length
+    uint16_t format = read_be_uint16(ptr); ptr += 2;
+    uint16_t nTracks = read_be_uint16(ptr); ptr += 2;
+    uint16_t ppqn = read_be_uint16(ptr); ptr += 2;
+
+    if (format != 0 || nTracks != 1) {
+        printf("Only Type 0 MIDI supported\n");
+        goto cleanup;
+    }
+
+    // A zero division would divide by zero below; a set top bit selects SMPTE
+    // timing, which the tick-based conversion below cannot express.
+    if (ppqn == 0 || (ppqn & 0x8000)) goto cleanup;
+
+    if (memcmp(ptr, "MTrk", 4) != 0) goto cleanup;
+    uint32_t trackLen = read_be_uint32(ptr+4);
+    ptr += 8;
+
+    // Never trust the declared track length beyond the bytes actually read.
+    const uint8_t *trackEnd = fileEnd;
+    if (trackLen < (size_t) (fileEnd - ptr)) {
+        trackEnd = ptr + trackLen;
+    }
+
+    float curTime = 0.0f;
+    uint32_t tempo = 500000; // default: 120 BPM
+    uint8_t lastStatus = 0;
+
+    while (ptr < trackEnd && midiEventCount < MAX_MIDI_EVENTS) {
+        uint32_t delta = read_vlq(&ptr);
+        curTime += (delta * (tempo / 1000000.0f)) / ppqn;
+        if (ptr >= trackEnd) break; // delta time with no event after it
+
+        uint8_t status = *ptr;
+        if (status < 0x80) status = lastStatus; // running status: reuse previous
+        else ptr++;
+
+        lastStatus = status;
+
+        if (status == 0xFF) {
+            // Meta event: type byte, variable-length size, payload.
+            if (ptr >= trackEnd) break;
+            uint8_t metaType = *ptr++;
+            uint32_t len = read_vlq(&ptr);
+            if (ptr > trackEnd || len > (size_t) (trackEnd - ptr)) break;
+            if (metaType == 0x51 && len == 3) {
+                tempo = ((uint32_t) ptr[0] << 16) | ((uint32_t) ptr[1] << 8) | ptr[2];
+            }
+            ptr += len;
+        } else if (status == 0xF0 || status == 0xF7) {
+            // SysEx: variable-length size followed by that many payload bytes.
+            uint32_t len = read_vlq(&ptr);
+            if (ptr > trackEnd || len > (size_t) (trackEnd - ptr)) break;
+            ptr += len;
+        } else if ((status & 0xF0) == 0x90 || (status & 0xF0) == 0x80) {
+            if (trackEnd - ptr < 2) break; // truncated note message
+            uint8_t note = *ptr++;
+            uint8_t vel  = *ptr++;
+            midiEvents[midiEventCount++] = (MidiEvent){
+                    .timeSec = curTime,
+                    .type = (status & 0xF0) == 0x90 ? 0 : 1,
+                    .note = note,
+                    .velocity = vel
+            };
+        } else if ((status & 0xF0) == 0xC0 || (status & 0xF0) == 0xD0) {
+            ptr += 1; // program change / channel pressure carry one data byte
+        } else {
+            ptr += 2; // skip unknown
        }
    }

-    // Note: We did not normalize by active voices here, because each voice already
-    //   uses its own volume. If you still want an automatic “divide by N active voices”,
-    //   you would need to track active voices per‐frame, which is relatively expensive.
-    //   In practice, you manage the volume per voice so clipping doesn’t occur.
-}
+cleanup:
+    free(data);
+}