param waveforms: A list of 16 kHz (16,000 Hz) audio sample arrays, which may differ in length
param batch_size: The batch size of the Whisper model used to transcribe the audio. Needed to prevent JIT shape-mismatch errors, because the encoder does not accept symbolic shapes
param truncate: If true, truncates (or pads) the audio to exactly 30 s so that it fits a single encoder pass
return: The mel spectrogram of the given waveforms
/**
 * Entry point for Whisper audio pre-processing (waveforms in, mel spectrogram
 * out, according to the notes that accompany this declaration).
 *
 * As written here the function performs no processing: none of the parameters
 * is read, and the body simply evaluates the identifier `unknown` and returns
 * the result.
 * NOTE(review): `unknown` is not bound in the visible source, so this looks
 * like a signature-only placeholder — confirm where the real implementation
 * (or the binding for `unknown`) lives.
 *
 * @param _waveforms - Array of `Float32Array` sample buffers; not read by this body.
 * @param batch_size - Batch size as a number; not read by this body.
 * @param truncate - Boolean flag, `false` when omitted; not read by this body.
 * @returns Whatever the identifier `unknown` evaluates to.
 */
const prep_audio = (
  _waveforms: Float32Array[],
  batch_size: number,
  truncate: boolean = false,
) => {
  return unknown;
};