# Stream Piper text-to-speech output to the default audio device.
#
# The model synthesizes audio lazily, chunk by chunk; each chunk is written
# to a PyAudio output stream as soon as it is produced, so playback starts
# before the whole utterance has been synthesized.

import pyaudio
from piper.voice import PiperVoice as piper  # Backbone of text to speech

voice = piper.load("path/to/voice.onnx")  # Load the model

# Set up Piper to stream audio. iter() guarantees next() works even if
# synthesize() hands back a plain iterable rather than a generator.
chunks = iter(voice.synthesize("text to be synthesized by the model"))

# Get the first chunk to set up audio stream configuration. The default of
# None avoids an unhandled StopIteration when nothing was synthesized.
first_chunk = next(chunks, None)

if first_chunk is not None:
    p = pyaudio.PyAudio()
    try:
        # Open audio stream with the settings reported by the first chunk.
        stream = p.open(
            format=p.get_format_from_width(first_chunk.sample_width),
            channels=first_chunk.sample_channels,
            rate=first_chunk.sample_rate,
            output=True,
        )
        try:
            stream.write(first_chunk.audio_int16_bytes)  # Play the first chunk
            for chunk in chunks:  # Play subsequent chunks of audio
                stream.write(chunk.audio_int16_bytes)
            stream.stop_stream()
        finally:
            # Release the stream even if a write fails mid-playback.
            stream.close()
    finally:
        # Always shut the PortAudio session down, including when open() fails.
        p.terminate()