#########################################################################################
# file: speech_to_text.py
# type: Python
# date: 01_NOVEMBER_2024
# author: karbytes
# license: PUBLIC_DOMAIN
#########################################################################################

from typing import Optional

import speech_recognition as sr
from pydub import AudioSegment


def mp3_to_text(
    mp3_file_path: str = "input_audio.mp3",
    output_file_path: str = "output_text.txt",
    wav_file_path: str = "temp_audio.wav",
) -> Optional[str]:
    """Transcribe an MP3 file to text and save the transcription to a file.

    The MP3 is first converted to a WAV file (the format read here through
    ``sr.AudioFile``), the whole recording is loaded, and it is sent to
    ``Recognizer.recognize_google`` for transcription.

    Args:
        mp3_file_path: Path of the MP3 file to transcribe.
        output_file_path: Path of the text file the transcription is
            written to (overwritten if it already exists).
        wav_file_path: Path of the intermediary WAV file. It is left on
            disk afterwards; the user can manually delete or keep it.

    Returns:
        The transcribed text on success, or ``None`` when the audio could
        not be understood or the recognition request failed (a message is
        printed in both failure cases).

    Raises:
        Errors raised while loading or exporting the audio, or while
        writing the output file, are not caught here and propagate to the
        caller.
    """
    # Convert MP3 to WAV
    audio = AudioSegment.from_mp3(mp3_file_path)
    audio.export(wav_file_path, format="wav")

    # Initialize recognizer
    recognizer = sr.Recognizer()

    # Load the WAV file in full so it can be transcribed in one request
    with sr.AudioFile(wav_file_path) as source:
        audio_data = recognizer.record(source)

    # Only the recognition call is guarded; file-writing errors should
    # surface as-is rather than be mistaken for recognition failures.
    try:
        text = recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        print("Could not understand audio")
        return None
    except sr.RequestError as e:
        print(f"Could not request results from Speech Recognition service; {e}")
        return None
    else:
        print("Transcription successful!")

    # Write transcription to a text file. UTF-8 is set explicitly so that
    # non-ASCII transcriptions do not depend on the platform's default
    # encoding.
    with open(output_file_path, "w", encoding="utf-8") as text_file:
        text_file.write(text)
    print(f"Transcription saved to {output_file_path}")
    return text


if __name__ == "__main__":
    mp3_to_text()