Hi everyone,
I’m a bit new to developing scripts in Psychopy and was wondering if anyone had any assistance they could provide. I saw the transcribe() function from the Audioclip class (documentation) has integrated OpenAI’s Whisper transcription software into psychopy.
Here are my script steps:
- Participants see a question
- They press a button to start recording their response
- Save the response as a .wav file
- Loop to the next question until the questions are finished.
I then wanted to use the transcribe() function to load my sound files, transcribe the audio, and save it as a .txt. The recording portion of the task works, but not the transcription portion. I know this is a new feature, but I’m a bit lost.
Here is the relevant code:
import psychopy.sound as sound
from psychopy import visual, core, event, sound, gui
from psychopy import prefs
import os.path
#Gotta mess with these preferences cause the audio sounds like donkey
prefs.hardware['audioLib'] = 'PTB' #apparently PTB has better timing according to an error I saw
#prefs.hardware['audioLatencyMode'] = 3
expClock = core.Clock()
mic = sound.Microphone(channels=2, streamBufferSecs=15)
path = 'path\to\file'
#make a gui to input participant ID
gui = gui.Dlg(title='Cued recall experiment')
gui.addField('Subject ID:')
gui.show()
sub_ID = gui.data[0]
audioClip=None #Initalize the transcriber audio. Not sure if necessary, said to do it on the psychopy website for transcribe()
win = visual.Window(
size=(1920, 1080), fullscr=True, screen=0,
allowGUI=False, allowStencil=False,
monitor='testMonitor', color=u'grey', colorSpace='rgb',
blendMode='avg', useFBO=True)
#I couldn't remember the questions
questions = [
"Was this a good movie?",
"do you have any opinions at all?",
"Any suggestions for improvement?",
]
recording_button = visual.TextStim(win, text='Press SPACE to start recording', pos=(0, -0.5))
for i, question in enumerate(questions): # display the questions, i is for saving the .wavs for later
question_text = visual.TextStim(win, text=question, pos=(0, 0.5))
question_text.draw()
recording_button.draw()
win.flip()
# Wait for the SPACE key to start recording
event.waitKeys(keyList=['space'])
# Start recording
mic.start()
recording_button.text = 'Recording... Press SPACE to stop'
recording_button.draw()
win.flip()
# Wait for the SPACE key to stop recording
event.waitKeys(keyList=['space'])
# Stop recording
recording_button.text = 'Press SPACE to begin recording'
recording_button.draw()
mic.stop()
win.flip()
# Get the recorded audio
recorded_audio = mic.getRecording()
# Save the audio to a WAV file
recorded_audio.save(f'participant_{sub_ID}_response_{i+1}.wav')
#in case we need to wait between questions
#core.wait(1.0)
# Loop through questions
for i, question in enumerate(questions):
# Load the audio clip
loaded_audio = sound.AudioClip.load(f'{path}participant_{sub_ID}_response_{i+1}.wav')
# Transcribe the loaded audio using the 'whisper' engine
transcription_result = loaded_audio.transcribe(engine='whisper', language='en-US', expectedWords=None, config=None)
# Get the transcription text from the result
transcription_text = transcription_result.getText()
# Create the complete file path using os.path.join
completeName = os.path.join(path, f'participant_{sub_ID}_response_{i+1}.txt')
# Save the transcription to a text file
with open(completeName, 'w') as txt_file:
txt_file.write(transcription_text)
mic.close()
win.close()