#########################################################################################
# ARPABET HAND-CORRECTOR
#
# DESCRIPTION:
# This script (modeled on Dan McCloy's SemiAutoPitchAnalysis.praat, available at
# http://students.washington.edu/drmccloy/resources/SemiAutoPitchAnalysis.praat and
# distributed under the GNU General Public License, copyright 2012) is designed to
# facilitate the hand-correction of P2FA-generated TextGrids by stepping through the
# intervals of interest and prompting the user to adjust the boundaries. It can
# automatically create a notes tier, and generates a progress file when the user hasn't
# finished correcting a file so that they can resume corrections later. All corrections
# are saved to a new TextGrid file, and a report file is created containing a summary
# of the actions taken.
#
# NOTES:
# 1) IMPORTANT: this script is a skeleton, containing example target environments for
#    hand-correction. To use it, replace the placeholders with the target
#    environments/segments of interest in the appropriate places in the file (in the
#    settings and later on; look for the comments).
# 2) Remember to add a / or \ (depending on the OS) to the end of all paths.
# 3) Soundfiles and TextGrids must have identical names in order for the script to
#    match them up.
# 4) To use the "targets" option, create a tab-delimited text file containing a list of
#    the words you'd like to extract, separated by newlines, with "word" as the column
#    header. Matching is case-sensitive and the script lowercases each word label
#    before looking it up, so list the words in lowercase.
#########################################################################################

# DISPLAY SETTINGS FORM
form Arpabet Hand-Corrector
    comment Paths:
    sentence Textgrid_directory
    sentence Soundfile_directory
    sentence Targets_file targets.txt
    comment Tiers:
    optionmenu Create_notes_tier: 2
        option Yes
        option No
    comment Settings:
    # *****CHANGE THESE NAMES*****
    optionmenu Targets: 1
        option CUSTOM OPTION 1
        option CUSTOM OPTION 2
        option CUSTOM OPTION 3
        option Use targets file
    real Zoom_duration 0.25
endform

# SET TIER NAMES, CHANGE IF NECESSARY
phone_tier$ = "phone"
word_tier$ = "word"

# EXAMPLE SETTING USED BY "CUSTOM OPTION 1": SET TO 1 TO SKIP TOKENS IN THE FRAME-SENTENCE
# WORDS ("write", "today"). THIS VARIABLE WAS PREVIOUSLY READ WITHOUT EVER BEING DEFINED,
# WHICH MADE PRAAT STOP WITH AN UNKNOWN-VARIABLE ERROR ON THE FIRST MATCHING TOKEN.
skip_frame_sentence = 0

# EXAMPLE SEGMENT CLASSES (ANCHORED REGULAR EXPRESSIONS OVER ARPABET LABELS).
# *****CHANGE THESE TO SUIT YOUR TARGET ENVIRONMENTS*****
voiced_obstruent$ = "^(B|D|G|V|DH|Z|ZH|JH)$"
voiced_obstruent_not_g$ = "^(B|D|V|DH|Z|ZH|JH)$"
voiceless_stop$ = "^(P|T|K)$"
# TARGETS FILE IS INTENDED FOR USE WITH VOWELS, CHANGE THIS IF INTERESTED IN CONSONANTS
stressed_vowel$ = "^(AO|AA|IY|UW|EH|IH|UH|AH|AX|AE|EY|AY|OW|AW|OY|ER|AXR)[12]$"

# INITIALIZE VARIABLES
file_count = 0
token_count = 0
skipped_count = 0
files_read$ = ""
corrected_tokens$ = ""
skipped_tokens$ = ""
continue = 0

# OPEN THE TARGETS FILE IF THE USER HAS CHOSEN TO USE ONE
# (kept in targets_table$ so the form's own targets$ string is not overwritten)
if targets = 4
    Read Table from tab-separated file... 'targets_file$'
    targets_table$ = selected$ ("Table", 1)
endif

# MAKE A LIST OF ALL SOUNDFILES IN THE FOLDER
Create Strings as file list... list 'soundfile_directory$'*.wav
files = Get number of strings

# LOOP THROUGH THE LIST OF FILES...
for file from 1 to files

    # READ IN THE SOUNDFILE AND FIND DURATION
    select Strings list
    filename$ = Get string... file
    Open long sound file... 'soundfile_directory$''filename$'
    soundfile$ = selected$ ("LongSound", 1)
    total_duration = Get total duration

    # INCREMENT FILE COUNT AND LOG
    file_count = file_count + 1
    files_read$ = files_read$ + soundfile$ + ", "

    # RESET FLAGS (continue IS RESET PER FILE SO AN UNCONSUMED RESUME REQUEST CANNOT LEAK
    # INTO A LATER FILE THAT HAS NO PROGRESS MATRIX)
    progress_file_present = 0
    corrected_file_present = 0
    new_file = 1
    continue = 0

    # CHECK FOR PROGRESS FILE AND PREVIOUSLY CORRECTED TEXTGRID
    progress_file$ = soundfile_directory$ + soundfile$ + "-progress"
    corrected_file$ = textgrid_directory$ + soundfile$ + "-corrected.TextGrid"
    report_file$ = soundfile_directory$ + soundfile$ + "-report.txt"

    if fileReadable (progress_file$)
        Read Matrix from raw text file... 'progress_file$'
        continue = 1
        progress_file_present = 1
    endif

    if fileReadable (corrected_file$)
        gridfile$ = corrected_file$
        gridname$ = soundfile$ + "-corrected"
        corrected_file_present = 1
        if progress_file_present = 0
            # A corrected grid without a progress file is ambiguous: clean up and stop.
            # (No progress Matrix can exist in this branch, so none is selected.)
            select Strings list
            plus LongSound 'soundfile$'
            if targets = 4
                plus Table 'targets_table$'
            endif
            Remove
            exit Warning, a corrected TextGrid exists but there is no corresponding progress file. If the file has already been corrected, remove it from the directory, otherwise restore the progress file associated with it.
        endif
    else
        gridfile$ = textgrid_directory$ + soundfile$ + ".TextGrid"
        gridname$ = soundfile$
    endif

    # OPEN THE CORRESPONDING TEXTGRID
    Read from file... 'gridfile$'

    # PERFORM INITIAL OPERATIONS ON TEXTGRID
    select TextGrid 'gridname$'
    call GetTier 'phone_tier$' phone_tier
    if phone_tier = -1
        exit The tier 'phone_tier$' is missing from 'soundfile$'.TextGrid!
    endif
    call GetTier 'word_tier$' word_tier
    if word_tier = -1
        exit The tier 'word_tier$' is missing from 'soundfile$'.TextGrid!
    endif
    if create_notes_tier = 1
        call GetTier notes notes_tier
        if notes_tier = -1
            # GetTier leaves the tier count in "tiers"; append the notes tier after the last one
            Insert interval tier... (tiers+1) notes
        endif
    endif

    # LOOP THROUGH INTERVALS
    interval_count = Get number of intervals... phone_tier
    for interval to interval_count

        # GET NUMBER OF INTERVALS AGAIN IN CASE IT HAS CHANGED
        select TextGrid 'gridname$'
        interval_count = Get number of intervals... phone_tier

        # CHECK IF CONTINUING: JUMP TO THE INTERVAL STORED IN THE PROGRESS FILE
        if continue = 1
            select Matrix 'soundfile$'-progress
            interval = Get value in cell... 1 1
            continue = 0
        endif

        # GET PHONE LABEL
        select TextGrid 'gridname$'
        phone_label$ = Get label of interval... phone_tier interval

        # GET WORD LABEL (midpoint IS ALSO USED BY Correct TO PLACE THE CURSOR)
        vowelstart = Get starting point... phone_tier interval
        vowelend = Get end point... phone_tier interval
        midpoint = (vowelstart+vowelend)/2
        word = Get interval at time... word_tier midpoint
        word_label$ = Get label of interval... word_tier word

        # GET PRECEDING AND FOLLOWING PHONE LABELS (EMPTY WHEN THE NEIGHBOUR DOES NOT EXIST,
        # BELONGS TO A DIFFERENT WORD, OR IS A PAUSE). ALL THREE ARE ASSIGNED ON EVERY
        # ITERATION, SO NONE CAN BE UNDEFINED OR STALE.
        call GetNeighbourLabel -1
        preceding_label$ = neighbour_label$
        call GetNeighbourLabel -2
        prepreceding_label$ = neighbour_label$
        call GetNeighbourLabel 1
        following_label$ = neighbour_label$

        # CHECK FOR THE TARGET (WORD LABEL IS LOWERCASED ONLY AFTER THE NEIGHBOUR LOOKUPS,
        # WHICH COMPARE THE WORD LABELS EXACTLY AS THEY APPEAR ON THE TIER)
        word_label$ = replace_regex$ (word_label$, "[A-Z]", "\L&", 0)

        # SHARED ENVIRONMENT TESTS FOR THE EXAMPLES BELOW
        voiced_or_final = 0
        if following_label$ = "" or index_regex (following_label$, voiced_obstruent$) > 0
            voiced_or_final = 1
        endif
        stop_initial = 0
        if index_regex (preceding_label$, voiceless_stop$) > 0 and prepreceding_label$ = ""
            stop_initial = 1
        endif

        # *****INSERT CHOICE OF PHONE/WORD HERE, FOLLOWING THE EXAMPLE HERE*****
        if targets = 1
            if index_regex (phone_label$, "^(AE|EH|EY)[12]$") > 0 and voiced_or_final = 1
                if skip_frame_sentence = 1 and (word_label$ = "write" or word_label$ = "today")
                    # frame-sentence word: deliberately not presented for correction
                else
                    call Correct
                endif
            endif

        # *****INSERT CHOICE OF PHONE/WORD HERE, FOLLOWING THE EXAMPLE HERE*****
        elsif targets = 2
            if index_regex (phone_label$, "^(AE|EH)[12]$") > 0
                if following_label$ = "" or index_regex (following_label$, voiced_obstruent_not_g$) > 0
                    call Correct
                elsif following_label$ = "G" and stop_initial = 1
                    call Correct
                endif
            elsif index_regex (phone_label$, "^EY[12]$") > 0
                if stop_initial = 1 and voiced_or_final = 1
                    call Correct
                endif
            endif

        # *****INSERT CHOICE OF PHONE/WORD HERE, FOLLOWING THE EXAMPLE HERE*****
        elsif targets = 3
            if index_regex (phone_label$, "^(AE|EH|EY)[12]$") > 0
                if stop_initial = 1 and voiced_or_final = 1
                    call Correct
                endif
            endif

        # USE TARGETS FILE: STRESSED VOWELS IN WORDS LISTED IN THE "word" COLUMN
        elsif targets = 4
            if index_regex (phone_label$, stressed_vowel$) > 0
                select Table 'targets_table$'
                match = Search column... word 'word_label$'
                if match
                    call Correct
                endif
            endif
        endif
    endfor

    # GENERATE FILE REPORT, REMOVE ALL OBJECTS FOR THAT FILE AND GO ON TO THE NEXT ONE
    call GenerateReport
    if fileReadable (progress_file$)
        filedelete 'progress_file$'
    endif
    select LongSound 'soundfile$'
    plus TextGrid 'gridname$'
    if progress_file_present = 1
        plus Matrix 'soundfile$'-progress
    endif
    Remove
    select Strings list
endfor

# REMOVE REMAINING OBJECTS AND PRINT REPORT
select Strings list
if targets = 4
    plus Table 'targets_table$'
endif
Remove
call PrintSummary


# PROCEDURE TO FETCH THE LABEL OF THE PHONE AT (interval + offset)
# Expects the TextGrid to be selected. Reads the globals interval, interval_count,
# phone_tier, word_tier and word_label$ (un-lowercased). Leaves the result in
# neighbour_label$, which is "" when the neighbour is outside the tier, starts in a
# different word than the current phone, or is labelled "sp" or "sil".
procedure GetNeighbourLabel offset
    neighbour_label$ = ""
    neighbour = interval + offset
    if neighbour >= 1 and neighbour <= interval_count
        neighbour_time = Get starting point... phone_tier neighbour
        neighbour_word = Get interval at time... word_tier neighbour_time
        neighbour_word$ = Get label of interval... word_tier neighbour_word
        if neighbour_word$ = word_label$
            neighbour_label$ = Get label of interval... phone_tier neighbour
            if neighbour_label$ = "sp" or neighbour_label$ = "sil"
                neighbour_label$ = ""
            endif
        endif
    endif
endproc


# PROCEDURE TO SHOW EVALUATION WINDOW AND TAKE INPUT
# Zooms the editor to the current token, pauses for the user, then either saves the
# corrected TextGrid ("Next"), logs a skip ("Skip"), or writes a progress file, cleans up
# and ends the script ("Stop").
procedure Correct

    # PREVENT ZOOM DURATION FROM EXTENDING BEYOND THE ENDS OF THE FILE BUT MAINTAIN DESIRED WINDOW SIZE
    if zoom_duration <> 0
        left_edge = midpoint - zoom_duration/2
        right_edge = midpoint + zoom_duration/2
        right_excess = right_edge - total_duration
        if left_edge < 0
            # window would start before the file: pin it to the start
            zoom_start = 0
            if zoom_duration > total_duration
                zoom_end = total_duration
            else
                zoom_end = zoom_duration
            endif
        elsif right_edge > total_duration
            # window would run past the file: pin it to the end
            zoom_end = total_duration
            if left_edge > right_excess
                zoom_start = zoom_end - zoom_duration
            else
                zoom_start = 0
            endif
        else
            zoom_start = left_edge
            zoom_end = right_edge
        endif
    else
        # a zoom duration of 0 means "show the whole file"
        zoom_start = 0
        zoom_end = total_duration
    endif

    # CHECK IF FIRST INTERVAL, IF SO SET ALL SETTINGS AND SHOW EDITOR WINDOW
    if new_file = 1
        select LongSound 'soundfile$'
        plus TextGrid 'gridname$'
        View & Edit
        editor TextGrid 'gridname$'
            Show analyses... yes yes yes yes no 10
            Spectrogram settings... 0.0 5000.0 0.005 70.0
            Zoom... zoom_start zoom_end
            Move cursor to... midpoint
        endeditor
    else
        editor TextGrid 'gridname$'
            Zoom... zoom_start zoom_end
            Move cursor to... midpoint
        endeditor
    endif
    new_file = 0

    # SHOW THE ANALYSIS WINDOW
    beginPause ("Correct Boundaries")
        comment ("File: 'soundfile$' (#'file_count' of 'files')")
        comment ("Correct the vowel boundaries and click 'next' when finished.")
    clicked = endPause ("Next", "Skip", "Stop", 1, 3)

    # IF THE USER CLICKS "NEXT", LOG IT AND SAVE THE TEXTGRID
    if clicked = 1
        token_count = token_count + 1
        corrected_tokens$ = corrected_tokens$ + word_label$ + ", "
        editor TextGrid 'gridname$'
            Save TextGrid as text file... 'corrected_file$'
        endeditor

    # IF THE USER CLICKS "SKIP", LOG IT AND GO TO THE NEXT INTERVAL
    elsif clicked = 2
        skipped_count = skipped_count + 1
        skipped_tokens$ = skipped_tokens$ + word_label$ + ", "

    # IF THE USER CLICKS "STOP", GENERATE REPORT AND PROGRESS FILE, CLEAR WINDOW AND REMOVE OBJECTS
    elsif clicked = 3
        if token_count > 0 or skipped_count > 0
            # store the current interval number so the next run resumes at this token
            if fileReadable (progress_file$)
                filedelete 'progress_file$'
            endif
            fileappend "'progress_file$'" 'interval'
            call GenerateReport
        endif
        select Strings list
        plus LongSound 'soundfile$'
        plus TextGrid 'gridname$'
        if targets = 4
            plus Table 'targets_table$'
        endif
        if progress_file_present = 1
            plus Matrix 'soundfile$'-progress
        endif
        Remove
        call PrintSummary
        exit
    endif
endproc


# PROCEDURE TO FIND THE NUMBER OF A TIER WITH A GIVEN LABEL
# Expects a TextGrid to be selected. Stores the number of the first tier called name$ in
# the numeric variable whose name is given in variable$, or -1 if there is no such tier.
# Also leaves the total number of tiers in the global "tiers".
procedure GetTier name$ variable$
    tiers = Get number of tiers
    found_tier = 0
    for itier to tiers
        tier$ = Get tier name... itier
        if found_tier = 0 and tier$ = name$
            found_tier = itier
        endif
    endfor
    if found_tier = 0
        'variable$' = -1
    else
        'variable$' = found_tier
    endif
endproc


# PROCEDURE TO GENERATE REPORTS FOR EACH FILE CORRECTED
# Appends a dated summary of corrected and skipped tokens to report_file$ and resets the
# per-file counters and lists.
procedure GenerateReport

    # GET TIME/DATE AND FORMAT LISTS
    rundate$ = date$ ()
    corrected_tokens$ = replace_regex$ (corrected_tokens$, ", $", ".", 0)
    skipped_tokens$ = replace_regex$ (skipped_tokens$, ", $", ".", 0)

    # APPEND REPORTS TO FILE
    fileappend "'report_file$'" 'rundate$''newline$'Corrected 'token_count' token(s): 'corrected_tokens$''newline$'Skipped 'skipped_count' token(s): 'skipped_tokens$''newline$''newline$'

    # CLEAR COUNTS AND LISTS FOR NEXT FILE
    token_count = 0
    skipped_count = 0
    corrected_tokens$ = ""
    skipped_tokens$ = ""
endproc


# PROCEDURE TO PRINT THE END-OF-RUN SUMMARY TO THE INFO WINDOW
procedure PrintSummary
    clearinfo
    files_read$ = replace_regex$ (files_read$, ", $", ".", 0)
    printline Done. Read 'file_count' file(s): 'files_read$'
endproc