#########################################################################################
# VOWEL ANALYZER                                                                        #
#                                                                                       #
# DESCRIPTION:                                                                          #
# This script (modeled on Mietta Lennes' collect_formant_data_from_files.praat          #
# available at http://www.helsinki.fi/~lennes/praat-scripts/ and distributed under the  #
# GNU General Public License, copyright 4/7/2003) is designed to be run on a set of     #
# soundfiles and TextGrids. It extracts duration (in ms), timestamps (in s), and        #
# F1/F2/F3 from all labeled intervals, and extracts labels for the corresponding word,  #
# along with any notes present in the TextGrid. Last, the script outputs the analyst    #
# name, date, settings, OS, and Praat version to the results file. The script can also  #
# be constrained to a user-defined set of words using the "targets" option.             #
#                                                                                       #
# NOTES:                                                                                #
# 1) Remember to add a / or \ (depending on the OS) to the end of all paths. 2)         #
# Soundfiles and TextGrids must have identical names in order for the script to match   #
# them up. 3) To use the "targets" option, make sure your TextGrid includes a word tier #
# and the "use word tier" option is selected, then create a tab-delimited text file     #
# containing a list of the words (case-sensitive) you'd like to extract, separated by   #
# newlines, with "word" as the column header. 4) If you use Unicode characters in your  #
# TextGrids, make sure Praat's text writing preferences are set to UTF-8 in Windows     #
# before running the script. If you intend to view the results file in Excel, in        #
# Windows you will have to open it from within Excel and import it specifying UTF-8     #
# encoding in order for the characters to display correctly. On OSX, you will first     #
# have to convert the file to UTF-16 Little Endian using a text editor, then import it  #
# into Excel.                                                                           #
#                                                                                       #
# CHANGELOG:                                                                            #
# 09/30/22: Added error handling for pitch extraction, changed default file extension.  #
# 02/08/14: Rewrote code for results file generation, added pitch extraction option.    #
# 02/07/14: Added metadata output to the script (analyst, version, settings, etc.).     #
# 10/23/13: Changed the behavior of the script to target only non-empty intervals.      #
# 06/26/13: Fixed bug introduced by a recent version of Praat.                          #
# 03/10/13: Reordered formant options.                                                  #
# 02/22/13: Added option to append data to an existing results file.                    #
# 02/19/13: Added counter for number of vowels analyzed if using targets option.        #
# 02/14/13: Fixed a bug involving running the script over multiple files, simplified    #
#           the extraction of sounds from longsounds.                                   #
# 01/19/13: Added ability to select the formant measurement points.                     #
# 01/01/13: Release version.                                                            #
#                                                                                       #
# This modified script is distributed under the GNU General Public License v3 or later, #
# copyright 1/2013, John Riebold (riebold@uw.edu).                                      #
#########################################################################################

# PROMPT THE USER FOR LOCATION OF INPUT/OUTPUT FILES, FORMANT SETTINGS, ETC.
# The rest of the script reads each field under its name with a lowercase first letter
# and without the parenthesized unit (e.g. soundfile_directory$, maximum_formant,
# left_Pitch_range). In every yes/no menu the script tests the value 1 for "yes" and 2
# for "no"; measurement_points is 1 for midpoint-only and 2-4 for the three-point
# schemes in the order listed.
form Vowel Analyzer
	comment Paths:
	sentence Soundfile_directory
	sentence Textgrid_directory
	sentence Results_file results.tsv
	optionmenu Use_targets_file 2
		option yes
		option no
	sentence Targets_file targets.txt
	comment Tiers:
	sentence Vowel_tier vowel
	optionmenu Use_word_tier: 2
		option yes
		option no
	sentence Word_tier word
	optionmenu Use_notes_tier: 2
		option yes
		option no
	sentence Notes_tier notes
	comment Formant settings:
	optionmenu Measurement_points: 4
		option Midpoint
		option 30%/50%/70%
		option 25%/50%/75%
		option 20%/50%/80%
	positive Maximum_formant_(Hz) 5500
	integer Number_of_formants 5
	comment Pitch Settings
	optionmenu Extract_pitch: 2
		option yes
		option no
	integer left_Pitch_range_(Hz) 75
	integer right_Pitch_range_(Hz) 500
	comment Analyst:
	sentence Initials
endform

# THE TARGETS FILTER IS MATCHED AGAINST WORD LABELS, SO IT CANNOT BE USED WHEN THE
# WORD TIER IS SWITCHED OFF (1 = yes, 2 = no); STOP BEFORE ANY FILE IS TOUCHED
if use_targets_file = 1 and use_word_tier = 2
	exit Error: the targets file option requires a word tier.
endif

# SET ADDITIONAL FORMANT OPTIONS, CHANGE IF NECESSARY
# (passed to "To Formant (burg)..." for every soundfile)
preemphasis_from = 50
window_length = 0.025
time_step = 0.01

# MAKE SURE NON-EMPTY DIRECTORY PATHS END IN A SEPARATOR
# Paths are glued directly onto file names further down, so a directory typed without
# its final / or \ would silently match no files. An empty path is left untouched.
if soundfile_directory$ <> ""
	last_char$ = right$ (soundfile_directory$, 1)
	if last_char$ <> "/" and last_char$ <> "\"
		soundfile_directory$ = soundfile_directory$ + "/"
	endif
endif
if textgrid_directory$ <> ""
	last_char$ = right$ (textgrid_directory$, 1)
	if last_char$ <> "/" and last_char$ <> "\"
		textgrid_directory$ = textgrid_directory$ + "/"
	endif
endif

# DEFINE EMPTY VARIABLES IN CASE TIERS ARE EMPTY/NOT PRESENT IN THE TEXTGRID
word_label$ = ""
notes_label$ = ""
preceding_label$ = ""
following_label$ = ""

# DEFINE DUMMY COUNTER VARIABLES FOR END-OF-SCRIPT REPORT
sound_count = 0
vowel_count = 0
target_vowel_count = 0

# GET TIME AND OS
rundate$ = date$ ()
if windows = 1
	os$ = "Windows"
elsif macintosh = 1
	os$ = "OSX"
elsif unix = 1
	os$ = "Linux"
else
	# Keep os$ defined so the results line never contains an unexpanded 'os$'
	os$ = "Unknown"
endif
# praatVersion is an integer; the regex inserts dots after the first and second digits
version$ = "'praatVersion'"
version$ = replace_regex$ ("'version$'", "(\d)(\d)(\d{2,2})", "\1.\2.\3", 0)

# OPEN TARGETS FILE
# Done before the results file is touched, so that an unusable targets file cannot
# leave behind a deleted or header-only results file.
if use_targets_file = 1
	if not fileReadable (targets_file$)
		exit Error: cannot read the targets file 'targets_file$'.
	endif
	Read Table from tab-separated file... 'targets_file$'
	targets$ = selected$ ("Table", 1)
	# The per-vowel lookup searches the column named "word"
	word_column = Get column index... word
	if word_column = 0
		Remove
		exit Error: the targets file 'targets_file$' needs a column named "word".
	endif
endif

# INITIALIZE RESULTS FILE
# An existing file is either appended to as-is (no new header) or deleted and restarted.
if fileReadable (results_file$)
	beginPause ("Warning")
		comment ("The file 'results_file$' already exists.")
	results_choice = endPause ("Append", "Overwrite", 1)
	if results_choice = 2
		filedelete 'results_file$'
		call InitializeResultsFile
	endif
else
	call InitializeResultsFile
endif

# CREATE LIST OF SOUNDFILES IN DIRECTORY
Create Strings as file list... list 'soundfile_directory$'*.wav
numberoffiles = Get number of strings

# GO THROUGH EACH SOUNDFILE
# For every file in the Strings list: open it as a LongSound, look for a TextGrid with
# the same base name, measure each labeled interval of the vowel tier, and append one
# tab-separated line per vowel to the results file. A soundfile without a TextGrid is
# skipped; its LongSound is removed and no other objects are created for it.
for ifile to numberoffiles
	select Strings list
	filename$ = Get string... ifile

	# OPEN SOUNDFILE
	Open long sound file... 'soundfile_directory$''filename$'
	soundfile$ = selected$ ("LongSound", 1)

	# LOCATE A TEXTGRID OF THE SAME NAME
	# The path is built from the file name on disk (minus its extension) rather than
	# from the object name, because Praat may alter characters such as spaces when it
	# turns a file name into an object name.
	basename$ = left$ (filename$, rindex (filename$, ".") - 1)
	gridfile$ = textgrid_directory$ + basename$ + ".TextGrid"
	if fileReadable (gridfile$)
		Read from file... 'gridfile$'
		gridname$ = selected$ ("TextGrid", 1)

		# INCREMENT SOUND COUNT (ONLY FILES THAT ARE ACTUALLY ANALYZED)
		sound_count = sound_count + 1

		# FIND TIER NUMBERS ONCE PER FILE, WHILE THE TEXTGRID IS STILL SELECTED
		# (GetTier exits the script if the named tier is missing)
		call GetTier 'vowel_tier$' vowel_tier
		if use_word_tier = 1
			call GetTier 'word_tier$' word_tier
		endif
		if use_notes_tier = 1
			call GetTier 'notes_tier$' notes_tier
		endif
		intervals = Get number of intervals... vowel_tier

		# EXTRACT ANNOTATED PORTION OF SOUNDFILE
		gridstart = Get start time
		gridend = Get end time
		select LongSound 'soundfile$'
		Extract part... gridstart gridend yes

		# REMOVE LONGSOUND
		select LongSound 'soundfile$'
		Remove

		# EXTRACT FORMANT AND PITCH OBJECTS
		select Sound 'soundfile$'
		To Formant (burg)... time_step number_of_formants maximum_formant window_length preemphasis_from
		if extract_pitch = 1
			select Sound 'soundfile$'
			# nocheck: a failed pitch analysis must not abort the whole run
			nocheck To Pitch... 0 left_Pitch_range right_Pitch_range
		endif

		# PASS THROUGH EACH INTERVAL IN SELECTED TIER AND GET LABEL
		for interval to intervals
			select TextGrid 'gridname$'
			phone_label$ = Get label of interval... vowel_tier interval

			# MAKE SURE LABEL NOT EMPTY
			if phone_label$ <> ""

				# INCREMENT VOWEL COUNT
				vowel_count = vowel_count + 1

				# GET START AND END TIMES, CALCULATE DURATION, ETC.
				start = Get starting point... vowel_tier interval
				end = Get end point... vowel_tier interval
				duration = (end-start)
				duration_ms = duration*1000
				midpoint = (start+end)/2

				# DETERMINE WHICH POINTS TO MEASURE
				# (onset/offset stay unused when only the midpoint is measured)
				if measurement_points = 2
					onset = start+(duration*0.3)
					offset = end-(duration*0.3)
				elsif measurement_points = 3
					onset = start+(duration/4)
					offset = end-(duration/4)
				elsif measurement_points = 4
					onset = start+(duration/5)
					offset = end-(duration/5)
				endif

				# GET FORMANT VALUES AT INTERVAL(S)
				# Suffix _1 = onset point, _2 = midpoint, _3 = offset point
				select Formant 'soundfile$'
				f1_2 = Get value at time... 1 midpoint Hertz Linear
				f2_2 = Get value at time... 2 midpoint Hertz Linear
				f3_2 = Get value at time... 3 midpoint Hertz Linear
				if measurement_points != 1
					f1_1 = Get value at time... 1 onset Hertz Linear
					f2_1 = Get value at time... 2 onset Hertz Linear
					f3_1 = Get value at time... 3 onset Hertz Linear
					f1_3 = Get value at time... 1 offset Hertz Linear
					f2_3 = Get value at time... 2 offset Hertz Linear
					f3_3 = Get value at time... 3 offset Hertz Linear
				endif

				# EXTRACT PITCH AT INTERVAL(S)
				# nocheck throughout: the Pitch object may not exist if the analysis failed
				if extract_pitch = 1
					nocheck select Pitch 'soundfile$'
					f0_2 = nocheck Get value at time... midpoint Hertz Linear
					if measurement_points != 1
						f0_1 = nocheck Get value at time... onset Hertz Linear
						f0_3 = nocheck Get value at time... offset Hertz Linear
					endif
				endif

				# GET WORD VOWEL IS FROM
				if use_word_tier = 1
					select TextGrid 'gridname$'
					word = Get interval at time... word_tier midpoint
					word_label$ = Get label of interval... word_tier word
				endif

				# GET CONTENTS OF NOTES TIER
				# The TextGrid is re-selected here because a Formant or Pitch object is
				# selected at this point whenever the word tier is not in use.
				if use_notes_tier = 1
					select TextGrid 'gridname$'
					note = Get interval at time... notes_tier midpoint
					notes_label$ = Get label of interval... notes_tier note
				endif

				# CREATE RESULTS LINE
				# Column order: file/labels/times, then optional F0, then formants,
				# then optional notes and the run metadata.
				if use_word_tier = 1
					resultsline_begin$ = "'soundfile$'	'word_label$'	'phone_label$'	'start'	'end'	'duration_ms'	"
				else
					resultsline_begin$ = "'soundfile$'	'phone_label$'	'start'	'end'	'duration_ms'	"
				endif
				if use_notes_tier = 1
					resultsline_end$ = "'notes_label$'	'initials$'	'rundate$'	Max formant: 'maximum_formant' Hz, Number of formants: 'number_of_formants', Window length: 'window_length' s	'version$'	'os$''newline$'"
				else
					resultsline_end$ = "'initials$'	'rundate$'	Max formant: 'maximum_formant' Hz, Number of formants: 'number_of_formants', Window length: 'window_length' s	'version$'	'os$''newline$'"
				endif
				if measurement_points = 1
					resultsline_middle$ = "'f1_2'	'f2_2'	'f3_2'	"
					if extract_pitch = 1
						resultsline_middle$ = "'f0_2'	" + resultsline_middle$
					endif
				else
					resultsline_middle$ = "'f1_1'	'f1_2'	'f1_3'	'f2_1'	'f2_2'	'f2_3'	'f3_1'	'f3_2'	'f3_3'	"
					if extract_pitch = 1
						resultsline_middle$ = "'f0_1'	'f0_2'	'f0_3'	" + resultsline_middle$
					endif
				endif
				resultsline$ = resultsline_begin$ + resultsline_middle$ + resultsline_end$

				# OUTPUT TO RESULTS FILE
				# With a targets file, only vowels whose word is listed are written
				if use_targets_file = 1
					select Table 'targets$'
					match = Search column... word 'word_label$'
					if match
						target_vowel_count = target_vowel_count + 1
						fileappend "'results_file$'" 'resultsline$'
					endif
				else
					target_vowel_count = target_vowel_count + 1
					fileappend "'results_file$'" 'resultsline$'
				endif
			endif
		endfor

		# REMOVE THIS FILE'S OBJECTS AND CONTINUE TO NEXT FILE
		select TextGrid 'gridname$'
		plus Sound 'soundfile$'
		plus Formant 'soundfile$'
		if extract_pitch = 1
			nocheck plus Pitch 'soundfile$'
		endif
		Remove
	else
		# NO MATCHING TEXTGRID: ONLY THE LONGSOUND EXISTS, SO REMOVE IT AND MOVE ON
		select LongSound 'soundfile$'
		Remove
	endif
endfor

# CLEAN UP: DISCARD THE TARGETS TABLE (IF ONE WAS LOADED), THEN THE FILE LIST
if use_targets_file = 1
	select Table 'targets$'
	Remove
endif
select Strings list
Remove

# SUMMARIZE THE RUN IN THE INFO WINDOW
echo Done. Analyzed 'target_vowel_count' of 'vowel_count' vowels in 'sound_count' file(s).

# PROCEDURE TO INITIALIZE RESULTS FILE
# Builds the tab-separated column header that matches the chosen options (word tier,
# notes tier, measurement points, pitch extraction) and appends it to results_file$.
# Leaves header_begin$, header_middle$, header_end$ and header$ set.
procedure InitializeResultsFile
	# LEADING COLUMNS: FILE, OPTIONAL WORD, VOWEL LABEL AND TIMING
	if use_word_tier <> 1
		header_begin$ = "Filename	Vowel	Begin Time (s)	End Time (s)	Duration (ms)	"
	else
		header_begin$ = "Filename	Word	Vowel	Begin Time (s)	End Time (s)	Duration (ms)	"
	endif

	# MEASUREMENT COLUMNS: PICK THE FORMANT AND PITCH LABELS FOR THE CHOSEN POINTS
	if measurement_points = 1
		.formant_columns$ = "F1 50%	F2 50%	F3 50%	"
		.pitch_columns$ = "F0 50%	"
	elsif measurement_points = 2
		.formant_columns$ = "F1 30%	F1 50%	F1 70%	F2 30%	F2 50%	F2 70%	F3 30%	F3 50%	F3 70%	"
		.pitch_columns$ = "F0 30%	F0 50%	F0 70%	"
	elsif measurement_points = 3
		.formant_columns$ = "F1 25%	F1 50%	F1 75%	F2 25%	F2 50%	F2 75%	F3 25%	F3 50%	F3 75%	"
		.pitch_columns$ = "F0 25%	F0 50%	F0 75%	"
	elsif measurement_points = 4
		.formant_columns$ = "F1 20%	F1 50%	F1 80%	F2 20%	F2 50%	F2 80%	F3 20%	F3 50%	F3 80%	"
		.pitch_columns$ = "F0 20%	F0 50%	F0 80%	"
	endif
	# Pitch columns, when requested, go in front of the formant columns
	header_middle$ = .formant_columns$
	if extract_pitch = 1
		header_middle$ = .pitch_columns$ + header_middle$
	endif

	# TRAILING COLUMNS: OPTIONAL NOTES, THEN RUN METADATA
	if use_notes_tier <> 1
		header_end$ = "Analyst	Date	Settings	Praat Version	OS'newline$'"
	else
		header_end$ = "Notes	Analyst	Date	Settings	Praat Version	OS'newline$'"
	endif

	# WRITE THE ASSEMBLED HEADER
	header$ = header_begin$ + header_middle$ + header_end$
	fileappend "'results_file$'" 'header$'
endproc

# PROCEDURE TO FIND NUMBER OF TIER WITH GIVEN LABEL
# Walks the tiers of the currently selected object in order and stores the number of
# the first tier called name$ in the variable whose name is passed as variable$.
# Exits the script with an error message if no tier has that name.
procedure GetTier name$ variable$
	.match = 0
	.total = Get number of tiers
	.index = 0
	# Stop at the first tier whose name matches, or once every tier has been checked
	while .match = 0 and .index < .total
		.index = .index + 1
		.candidate$ = Get tier name... .index
		if .candidate$ = name$
			.match = .index
		endif
	endwhile
	if .match = 0
		exit The tier 'name$' is missing from the file 'soundfile$'!
	endif
	# variable$ holds a variable NAME; the substitution turns this into an assignment to it
	'variable$' = .match
endproc