@@ -81,8 +81,8 @@ def process(self):
8181 frameshift : int = 1 # variables.frameshift
8282 time_praat : pd .DataFrame
8383 f0_praat : pd .DataFrame
84- time_praat , f0_praat = get_raw_pitch (audio_file_path )
85- f0 : np .array = refine_pitch_voice_sauce (time_praat , f0_praat )
84+ time_praat , f0_praat = self . get_raw_pitch (audio_file_path )
85+ f0 : np .array = self . refine_pitch_voice_sauce (time_praat , f0_praat )
8686 self .f0 : np .array = f0
8787 signal , sampling_rate = self .args ['voice' ]
8888 sound : parselmouth .Sound = parselmouth .Sound (signal , sampling_rate )
@@ -96,7 +96,7 @@ def process(self):
9696 # Calculate Energy
9797
9898 try :
99- energy : Union [np .array , str , list ] = get_energy_voice_sauce (audio_file_path )
99+ energy : Union [np .array , str , list ] = self . get_energy_voice_sauce (audio_file_path )
100100 except Exception as e :
101101 energy = str (e )
102102
@@ -129,159 +129,159 @@ def process(self):
129129 }
130130
131131
def get_energy_voice_sauce(self, audio_file_path: str) -> Union[np.array, str]:
    """Compute per-frame signal energy following the VoiceSauce formula.

    For every frame that has a usable f0 estimate, the energy is the sum of
    squared samples inside a window of ``n_periods`` pitch periods centred on
    the frame. Frames whose f0 is NaN or 0, or whose window does not fit
    inside the signal, keep the initial NaN value.

    The samples are taken from ``self.args['voice']``; ``audio_file_path`` is
    only forwarded to :meth:`get_raw_pitch`.

    :param audio_file_path: path to audio file
    :type audio_file_path: str
    :return: energy: one energy value per f0 frame (NaN where undefined).
        This method itself always returns an array; the ``str`` arm of the
        annotation matches the caller, which stores ``str(e)`` on failure.
    :rtype: Union[np.array, str]
    """
    n_periods: int = 5  # Nperiods_EC
    frameshift: int = 1  # variables.frameshift

    time_praat, f0_praat = self.get_raw_pitch(audio_file_path)
    f0 = self.refine_pitch_voice_sauce(time_praat, f0_praat)

    signal, sampling_rate = self.args['voice']
    sound: parselmouth.Sound = parselmouth.Sound(signal, sampling_rate)
    # Sound.resample returns a NEW Sound and leaves the receiver untouched.
    # The previous code discarded the result, so the signal was never
    # actually resampled; rebinding makes the 16 kHz conversion take effect.
    sound = sound.resample(16000)
    y = sound.values.T
    fs = sound.sampling_frequency
    sampleshift: float = fs / 1000 * frameshift

    n_samples: int = len(y)
    half_window: float = n_periods / 2

    energy: np.array = np.full(len(f0), np.nan)
    for k, f0_curr in enumerate(f0):
        # Sample index of the centre of frame k.
        ks = self.round_half_away_from_zero(k * sampleshift)
        if ks <= 0 or ks >= n_samples:
            continue

        # No pitch period can be derived from an undefined or zero f0.
        if np.isnan(f0_curr) or f0_curr == 0:
            continue

        n0_curr = fs / f0_curr  # samples per pitch period
        ystart: int = int(self.round_half_away_from_zero(ks - half_window * n0_curr))
        yend: int = int(self.round_half_away_from_zero(ks + half_window * n0_curr) - 1)

        # Skip frames whose analysis window would leave the signal.
        if ystart <= 0 or yend > n_samples:
            continue

        energy[k] = np.sum(y[ystart:yend] ** 2)
    return energy
180+
181+
def get_raw_pitch(self, audio_file_path: str) -> tuple[np.ndarray, np.ndarray]:
    """Get raw pitch from Praat. This is used to set the window length for the energy calculation.

    The audio is taken from ``self.args['voice']``; ``audio_file_path`` is
    accepted for interface compatibility but is not read here.

    :argument: audio_file_path: path to the audio file
    :type: str
    :return: time, f0 -- the ``Time`` and ``Frequency`` columns of the Praat
        PitchTier table, as NumPy arrays
    :rtype: tuple[np.ndarray, np.ndarray]
    """
    import os
    import tempfile

    signal, sampling_rate = self.args['voice']
    sound: parselmouth.Sound = parselmouth.Sound(signal, sampling_rate)
    # Sound.resample returns a NEW Sound; the previous code dropped the
    # result, so pitch tracking ran on the un-resampled signal.
    sound = sound.resample(16000)
    pitch: parselmouth.Pitch = sound.to_pitch_cc(
        time_step=0.001,
        pitch_floor=40,
        pitch_ceiling=500,
    )
    pitch_tier: parselmouth.Data = call(pitch, "Down to PitchTier")

    # Praat can only hand the tier over through a file. Write it into a
    # private scratch directory instead of a fixed name in the current
    # working directory, so concurrent runs cannot overwrite each other's
    # table and nothing is left behind afterwards.
    with tempfile.TemporaryDirectory() as scratch_dir:
        table_path = os.path.join(scratch_dir, "parselmouth_cc.txt")
        call(pitch_tier, "Write to headerless spreadsheet file", table_path)
        df: pd.DataFrame = pd.read_csv(table_path, sep='\t', header=None)

    df.columns = ['Time', 'Frequency']
    return df.Time.values, df.Frequency.values
203+
204+
def refine_pitch_voice_sauce(self, times: pd.DataFrame, frequencies: pd.DataFrame) -> np.array:
    """Refine praat Pitch by snapping the raw estimates onto a 1 ms frame grid.

    Each frame takes the f0 of the raw point closest to it in time, provided
    that point lies within ``frame_precision * frame_shift`` milliseconds;
    frames with no such point keep the value 0.

    NOTE(review): the result has one slot per raw point (``len(times)``), not
    one per frame, so frames whose index is >= ``len(times)`` are dropped.
    Confirm that this is intended.

    :argument: times: time stamps of the raw estimates, in seconds
    :type: times: np.array
    :argument: frequencies: raw f0 estimates, aligned with ``times``
    :type: frequencies: np.array

    :return: f0: refined fundamental frequency values (empty when ``times``
        is empty)
    :rtype: np.array

    """
    # Licensed under Apache v2 (see LICENSE)
    # Based on VoiceSauce files func_PraatPitch.m (authored by Yen-Liang Shue
    # and Kristine Yu) and func_PraatFormants.m (authored by Yen-Liang Shue and
    # Kristine Yu)

    frame_shift: int = 1
    frame_precision: int = 1

    # Gather raw Praat f0 estimates
    t_raw: np.ndarray = np.array(times)
    f0_raw: np.ndarray = np.array(frequencies)
    data_len: int = len(t_raw)

    # Initialize the f0 measurement vector with 0 (0 marks "no estimate").
    f0: np.ndarray = np.full(data_len, 0, dtype=float)
    if data_len == 0:
        # Nothing to align; indexing the last time stamp below would fail.
        return f0

    # Convert time from seconds to nearest whole millisecond
    t_raw_ms: np.ndarray = np.int_(self.round_half_away_from_zero(t_raw * 1000))

    # Raw Praat estimates are at time points that don't completely match
    # the time points in our measurement vectors, so we need to interpolate.
    # We use a crude interpolation method, that has precision set by
    # frame_precision.

    # Determine start and stop times
    start: int = 0
    if t_raw_ms[-1] % frame_shift == 0:
        stop = t_raw_ms[-1] + frame_shift
    else:
        stop = t_raw_ms[-1]

    max_gap = frame_precision * frame_shift

    # Iterate through timepoints corresponding to each frame in time range
    for idx_f, t_f in enumerate(range(start, stop, frame_shift)):
        # Slots past the end of f0 can never be written, so stop early
        # rather than searching for neighbours that would be thrown away.
        if idx_f >= data_len:
            break

        # Find closest time point among calculated Praat values
        gaps = np.abs(t_raw_ms - t_f)
        min_idx = np.argmin(gaps)

        # If closest time point is too far away, skip
        if gaps[min_idx] > max_gap:
            continue

        f0[idx_f] = f0_raw[min_idx]
    return f0
266+
267+
def round_half_away_from_zero(self, x) -> np.int_:
    """Round to the nearest integer, resolving exact halves away from zero.

    :argument x: number (or NumPy array of numbers) to round
    :type x: Union[float, int]
    :return: rounded number
    :rtype: np.int_

    For example:
        - round_half_away_from_zero(3.5) = 4
        - round_half_away_from_zero(3.2) = 3
        - round_half_away_from_zero(-2.7) = -3
        - round_half_away_from_zero(-4.3) = -4

    NumPy's own rounding uses round-half-to-even, and Python's built-in
    round() does not operate on NumPy vectors, hence this dedicated helper.
    """
    # Round the magnitude half-up, then put the original sign back.
    rounded_magnitude = np.floor(np.abs(x) + 0.5)
    signed_value = np.sign(x) * rounded_magnitude
    return np.int_(signed_value)
0 commit comments