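"""Inference script for the Audio-Classification-CNNs project: loads four trained
CNNs (waveform- and spectrogram-based gender and digit classifiers) and runs them
on a single spoken-digit recording."""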
import os

# Must be set before TensorFlow is imported for the flag to take effect.
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"

import warnings

warnings.filterwarnings("ignore")

import numpy as np
import scipy.signal  # import the submodule explicitly; `import scipy` alone may not expose it
import librosa
import tensorflow as tf
import tensorflow_io as tfio
from skimage.transform import resize
from tensorflow.keras.models import model_from_json


def load_model(model_path_with_model, model_name):
    """Rebuild a Keras model from its JSON architecture file and load its weights."""
    with open(f"{model_path_with_model}/{model_name}.json", "r") as json_file:
        model_json = json_file.read()
    model = model_from_json(model_json)
    model.load_weights(f"{model_path_with_model}/{model_name}.weights.h5")
    return model


def audio_load(filename):
    """Load a WAV file at 8 kHz and pad or trim it to exactly 8000 samples (1 s)."""
    au, sr = librosa.load(filename, sr=8000)
    if len(au) < 8000:
        audio = np.pad(au, (0, 8000 - len(au)))
    elif len(au) > 8000:
        audio = au[:8000]
    else:
        audio = au  # already exactly 8000 samples
    print("Audio processed.")
    return audio.astype(np.float32)


def spectrogram_generator(audio_in):
    """Turn a 1-second waveform into a (1, 128, 128, 1) log-mel spectrogram tensor."""
    # Short-time Fourier transform of the 8 kHz signal.
    f, t, zxx = scipy.signal.stft(
        audio_in, 8000, nperseg=455, noverlap=393, window="hann"
    )
    zxx_spect = np.abs(zxx)
    spectrogram = np.atleast_3d(zxx_spect)
    # Map to the mel scale, take the log, then convert to a dB scale.
    mel_spect = tfio.audio.melscale(spectrogram, rate=8000, mels=128, fmin=0, fmax=4000)
    log_mel_spect = tf.math.log(mel_spect + 1e-6)
    dbscale_spect = tfio.audio.dbscale(log_mel_spect, top_db=80)
    # Resize to the 128x128 input the spectrogram CNNs expect and add a batch axis.
    resized_spect = resize(dbscale_spect.numpy(), (128, 128, 1), preserve_range=True)
    resized = tf.convert_to_tensor(resized_spect, dtype=tf.float32)
    final_spect = tf.reshape(resized, (1, 128, 128, 1))
    print("Spectrogram generated.")
    return final_spect


def prediction(audio_r, spect, wgmodel, wdmodel, sgmodel, sdmodel):
    predicted_gender = np.argmax(wgmodel.predict(audio_r.reshape(1, 8000, 1)), axis=1)
    predicted_digit = np.argmax(wdmodel.predict(audio_r.reshape(1, 8000, 1)), axis=1)
    spect_predicted_gender = np.argmax(sgmodel.predict(spect), axis=1)
    spect_predicted_digit = np.argmax(sdmodel.predict(spect), axis=1)
    print("Predictions made.")
    return (
        predicted_gender,
        predicted_digit,
        spect_predicted_gender,
        spect_predicted_digit,
    )


if __name__ == "__main__":
    models_path = (
        "C:\\Users\\ritis\\Desktop\\Audio-Classification-CNNs\\vastai\\working\\models"
    )
    waveform_gender = load_model(models_path, "waveform_gender_model")
    waveform_digit = load_model(models_path, "waveform_num_model")
    spectrogram_gender = load_model(models_path, "spectrogram_gender_model")
    spectrogram_digit = load_model(models_path, "spectrogram_num_model")

    audio_mobile = audio_load("Zero-1.wav")
    spect_mobile = spectrogram_generator(audio_mobile)
    wgender, wdigit, sgender, sdigit = prediction(
        audio_mobile,
        spect_mobile,
        waveform_gender,
        waveform_digit,
        spectrogram_gender,
        spectrogram_digit,
    )
    print(f"Zero: {wgender}, {wdigit}, {sgender}, {sdigit}")