EmoShift/utils.py at main · root2pk/EmoShift · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
"""
Methods for extracting audio features from audio files.

The EssentiaClasses class is used to extract audio features from audio files using Essentia.

The search_audio_files function is used to search for audio files in a given directory.

The load_audio_file function is used to load an audio file from a given path, downmix to mono and resample to 16kHz.

"""

import os
# Set logging level for tensorflow. This assignment sits ahead of the essentia
# imports below on purpose — presumably so the TensorFlow backend used by the
# es.TensorflowPredict* algorithms sees it when it is loaded (TODO confirm).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # alternatives: '0', '1' or '2'

import essentia
# Set logging level for essentia
essentia.log.warningActive = False               # deactivate the warning level
essentia.log.infoActive = False                  # deactivate the info level

import essentia.standard as es

class EssentiaClasses:
    """
    Class for extracting audio features from audio files using Essentia

    Typical use: build one instance (this loads the model graphs from the
    ``weights/`` directory), call ``extract_features`` for a track, then
    ``write_features_dict`` to collect the results for that track.
    """

    def __init__(self, batch_size=64):
        """
        Initialise the Essentia classes for feature extraction

        Parameters:
        batch_size (int): Batch size handed to the arousal/valence predictor
            (``es.TensorflowPredict2D``). Defaults to 64, which is believed to
            be Essentia's own default for that algorithm - confirm against the
            installed Essentia version.

        Returns:
        None
        """

        # Must be assigned before the predictor below is built: it is read as
        # self.batchSize when constructing TensorflowPredict2D.
        self.batchSize = batch_size

        self.getKeyTemperley = es.KeyExtractor(profileType='temperley')
        self.getMusiCNNEmbeddings = es.TensorflowPredictMusiCNN(
            graphFilename="weights/msd-musicnn-1.pb",
            output="model/dense/BiasAdd",
        )
        self.getArousalAndValence = es.TensorflowPredict2D(
            graphFilename="weights/emomusic-msd-musicnn-2.pb",
            output="model/Identity",
            batchSize=self.batchSize,
        )

    def extract_features(self, audio_mono, audio_stereo):
        """
        Extract audio features from an already loaded audio signal

        The results are stored on the instance (``keyTemperley``,
        ``scaleTemperley``, ``arousal``, ``valence``) rather than returned.

        Parameters:
        audio_mono (np.array): Mono signal fed to the key extractor and to the
            MusiCNN embedding model
        audio_stereo (np.array): Currently unused; kept so existing callers
            that pass both signals keep working

        Returns:
        None
        """

        # Key and scale; the third value returned by the extractor is discarded
        self.keyTemperley, self.scaleTemperley, _ = self.getKeyTemperley(audio_mono)

        # Embeddings -> per-patch predictions (one row per patch, two columns)
        musicnnEmbeddings = self.getMusiCNNEmbeddings(audio_mono)
        arouVal = self.getArousalAndValence(musicnnEmbeddings)

        # NOTE(review): column 0 is read as arousal and column 1 as valence.
        # Essentia's published metadata for the emomusic models appears to list
        # the outputs as [valence, arousal] - confirm the column order.
        arousal = arouVal[:, 0]
        valence = arouVal[:, 1]

        # Average the arousal and valence predictions over all patches
        self.arousal = arousal.mean(axis=0)
        self.valence = valence.mean(axis=0)

    def write_features_dict(self, audio_file):
        """
        Write the extracted features to a dictionary

        Reads the attributes set by ``extract_features``, so that method must
        have been called first (otherwise an AttributeError is raised).

        Parameters:
        audio_file (str): The path to the audio file, stored under 'audio_file'

        Returns:
        features (dict): A dictionary containing the extracted features

        """

        features = {
            'audio_file': audio_file,
            'keyTemperley': self.keyTemperley,
            'scaleTemperley': self.scaleTemperley,
            'arousal': self.arousal,
            'valence': self.valence,
        }

        return features


def search_audio_files(directory, file_types=('.mp3', '.wav', '.flac', '.aac')):
    """
    Search for audio files in a given directory (recursively)

    Extension matching is case-insensitive, so 'TRACK.WAV' matches '.wav'.

    Parameters:
    directory (str): The directory to search for audio files
    file_types (iterable of str, or str): The file extensions to search for;
        a single extension may be given as a plain string

    Returns:
    audio_files (list): Paths of the audio files found, relative to the
        current working directory, in os.walk order

    """

    # A bare string would otherwise be split into single characters by tuple()
    if isinstance(file_types, str):
        file_types = (file_types,)

    # Normalise once, outside the loop; str.endswith accepts a tuple of suffixes
    suffixes = tuple(ext.lower() for ext in file_types)
    base_dir = os.getcwd()

    audio_files = []

    for root, _dirs, files in os.walk(directory):
        audio_files.extend(
            os.path.relpath(os.path.join(root, name), base_dir)
            for name in files
            if name.lower().endswith(suffixes)
        )

    return audio_files

def load_audio_file(file_path):
    """
    Load an audio file from a given path, downmix to mono and resample to 16kHz

    Parameters:
    file_path (str): The path to the audio file

    Returns:
    audio_stereo (np.array): The audio signal as returned by the loader
        (original sample rate, not downmixed)
    audio_mono (np.array): The downmixed audio signal resampled to 16kHz

    """
    # Load the audio; keep the sample rate and channel count reported by the
    # loader, the remaining outputs are not needed here
    audio_stereo, sr, nc, _, _, _ = es.AudioLoader(filename=file_path)()
    # Mix to mono
    audio_mono = es.MonoMixer()(audio_stereo, nc)
    # Resample to 16kHz from the file's actual sample rate (a hard-coded
    # 44100 would mis-resample any file recorded at a different rate)
    audio_mono = es.Resample(inputSampleRate=sr, outputSampleRate=16000)(audio_mono)

    return audio_stereo, audio_mono