-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathread_wave.py
More file actions
134 lines (104 loc) · 4.79 KB
/
read_wave.py
File metadata and controls
134 lines (104 loc) · 4.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import struct as st
import numpy as np
import math
def read_wave(path, normalize=True, length=1, threshold=0.001):
    '''
    Parse an uncompressed PCM wave file into a list of mono sample values.

    The file is read fully into memory. The canonical RIFF/WAVE header is
    validated, the first 'data' chunk found at or after byte 36 is decoded,
    and the channels of every sample frame are averaged into one value.
    Post-processing is applied in this order: normalize, trim, chop to length.

    Inputs:  path (string)     - full filepath to audio file
             normalize (bool)  - set true to scale the frames so that the largest
                                 magnitude becomes 1 (data ranges within [-1, 1]).
                                 Empty or all-zero data is left unchanged.
             length (float)    - chops audio sample to meet this length (seconds).
                                 If the audio sample is shorter than length, it is
                                 unchanged. Pass None to skip.
             threshold (float) - a frame is dropped when the mean absolute value of
                                 the window of up to 100 frames starting at it is
                                 below this value. Pass None to skip.
    Outputs: data (list)       - float values, one per sample frame, averaged across
                                 the channels of that frame
             sampleRate (int)  - 1 / sampleRate gives the time separation between
                                 data values. sampleRate / 2 gives the nyquist
                                 frequency of the discrete signal

    Return values are None, None (after printing a message) if:
        1. The file cannot be opened
        2. The file does not meet the following specification:
            a. Correctly formed header information (chunk ids, formats, etc.)
            b. File is uncompressed (PCM = 1)
            c. File is mono or stereo
            d. Bits per sample is a non-zero multiple of 8
            e. A 'data' chunk with a complete 8-byte chunk header is present
    '''
    # RIFF descriptor (12 bytes) plus the 'fmt ' chunk up to and including
    # bitsPerSample; every header field read below lies inside this range.
    headerSize = 36

    ''' HELPER FUNCTIONS START'''
    def _findDataStart(fileContent):
        # Offset of the first b'data' marker at or after the fixed header,
        # or None when there is none.
        start = fileContent.find(b'data', headerSize)
        return None if start < 0 else start

    def _normalizeData(data):
        # Scale so the largest magnitude becomes 1. Empty or all-zero input has
        # no meaningful scale factor, so it is returned unchanged instead of
        # raising ValueError / ZeroDivisionError.
        peak = max((abs(x) for x in data), default=0)
        if peak == 0:
            return data
        multiplier = 1 / peak
        return [x * multiplier for x in data]

    def _convertBinaryStringToInt(bString):
        # WAV stores 8-bit PCM as unsigned bytes centred on 128; wider samples
        # are little-endian two's complement.
        if len(bString) == 1:
            return bString[0] - 128
        return int.from_bytes(bString, byteorder="little", signed=True)

    def _extractData(dataString, bytesPerSample, numChannels):
        # dataString starts at the 'data' chunk id: 4 bytes id, 4 bytes payload
        # size, then the payload itself.
        declaredSize = st.unpack('<L', dataString[4:8])[0]
        # Never read past the end of the file when the header over-declares.
        payloadSize = min(declaredSize, len(dataString) - 8)
        frameSize = bytesPerSample * numChannels
        data = []
        pos = 8
        # Only whole frames are decoded; a trailing partial frame is ignored.
        for _ in range(payloadSize // frameSize):
            total = 0
            for _ in range(numChannels):
                total += _convertBinaryStringToInt(dataString[pos:pos + bytesPerSample])
                pos += bytesPerSample
            data.append(total / numChannels)
        return data

    def _applyLength(signal, sampleRate, duration):
        # Keep at most floor(sampleRate * duration) leading frames.
        numFramesDesired = math.floor(sampleRate * duration)
        if len(signal) > numFramesDesired:
            signal = signal[:numFramesDesired]
        return signal

    def _trimData(signal, threshold):
        # Drop every frame whose forward-looking window is quieter than the
        # threshold. Near the end of the signal the window shrinks to the
        # frames that remain, which also covers signals shorter than a window.
        windowSize = 100
        numFrames = len(signal)
        absSignal = [abs(x) for x in signal]
        return [
            value
            for i, value in enumerate(signal)
            if sum(absSignal[i:i + windowSize]) / min(windowSize, numFrames - i) >= threshold
        ]
    ''' HELPER FUNCTIONS END'''

    try:
        with open(path, mode='rb') as file:
            fileContent = file.read()
    except (OSError, ValueError):
        # ValueError covers paths the OS layer rejects outright (embedded NUL).
        print("Cannot open file at: " + str(path))
        return None, None

    # Chunk ids are compared as raw bytes so that garbage in the header is
    # reported as a format problem rather than raising a decode error.
    headerOk = (
        len(fileContent) >= headerSize
        and fileContent[0:4] == b'RIFF'
        and fileContent[8:12] == b'WAVE'
        and fileContent[12:16] == b'fmt '
    )
    if headerOk:
        # audioFormat: 1 for PCM (the only format handled).
        # numChannels: 2 for stereo, 1 for mono.
        audioFormat, numChannels, sampleRate = st.unpack_from('<HHL', fileContent, 20)
        bitsPerSample = st.unpack_from('<H', fileContent, 34)[0]
        headerOk = audioFormat == 1 and numChannels in (1, 2)
    if not headerOk:
        print("File format issues at: " + str(path))
        print("Make sure file is not compressed")
        return None, None

    dataStart = _findDataStart(fileContent)
    # The chunk id must be followed by its complete 4-byte size field.
    if dataStart is None or dataStart + 8 > len(fileContent):
        print("File data could not be retrived at: " + str(path))
        return None, None

    # A zero sample width would make frame decoding meaningless, so it is
    # rejected together with widths that are not whole bytes.
    if bitsPerSample == 0 or bitsPerSample % 8 != 0:
        print("Imcompatiable bytes per sample at: " + str(path))
        return None, None

    data = _extractData(fileContent[dataStart:], bitsPerSample // 8, numChannels)
    if normalize:
        data = _normalizeData(data)
    if threshold is not None:
        data = _trimData(data, threshold)
    if length is not None:
        data = _applyLength(data, sampleRate, length)
    return data, sampleRate