-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
153 lines (116 loc) · 4.42 KB
/
main.py
File metadata and controls
153 lines (116 loc) · 4.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import cv2
import sys
import numpy as np
import matplotlib.pyplot as plt
from scipy.io.wavfile import write
from scipy.interpolate import CubicSpline
FORMAT = cv2.IMREAD_GRAYSCALE
def flags_parser(input):
# Input validation
for entry in input:
if entry == "-h" or entry == "--help":
print("Usage: python main.py -f [image_path] -o [output_path]")
sys.exit(0)
if len(input) < 3:
print("Usage: python main.py -f [image_path] -o [output_path]")
sys.exit(1)
if "-f" and "-o" not in input:
print("Usage: python main.py -f [image_path] -o [output_path]")
sys.exit(1)
# Parse the input
for entry in input:
if entry == "-f":
file_dir = input[input.index(entry) + 1]
if not file_dir.endswith(".png"):
print("Only PNG files are supported")
sys.exit(1)
if entry == "-o":
output_dir = input[input.index(entry) + 1]
if not output_dir.endswith(".wav"):
print("Only WAV files are supported")
sys.exit(1)
return file_dir, output_dir
def image_parse(file_dir: str):
# Read the image
image = cv2.imread(file_dir, FORMAT)
# Generate the outline
sobel_x = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=3)
sobel_y = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=3)
# Threshold the image
sobel_magnitude = np.sqrt(sobel_x**2 + sobel_y**2)
sobel_magnitude = np.uint8(np.clip(sobel_magnitude, 0, 255))
_, binary_image = cv2.threshold(image, 250, 255, cv2.THRESH_BINARY)
# Invert the image
cv2.bitwise_not(binary_image, binary_image)
# re-orient the image back to the original orientation
binary_image = np.array(list(reversed(binary_image)))
out = np.where(binary_image == 255)
display = cv2.flip(binary_image, 0)
# Display the image
cv2.imshow("Outline", display)
cv2.waitKey(0)
cv2.destroyAllWindows()
return out[1], out[0]
def generate_frequency(base, transformer):
# Output sine wave
output = []
# Output linear transform
linear_transform = []
# The raw transform
plt.plot([x for x in range(len(transformer))], transformer)
plt.show()
for i in transformer.tolist():
# Generate the sine wave
wave_data = np.sin(2 * np.pi * base * i * np.arange(5) / 5)
for entry in wave_data.tolist():
output.append(entry)
# Generate the no-op validation transform
line_data = np.linspace(1, 1, 5)
line_data = line_data * i
for entry in line_data.tolist():
linear_transform.append(entry)
# Validation transform, should be the transformer, but scaled out to the max value of the sine wave
plt.plot([x for x in range(len(linear_transform))], linear_transform)
plt.show()
# Returning the data as a float32 array here
return np.asarray(output, dtype=np.float32)
def process_coordinates(x_coords, y_coords):
# Dedouped coordinates
cleaned_coords = []
cleaned_coords_y = []
# Remove duplicates
for i in range(len(x_coords)):
if x_coords[i] not in cleaned_coords:
cleaned_coords_y.append(y_coords[i])
cleaned_coords.append(x_coords[i])
# Normalize the y coordinates
y = cleaned_coords_y / np.max(np.abs(cleaned_coords_y))
x = cleaned_coords
d2 = np.column_stack((x, y))
# Sort them for the spline fit
sorted_indices = np.argsort(d2[:, 0])
sorted_points = d2[sorted_indices]
# Fit the spline
cs = CubicSpline(sorted_points[:, 0], sorted_points[:, 1])
# Generate the spline
x_fine = np.linspace(min(x), max(x), 5000)
y_fine = cs(x_fine)
# Plot the spline
plt.scatter([x for x in range(len(y_fine))], y_fine)
plt.show()
# Scale the spline
scaled = np.int16(y_fine / np.max(np.abs(y_fine)) * 32767)
return scaled
if __name__ == "__main__":
target_name, output_name = flags_parser(sys.argv)
# Parse the image
x_coords, y_coords = image_parse(target_name)
# Process the coordinates
scaled = process_coordinates(x_coords, y_coords)
# Generate and transform the frequency
data = generate_frequency(1, scaled)
# Plot the final data
plt.plot([x for x in range(len(data))], data)
plt.show()
# Write the data to a WAV file
write(output_name, 100, data)