# ml-project/classifier_util.py (pentaoa/ml-project on GitHub, branch main)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
import os
import shutil
import json
import glob
from pathlib import Path

# Path configuration.
# NOTE: these are absolute paths under /home/ldy, so the script only runs
# unchanged on a machine that has this directory layout.
# eeg_data_dir: source EEG data; the functions below read its amu/ and sad/
#               sub-directories.
# target_dir:   destination for the copied, prefix-renamed files.
# labels_file:  JSON label file; it lives inside target_dir.
eeg_data_dir = "/home/ldy/Closed_loop_optimizing/ml-project/Artphoto/eeg"
target_dir = "/home/ldy/Closed_loop_optimizing/ml-project/Artphoto/classify"
labels_file = "/home/ldy/Closed_loop_optimizing/ml-project/Artphoto/classify/labels.json"

# Create the destination directory. This statement is at module level, so it
# runs on import as well as when the file is executed as a script.
os.makedirs(target_dir, exist_ok=True)

def copy_eeg_files_and_create_labels(source_dir=None, dest_dir=None, labels_path=None):
    """
    Copy the amusement/sadness EEG files into one directory and write their labels.

    Every ``*.npy`` file directly inside ``<source_dir>/amu`` and
    ``<source_dir>/sad`` is copied to ``dest_dir`` under the name
    ``<emotion>_<original file name>``; the prefix keeps equally named files
    of the two classes apart. A JSON object mapping each new file name to its
    label is then written to ``labels_path``:

        amu -> 1 (happy)
        sad -> 0 (sad)

    Args:
        source_dir: Directory holding the ``amu`` and ``sad`` sub-directories.
            Defaults to the module-level ``eeg_data_dir``.
        dest_dir: Directory that receives the copies; created if missing.
            Defaults to the module-level ``target_dir``.
        labels_path: Path of the JSON label file to write.
            Defaults to the module-level ``labels_file``.

    Returns:
        True once the label file has been written. A missing or empty emotion
        directory and individual copy failures are only reported on stdout
        and do not change the result. False if the destination directory
        cannot be created or the label file cannot be written.
    """
    # Resolve defaults at call time so the module-level configuration is only
    # consulted when the caller did not supply a path.
    if source_dir is None:
        source_dir = eeg_data_dir
    if dest_dir is None:
        dest_dir = target_dir
    if labels_path is None:
        labels_path = labels_file

    # Emotion categories and their labels.
    emotion_mapping = {
        'amu': 1,  # happy
        'sad': 0,  # sad
    }

    labels_dict = {}
    # One counter per category, derived from the mapping so the two cannot
    # drift apart.
    copied_files_count = dict.fromkeys(emotion_mapping, 0)

    print("开始处理EEG文件...")
    print("-" * 60)

    # Do not rely on the import-time makedirs: the directory may have been
    # removed since, or the caller may have passed a different destination.
    try:
        os.makedirs(dest_dir, exist_ok=True)
    except OSError as e:
        print(f"错误: 无法创建目标目录 {dest_dir}: {e}")
        return False

    for emotion, label in emotion_mapping.items():
        emotion_dir = os.path.join(source_dir, emotion)

        if not os.path.exists(emotion_dir):
            print(f"警告: 目录不存在 {emotion_dir}")
            continue

        print(f"处理 {emotion} 类别 (标签: {label})...")

        # glob returns entries in arbitrary order; sort them so the copy order
        # and the key order of the label file are reproducible between runs.
        npy_files = sorted(glob.glob(os.path.join(emotion_dir, "*.npy")))

        if not npy_files:
            print(f"  警告: {emotion_dir} 中没有找到.npy文件")
            continue

        print(f"  找到 {len(npy_files)} 个EEG文件")

        for file_path in npy_files:
            original_filename = os.path.basename(file_path)

            # Prefix with the emotion so files from both classes can share
            # one directory without name clashes.
            new_filename = f"{emotion}_{original_filename}"
            target_path = os.path.join(dest_dir, new_filename)

            try:
                shutil.copy2(file_path, target_path)
            except OSError as e:
                # Best effort: report the failure and carry on with the rest.
                print(f"  错误: 复制文件 {original_filename} 失败: {e}")
            else:
                # Only files that were actually copied get a label.
                labels_dict[new_filename] = label
                copied_files_count[emotion] += 1

        print(f"  成功复制 {copied_files_count[emotion]} 个文件")

    # Save the label file.
    try:
        with open(labels_path, 'w', encoding='utf-8') as f:
            json.dump(labels_dict, f, indent=2, ensure_ascii=False)
    except OSError as e:
        print(f"错误: 保存标签文件失败: {e}")
        return False
    print(f"\n标签文件已保存到: {labels_path}")

    # Print the summary.
    print("\n" + "=" * 60)
    print("处理完成！统计信息:")
    print(f"目标目录: {dest_dir}")
    print(f"标签文件: {labels_path}")
    print(f"总文件数: {sum(copied_files_count.values())}")

    for emotion, count in copied_files_count.items():
        print(f"  {emotion} (标签 {emotion_mapping[emotion]}): {count} 个文件")

    # Cross-check: count the labels that were actually recorded.
    assigned_labels = list(labels_dict.values())

    print("\n标签分布:")
    print(f"  标签 0 (sad): {assigned_labels.count(0)} 个文件")
    print(f"  标签 1 (amu): {assigned_labels.count(1)} 个文件")

    return True

def verify_copied_files(source_dir=None, dest_dir=None, labels_path=None):
    """
    Check that the copied files and the label file agree with each other.

    Reports on stdout how many ``.npy`` files are in ``dest_dir``, how many
    entries the label file has, which files lack a label, which labels lack a
    file, and how many ``.npy`` files remain in the ``amu`` and ``sad``
    sub-directories of ``source_dir``.

    Args:
        source_dir: Directory holding the original ``amu``/``sad``
            sub-directories. Defaults to the module-level ``eeg_data_dir``.
        dest_dir: Directory holding the copied files.
            Defaults to the module-level ``target_dir``.
        labels_path: Path of the JSON label file.
            Defaults to the module-level ``labels_file``.

    Returns:
        False if ``dest_dir`` is not a directory, or the label file is
        missing or cannot be read/parsed. True otherwise -- including when
        mismatches are found, which are reported as warnings only.
    """
    # Resolve defaults at call time so the module-level configuration is only
    # consulted when the caller did not supply a path.
    if source_dir is None:
        source_dir = eeg_data_dir
    if dest_dir is None:
        dest_dir = target_dir
    if labels_path is None:
        labels_path = labels_file

    print("\n验证复制的文件...")

    # Check the destination directory. isdir (not exists) so that a plain
    # file at this path is reported instead of crashing os.listdir below.
    if not os.path.isdir(dest_dir):
        print(f"错误: 目标目录不存在 {dest_dir}")
        return False

    # Scan the directory once; the same listing feeds both the count and the
    # missing-label check so the two numbers cannot disagree.
    npy_on_disk = sorted(
        name for name in os.listdir(dest_dir) if name.endswith('.npy')
    )
    print(f"目标目录中有 {len(npy_on_disk)} 个.npy文件")

    # Check the label file.
    if not os.path.exists(labels_path):
        print(f"错误: 标签文件不存在 {labels_path}")
        return False

    try:
        with open(labels_path, 'r', encoding='utf-8') as f:
            labels_dict = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"错误: 读取标签文件失败: {e}")
        return False

    print(f"标签文件中有 {len(labels_dict)} 个条目")

    # Consistency between files and labels, in both directions.
    missing_labels = [name for name in npy_on_disk if name not in labels_dict]
    missing_files = [
        name for name in labels_dict
        if not os.path.exists(os.path.join(dest_dir, name))
    ]

    if missing_labels:
        print(f"警告: {len(missing_labels)} 个文件缺少标签")

    if missing_files:
        print(f"警告: {len(missing_files)} 个标签对应的文件不存在")

    if not missing_labels and not missing_files:
        print("验证通过：文件和标签完全一致")

    # Confirm the originals are still in place (the files were copied, not moved).
    print("\n检查原始文件...")
    for emotion in ('amu', 'sad'):
        original_dir = os.path.join(source_dir, emotion)
        if os.path.exists(original_dir):
            original_files = glob.glob(os.path.join(original_dir, "*.npy"))
            print(f"  {emotion} 目录中仍有 {len(original_files)} 个原始文件")
        else:
            print(f"  警告: {emotion} 目录不存在")

    return True

def display_sample_info(labels_path=None, dest_dir=None):
    """
    Print a short preview of the label file and where the dataset lives.

    Shows the first five ``file name -> label`` entries of the label file,
    the number of remaining entries, and the data directory / label file
    paths. Prints a notice and returns early if the label file is missing or
    cannot be read.

    Args:
        labels_path: Path of the JSON label file.
            Defaults to the module-level ``labels_file``.
        dest_dir: Data directory shown in the closing usage hint.
            Defaults to the module-level ``target_dir``.

    Returns:
        None. All output goes to stdout.
    """
    # Resolve defaults at call time so the module-level configuration is only
    # consulted when the caller did not supply a path.
    if labels_path is None:
        labels_path = labels_file
    if dest_dir is None:
        dest_dir = target_dir

    if not os.path.exists(labels_path):
        print("标签文件不存在，无法显示样本信息")
        return

    try:
        with open(labels_path, 'r', encoding='utf-8') as f:
            labels_dict = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"标签文件读取失败，无法显示样本信息: {e}")
        return

    print("\n" + "=" * 60)
    print("样本信息预览:")

    # Show only the first few samples.
    preview_limit = 5
    for filename, label in list(labels_dict.items())[:preview_limit]:
        # Any label other than 1 is shown as "sad", matching the two-class
        # amu=1 / sad=0 encoding used when the label file is written.
        emotion = "amu" if label == 1 else "sad"
        print(f"  文件: {filename} -> 标签: {label} ({emotion})")

    remaining = len(labels_dict) - preview_limit
    if remaining > 0:
        print(f"  ... 还有 {remaining} 个文件")

    print("\n数据集可以直接用于训练分类器！")
    print("使用路径:")
    print(f"  数据目录: {dest_dir}")
    print(f"  标签文件: {labels_path}")

def main():
    """Run the pipeline: copy and label the files, verify them, then preview the labels."""
    print("EEG情绪分类数据处理工具")
    print("=" * 60)

    # Step 1: copy the files and write the label file; nothing else makes
    # sense if this step reports failure.
    if not copy_eeg_files_and_create_labels():
        print("处理失败，程序退出")
        return

    # Step 2: check files against labels (its result is informational only).
    verify_copied_files()

    # Step 3: show a few sample entries.
    display_sample_info()

    print("\n所有处理完成！")

# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()