-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathconsolidation.py
More file actions
155 lines (125 loc) · 5.16 KB
/
consolidation.py
File metadata and controls
155 lines (125 loc) · 5.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import os
import pickle
import pandas as pd
import glob
import pdb
import numpy as np
from deepdiff import DeepDiff
import sys
sys.path.append('/home/akumar/nse/neural_control')
from loaders import load_sabes
from utils import apply_df_filters
def get_todo(path):
    """Return the result-file paths for jobs that have not yet finished.

    Scans ``path`` for pickled ``arg*.dat`` files; each one is a dict naming a
    ``results_file``.  Any named results file that does not exist on disk is
    still to-do.
    """
    arg_paths = glob.glob('%s/arg*.dat' % path)
    print(len(arg_paths))

    def _results_file(arg_path):
        # Each arg file pickles a dict with a 'results_file' entry.
        with open(arg_path, 'rb') as f:
            return pickle.load(f)['results_file']

    expected = [_results_file(p) for p in arg_paths]
    return [rfile for rfile in expected if not os.path.exists(rfile)]
# Consolidate decoding results into a single list, keeping track of the directory the results came from.
# replacement_drpath: use this if the dimreduc arg file should be looked up in a different location
# than the one recorded in the decoding argfile. This happens when we run a job on NERSC and consolidate it locally.
def consolidate_decoding(src_path, save_path, replacement_drpath=None):
    """Consolidate per-job decoding results under ``src_path`` into ``save_path``.

    Each pickled ``arg*.dat`` file in ``src_path`` describes one job and names a
    results file.  The results file is looked up *inside* ``src_path`` (only its
    basename is kept, since the recorded path may be from a remote machine), its
    list of result dicts is annotated with the decoding args and the matching
    dimreduc args, and the combined list is written to ``save_path`` as two
    concatenated pickles: the result list, then ``src_path``.

    Parameters
    ----------
    src_path : str
        Directory containing ``arg*.dat`` files and the corresponding results.
    save_path : str
        Output pickle path.
    replacement_drpath : str, optional
        Look for the dimreduc arg file here instead of the directory recorded
        in the decoding args (used when jobs ran on NERSC but are consolidated
        locally).
    """
    argfiles = glob.glob('%s/arg*.dat' % src_path)
    result_list = []
    for argfile in argfiles:
        with open(argfile, 'rb') as f:
            args = pickle.load(f)

        # Look for the results file in src_path, ignoring whatever directory
        # the (possibly remote) argfile recorded.
        rfile = src_path + '/' + args['results_file'].split('/')[-1]
        if not os.path.exists(rfile):
            # Job has not produced results yet; skip it.
            continue

        with open(rfile, 'rb') as f:
            result = pickle.load(f)

        # Could be the case (e.g. in rand decoding) that each element is itself
        # a list of dictionaries -- flatten one level if so.
        if isinstance(result[0], list):
            result = [r for result_ in result for r in result_]

        # Annotate every result dict with the decoding args.
        for result_ in result:
            for k, v in args.items():
                if k == 'loader_args':
                    # Keep the decoding-time loader args under a distinct key
                    # (e.g. smoothing applied at decoding but not at dimreduc);
                    # 'loader_args' itself is still set below and may later be
                    # overwritten by the dimreduc args.
                    result_['dec_loader_args'] = v
                if isinstance(k, dict):
                    # NOTE(review): dict keys can never be dicts (unhashable),
                    # so this branch is dead; it looks like it intended to
                    # flatten dict *values*. Kept as-is to preserve behavior --
                    # TODO confirm intent.
                    for k_, v_ in k.items():
                        result_[k_] = v_
                else:
                    # FIX: the original wrapped this assignment in a bare
                    # `except: pdb.set_trace()`, which hangs non-interactive
                    # runs; any real error now propagates.
                    result_[k] = v

        # Grab the matching dimreduc arg file: dimreduc_<no>.dat -> arg<no>.dat
        if replacement_drpath is None:
            dimreduc_path = '/'.join(args['task_args']['dimreduc_file'].split('/')[:-1])
        else:
            dimreduc_path = replacement_drpath
        dimreduc_no = args['task_args']['dimreduc_file'].split('_')[-1].split('.dat')[0]
        dimreduc_argfile = '%s/arg%s.dat' % (dimreduc_path, dimreduc_no)
        with open(dimreduc_argfile, 'rb') as f:
            dr_args = pickle.load(f)

        # Annotate with the dimreduc args as well.
        for result_ in result:
            for k, v in dr_args.items():
                if isinstance(k, dict):
                    # Dead branch, same as above -- kept for parity.
                    for k_, v_ in k.items():
                        result_[k_] = v_
                else:
                    result_[k] = v

        result_list.extend(result)

    # For ease of use, use data file names that do not involve the directory path
    for r in result_list:
        r['data_file'] = r['data_file'].split('/')[-1]

    try:
        # Try using pickle to save the data (result list, then source path).
        with open(save_path, 'wb') as f:
            f.write(pickle.dumps(result_list))
            f.write(pickle.dumps(src_path))
        print("Data saved using pickle.")
    except MemoryError:
        print("MemoryError occurred. Using joblib to save the data.")
        from joblib import dump, load
        # Save with joblib, reload, then store as a DataFrame pickle.
        # Unsure if this is necessary, or if result_list could be converted
        # to a DataFrame and pickled immediately...
        dump(result_list, save_path)
        data = load(save_path)
        df_decode = pd.DataFrame(data)
        with open(save_path, 'wb') as f:
            pickle.dump(df_decode, f)
        print("Data saved using joblib (then converted to regular .pkl).")
def consolidate_dimreduc(src_path, save_path):
    """Gather dimreduc results from ``src_path`` into a single pickle.

    Every pickled ``arg*.dat`` file in ``src_path`` names a results file; each
    result dict in that file is annotated with the job's args, and the combined
    list is written to ``save_path`` followed by ``src_path`` (two pickles back
    to back).
    """
    arg_paths = glob.glob('%s/arg*.dat' % src_path)
    print(len(arg_paths))

    combined = []
    for arg_path in arg_paths:
        # Each arg file pickles the job's argument dict.
        with open(arg_path, 'rb') as fh:
            job_args = pickle.load(fh)
        with open(job_args['results_file'], 'rb') as fh:
            job_results = pickle.load(fh)

        # Tag every result dict with the args that produced it.
        for entry in job_results:
            for key, val in job_args.items():
                if isinstance(key, dict):
                    entry.update(key)
                else:
                    entry[key] = val
        combined.extend(job_results)

    # For ease of use, strip directory components from data file names.
    for entry in combined:
        entry['data_file'] = entry['data_file'].split('/')[-1]

    # Save the result list, then the source directory path.
    with open(save_path, 'wb') as fh:
        fh.write(pickle.dumps(combined))
        fh.write(pickle.dumps(src_path))