research-projects-RIT/MonteCarloMarginalizeCode/Code/RIFT/misc/xmlutils.py at b4682cf0af36b11ff32875d03690f1903d0c8d8a · oshaughn/research-projects-RIT · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
import types
import sqlite3
from collections import namedtuple

from functools import reduce

import numpy

from glue.lal import LIGOTimeGPS
from ligo.lw import ligolw, lsctables, table #, ilwd
#from glue.ligolw.utils import process

def assign_id(row, i):
    row.simulation_id = i # ilwd.ilwdchar("sim_inspiral_table:sim_inspiral:%d" % i)

def assign_time(row, t):
    setattr(row, "geocent_end_time",( int(t)))
    setattr(row, "geocent_end_time_ns",int( (t-int(t))*1e9 ) )

CMAP = { "right_ascension": "longitude",
    "longitude":"longitude",
    "latitude":"latitude",
    "declination": "latitude",
    "inclination": "inclination",
    "polarization": "polarization",
    "t_ref": assign_time,#r.set_time_geocent(LIGOTimeGPS(float(t))),
    "coa_phase": "coa_phase",
    "distance": "distance",
    "mass1": "mass1",
    "mass2": "mass2",
    # SHOEHORN ALERT
    "sample_n": assign_id,
    "alpha1":"alpha1",
    "alpha2":"alpha2",
    "alpha3":"alpha3",
    "alpha4":"alpha4",
    "alpha5":"alpha5",
    "alpha6":"alpha6",
    "psi3":"psi3",
    "beta":"beta",
    "loglikelihood": "alpha1",
    "joint_prior": "alpha2",
    "joint_s_prior": "alpha3",
    "eccentricity":"alpha4",
    "E0":"psi3",
    "p_phi0":"beta",
    "lambda1":"alpha5",
    "lambda2":"alpha6",
    "spin1x":"spin1x",
    "spin1y":"spin1y",
    "spin1z":"spin1z",
    "spin2x":"spin2x",
    "spin2y":"spin2y",
    "spin2z":"spin2z"
}

# FIXME: Find way to intersect given cols with valid cols when making table.
# Otherwise, we'll have to add them manually and ensure they all exist
sim_valid_cols = ["simulation_id", "inclination", "longitude", "latitude", "polarization", "geocent_end_time", "geocent_end_time_ns", "coa_phase", "distance", "mass1", "mass2", "alpha1", "alpha2", "alpha3", "alpha4", "alpha5", "alpha6", "psi3", "beta", "spin1x", "spin1y", "spin1z", "spin2x", "spin2y", "spin2z"]
sngl_valid_cols = [ "event_id", "snr", "tau0", "tau3"]
multi_valid_cols = ["process_id", "event_id", "snr"]

def append_samples_to_xmldoc(xmldoc, sampdict):
    try:
        si_table = lsctables.SimInspiralTable.get_table(xmldoc)
        new_table = False
    # Warning: This will also get triggered if there is *more* than one table
    except ValueError:
        si_table = lsctables.New(lsctables.SimInspiralTable, sim_valid_cols)
        new_table = True

    keys = list(sampdict.keys())
    # Just in case the key/value pairs don't come out synchronized
    values = numpy.array([sampdict[k] for k in keys], object)

    # Flatten the keys
    import collections
    keys = reduce(list.__add__, [list(i) if isinstance(i, tuple) else [i] for i in keys])

    # Get the process
    # FIXME: Assumed that only we have appended information
    procid = lsctables.ProcessTable.get_table(xmldoc)[-1].process_id

    # map the samples to sim inspiral rows
    # NOTE :The list comprehension is to preserve the grouping of multiple
    # parameters across the transpose operation. It's probably not necessary,
    # so if speed dictates, it can be reworked by flattening before arriving
    # here
    for vrow in numpy.array(list(zip(*[vrow_sub.T for vrow_sub in values])), dtype=object):
        #si_table.append(samples_to_siminsp_row(si_table, **dict(zip(keys, vrow.flatten()))))
        vrow = reduce(list.__add__, [list(i) if isinstance(i, collections.abc.Iterable) else [i] for i in vrow])
        si_table.append(samples_to_siminsp_row(si_table, **dict(list(zip(keys, vrow)))))
        si_table[-1].process_id = procid

    if new_table:
        xmldoc.childNodes[0].appendChild(si_table)
    return xmldoc

def append_likelihood_result_to_xmldoc(xmldoc, loglikelihood, neff=0, converged=False,**cols):
    try:
        si_table = lsctables.SnglInspiralTable.get_table(xmldoc)
        new_table = False
        # NOTE: MultiInspiralTable has no spin columns
        #si_table = table.get_table(xmldoc, lsctables.MultiInspiralTable.tableName)
    # Warning: This will also get triggered if there is *more* than one table
    except ValueError:
        si_table = lsctables.New(lsctables.SnglInspiralTable, sngl_valid_cols + list(cols.keys()))
        new_table = True
        # NOTE: MultiInspiralTable has no spin columns
        #si_table = lsctables.New(lsctables.MultiInspiralTable, multi_valid_cols + cols.keys())

    # Get the process
    # FIXME: Assumed that only we have appended information
    procid = lsctables.ProcessTable.get_table(xmldoc)[-1].process_id

    # map the samples to sim inspiral rows
    si_table.append(likelihood_to_snglinsp_row(si_table, loglikelihood, neff, converged,**cols))
    si_table[-1].process_id = procid

    if new_table:
        xmldoc.childNodes[0].appendChild(si_table)

    return xmldoc

def samples_to_siminsp_row(table, colmap={}, **sampdict):
    row = table.RowType()
    row.simulation_id = table.get_next_id()
    for key, col in list(CMAP.items()):
        if key not in sampdict:
            continue
        if isinstance(col, types.FunctionType):
            col(row, sampdict[key])
        else:
            setattr(row, col, sampdict[key])

    return row

def likelihood_to_snglinsp_row(table, loglikelihood, neff=0, converged=False, **cols):
    row = table.RowType()
    row.event_id = table.get_next_id()
    for col in cols:
            setattr(row, col, cols[col])
    row.snr = loglikelihood
    row.tau0 = neff
    row.tau3 = int(converged)

    return row

def db_identify_param(db_fname, process_id, param):
    """
    Extract the event time for a given process ID. This may fail in the case that the event time was not given on the command line (rather in a pointer to a XML file)
    NOTE: This is definitely not the best way to do this.
    """

    cmd_prm = "--" + param.replace("_", "-")

    sql = """select value from process_params where process_id = "%s" and param = "%s" """ % (str(process_id), cmd_prm)

    try:
        connection = sqlite3.connect(db_fname)
        result = list(connection.execute(sql))[0][0]
    finally:
        connection.close()
    return result

def db_to_samples(db_fname, tbltype, cols):
    """
    Pull samples from db_fname and return object that resembles a row from an XML table.
    """
    if "geocent_end_time" in cols:
        cols.append("geocent_end_time_ns")

    # FIXME: Get columns from db
    #if cols is None:
        #colsspec = "*"
    #else:
    colsspec = ", ".join(cols)

    if tbltype == lsctables.SimInspiralTable:
        sql = """select %s from sim_inspiral""" % colsspec
    elif tbltype == lsctables.SnglInspiralTable:
        sql = """select %s from sngl_inspiral""" % colsspec
    else:
        raise ValueError("Don't know SQL for table %s" % tbltype.tableName)

    Sample = namedtuple("Sample", cols)
    samples = []

    try:
        connection = sqlite3.connect(db_fname)
        connection.row_factory = sqlite3.Row
        for row in connection.execute(sql):
            # FIXME: UGH!
            res = dict(list(zip(cols, row)))
            if "geocent_end_time" in list(res.keys()):
                res["geocent_end_time"] += res["geocent_end_time_ns"]*1e-9

            samples.append(Sample(**res))
    finally:
        connection.close()

    return samples

# TESTING
import sys
if __file__ == sys.argv[0]:
    import numpy

    # Not used yet
    del CMAP["int_var"]
    del CMAP["int_val"]
    del CMAP["sample_n"]

    # Reworked to resemble usage in pipeline
    del CMAP["mass1"]
    del CMAP["mass2"]
    CMAP[("mass1", "mass2")] = ("mass1", "mass2")
    ar = numpy.random.random((len(CMAP), 10))
    samp_dict = dict(list(zip(CMAP, ar)))
    ar = samp_dict[("mass1", "mass2")]
    samp_dict[("mass1", "mass2")] = numpy.array([ar, ar])
    del CMAP[("mass1", "mass2")]
    CMAP["mass1"] = "mass1"
    CMAP["mass2"] = "mass2"

    samp_dict["samp_n"] = numpy.array(list(range(0,10)))
    CMAP["sample_n"] = "sample_n"

    xmldoc = ligolw.Document()
    xmldoc.appendChild(ligolw.LIGO_LW())
    process.register_to_xmldoc(xmldoc, sys.argv[0], {})

    append_samples_to_xmldoc(xmldoc, samp_dict)

    def gaussian(x, mu=0, std=1):
        return 1/numpy.sqrt(numpy.pi*2)/std * numpy.exp(-(x-mu)**2/2/std**2)

    m1m, m2m = 1.4, 1.5
    m1, m2 = numpy.random.random(2000).reshape(2,1000)*1.0+1.0
    loglikes = [gaussian(m1i, m1m)*gaussian(m2i, m2m) for m1i, m2i in zip(m1, m2)]
    #loglikelihood = - 7.5**2/2
    #append_likelihood_result_to_xmldoc(xmldoc, loglikelihood, **{"mass1": 1.4, "mass2": 1.4, "ifos": "H1,L1,V1"})
    for m1i, m2i, loglikelihood in zip(m1, m2, loglikes):
        append_likelihood_result_to_xmldoc(xmldoc, loglikelihood, **{"mass1": m1i, "mass2": m2i})

    from ligo.lw import utils
    utils.write_filename(xmldoc, "iotest.xml.gz", gz=True)