RLFuzz/config.py at main · crosscon/RLFuzz · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
"""
- This is the main config file for the TheHuzz project
- Use the configManager library to parse this config file
- This script requires the environment variable THEHUZZ_ROOT to be set because it uses os.environ["THEHUZZ_ROOT"]
    - This variable is set by the thehuzz_setup.sh script
- Note:
    - All the variables in argVars will be available to be edited as arguments
      from the terminal
      - Ex: max_fuzz_time can be edited as:
        - python3  <script>.py --max_fuzz_time 500 (or) python3  <script>.py -mt 500
      - Run python3 <script>.py --help for list of available variables
    - Don't use one variable to create another variable
        - Use functions to do this. For example, see vdb_cov_files
        - All functions will be replaced with absolute values using data from input
          arguments IN THE ORDER THEY ARE DEFINED
            - so, avoid using the value of one func in another
    - New variables with names as keys of argVars dict will be created and added
      to config by the configManager
      - Ex: 'core_name' will be a variable of config and can be accessed as 'config.core_name'
    - For arrays use this format: python3 <script>.py --list 1 1 0

- TODOs:
    - add debug run mode,  nop run mode, and random run mode
"""


###########
# imports #
###########
import os
from datetime import datetime
from string import Template
import config_rc as RC, config_cva6 as CVA6, config_boom as BOOM
import config_spike as SPIKE
import rc_inst_list, cva6_inst_list, boom_inst_list

#################
# ARG VARIABLES #
#################
"""
- Structure of argVars values:
    - 's': short name, 'v': default value, 'c': valid choices, 'h': help text
- All variables in the argVars are user options
- Change the 'v' value of any variable as needed
    - You can also change these variables from the terminal to avoid changing the script itself
        - Run 'python3 <file_name>.py --help' for more details
"""

# Registry of user-editable options, keyed by option name.
# Each value is a dict with: 's' short flag, 'v' default value,
# 'c' list of valid choices (None where no list is given), 'h' help text.
argVars = {}

# --- target core and run identification ---
argVars['core_name']        = {'s' : 'co' , 'v' : 'rc',     'c' : ['rc', 'cva6', 'boom'],   'h' : "select core(benchmark) to fuzz"}
# the default run_id is a timestamp computed once, when this module is imported
argVars['run_id']           = {'s' : 'id' , 'v' : f"{datetime.now().strftime('%y_%m_%d_%H_%M_%S')}",  'c' : None, 'h' : "select run name to store results (uses current time by default)"}

# --- run mode, stop conditions (time / testcase count / coverage), batching and file retention ---
argVars['run_mode']         = {'s' : 'rm' , 'v' : 'thehuzz','c' : ['prof', 'thehuzz', 'random'], 'h' : "select whether running TheHuzz profiling or TheHuzz fuzzer"}
argVars['run_task']         = {'s' : 'rt' , 'v' : 'fuzz',   'c' : ['fuzz', 'check_mismatches'], 'h' : "runs normally if fuzz is selected. rest of the options are subtasks that run corresponding task and quit"}
# 604_800 seconds = 7 days
argVars['max_fuzz_time']    = {'s' : 'mt' , 'v' : 604_800,  'c' : None,     'h' : "select max time to run (in seconds)"}
argVars['max_fuzz_progs']   = {'s' : 'mp' , 'v' : 50_000,   'c' : None,     'h' : "select max no. of testcases to run"}
argVars['target_cov']       = {'s' : 'tc' , 'v' : 100,      'c' : None,     'h' : "select coverage target (in percentage)"}
argVars['sim_batch_size']   = {'s' : 'sj' , 'v' : 10,       'c' : None,     'h' : "select number of testcases to simulate at once (recommended between 10 to 100)"}
argVars['seed_gen_interval']= {'s' : 'sge', 'v' : 2000,     'c' : None,     'h' : "new seeds will be introduced after these many mutations to ensure fuzzer explores entire design"}
# default equals the sim_batch_size default; the help text asks that it never exceed sim_batch_size
argVars['no_threads']       = {'s' : 'j'  , 'v' : 10,       'c' : None,     'h' : "select max no of threads to use (should not be greater than sim_batch_size)"}
argVars['store_elf_file']   = {'s' : 'sfe', 'v' : 1,        'c' : [0,1],    'h' : "set to 1 to store the generated elf files"}
argVars['store_trace_file'] = {'s' : 'sft', 'v' : 1,        'c' : [0,1],    'h' : "set to 1 to store the generated trace files"}
argVars['store_cov_file']   = {'s' : 'sfc', 'v' : 0,        'c' : [0,1],    'h' : "set to 1 to store the coverage files (not needed to run TheHuzz, setting this reduces fuzzer speed)"}

# --- coverage collection and feedback ---
argVars['start_type_cov']   = {'s' : 'stc', 'v' : 'new',                     'c' : ['new', 'continue'], 'h' : "select continue if fuzzer should continue using existing coverage, make sure to set the file path with -icf arg"}
argVars['input_cov_file']   = {'s' : 'icf', 'v' : '',                        'c' : None,                'h' : "set the full path for the json coverage dictionary file that fuzzer should continue using"}
argVars['cov_types']        = {'s' : 'ct' , 'v' : ['line', 'branch', 'cond', 'fsm', 'tgl'], 'c' : None, 'h' : "select which coverage metrics to collect and use for results (available types: 'line', 'branch', 'cond', 'fsm', 'tgl')"}
argVars['feedback_cov_types']={'s' : 'fct', 'v' : ['line', 'branch', 'cond', 'fsm', 'tgl'], 'c' : None, 'h' : "select which coverage metrics to use as feedback  (available types: 'line', 'branch', 'cond', 'fsm', 'tgl')"}
# the choices list contains only 1, i.e. no "off" value is offered for this option
argVars['cov_enable']       = {'s' : 'ce' , 'v' : 1,        'c' : [1],      'h' : "set to 1 to collect coverage (this has to be enabled if fuzzer should use coverage feedback)"}

# --- bug hunting, logging verbosity and deletion confirmation ---
argVars['detecting_bugs']   = {'s' : 'db' , 'v' : 0,        'c' : [0, 1],      'h' : "sets bug hunting on/off"}
argVars['debug_print']      = {'s' : 'dp' , 'v' : 0,        'c' : [0,1],    'h' : "set to 1 to enable debug print messages in the log file"}
argVars['force_delete']     = {'s' : 'fd' , 'v' : 0,        'c' : [0,1],    'h' : 'set to 1 to skip confirmation when deleting files & dirs (use with caution)'}

# profiling related: one 0/1 switch per profiling step, every step enabled (1) by default
# NOTE(review): presumably only consulted when run_mode is 'prof' - confirm against the profiler script
argVars['prof_gen_progs']   = {'s' : 'pg' , 'v' : 1,        'c' : [0,1],    'h' : "set to 1 to generate program files"}
argVars['prof_mut_progs']   = {'s' : 'pm' , 'v' : 1,        'c' : [0,1],    'h' : "set to 1 to mutate program files"}
argVars['prof_run_progs']   = {'s' : 'pr' , 'v' : 1,        'c' : [0,1],    'h' : "set to 1 to simulate program files"}
argVars['prof_merge_cov']   = {'s' : 'pmc', 'v' : 1,        'c' : [0,1],    'h' : "set to 1 to merge cov and collect cov increments"}
argVars['prof_run_optimizer']={'s' : 'pro', 'v' : 1,        'c' : [0,1],    'h' : "set to 1 to run the optimizer"}
argVars['prof_check_prog_files'] = {'s' : 'pc', 'v' : 1,    'c' : [0,1],    'h' : "set to 1 to check if all prog files are generated correctly"}

# graph related: what to plot, axis resolution, and optional custom input/output locations
argVars['cov_type_to_plot'] = {'s' : 'ctp', 'v' : 'total',  'c' : ['line', 'branch', 'cond', 'fsm', 'tgl', 'total'], 'h' : "select cov types to plot"}
argVars['runs_to_plot']     = {'s' : 'rtp', 'v' : ["thehuzz", "random"],'c' : None, 'h' : "select run types to plot"}
argVars['graph_time_prog']  = {'s' : 'gtp', 'v' : 'prog',   'c' : ['time', 'prog'], 'h' : "select time or programs for x-axis"}
argVars['no_col_per_exp']   = {'s' : 'cpe', 'v' : 7,        'c' : None,     'h' : "specify the no. of columns of data per experiment in excel sheet"}
argVars['graph_skip_rows']  = {'s' : 'gsr', 'v' : 1,        'c' : None,     'h' : "select no. of rows to skip in excel sheet"}
# *_prog_* values are counted in testcases, *_time_* values in seconds (see the help texts)
argVars['graph_prog_step']  = {'s' : 'gps', 'v' : 100,      'c' : None,     'h' : "set resolution of points to plot"}
argVars['graph_prog_tick']  = {'s' : 'gpt', 'v' : 1000,     'c' : None,     'h' : "set resolution of ticks on x-axis"}
argVars['graph_time_step']  = {'s' : 'gts', 'v' : 60,       'c' : None,     'h' : "set resolution of points to plot (in sec)"}
# 60*60 seconds = one tick per hour
argVars['graph_time_tick']  = {'s' : 'gtt', 'v' : 60*60,    'c' : None,     'h' : "set resolution of ticks on x-axis (in sec)"}
argVars['graph_in_percent'] = {'s' : 'gip', 'v' :  1,       'c' : [0,1],    'h' : "set to 1 to plot coverage percentage on y-axis, else it plots no. of cov points"}
argVars['graph_4x_plot']    = {'s' : 'g4p', 'v' :  0,       'c' : [0,1],    'h' : "set to 1 to plot broken axis graph"}
argVars['graph_max_progs_to_plot']  = {'s' : 'gmp', 'v' : 20_000,'c': None, 'h' : "set max no. of testcases to plot"}
argVars['graph_max_time_to_plot']   = {'s' : 'gmt', 'v' : 10**10,'c': None, 'h' : "set max time to plot (in sec)"}
# use_cust_*: an empty string selects the default location that pt() derives under the outputs dir
argVars['use_cust_cov_files_dir']   = {'s' : 'gcd', 'v' : "",    'c': None, 'h' : "provide a custom dir for cov files (if empty string, default path will be used)"}
argVars['use_cust_graph_excel_file']= {'s' : 'gef', 'v' : "",    'c': None, 'h' : "provide a custom excel file path (if empty string, default path will be used)"}
argVars['use_cust_graph_plot_file'] = {'s' : 'gpf', 'v' : "",    'c': None, 'h' : "provide a custom graph output file (if empty string, default path will be used)"}
argVars['use_cust_ref_cov_dict_file']={'s' : 'gcf', 'v' : "",    'c': None, 'h' : "provide a custom path for the reference coverage dict file (if empty string, default path will be used)"}
argVars['excel_file_path']          = {'s' : 'efp', 'v' : '',    'c' : None,'h' : "full path to the excel file where the cov data should be stored"}

# bug detection related: filters applied to mismatches found in trace output
argVars['ign_mm_after_first']= {'s': 'bif', 'v' : 1,        'c' : [0,1],    'h' : "set to ignore any mismatches after the first mismatch in trace output of each program input"}
# the default is a list, so mismatch id 1 is ignored unless overridden
argVars['ign_itr_mm']        = {'s': 'bim', 'v' : [1],       'c' : None,     'h' : "provide a array of mismatch ids to ignore. Ex: -bim 1 5 9"}


################################
# EDIT WITH CAUTION BELOW THIS #
################################
"""
- All variables below are developer options, edit with caution
- Check the pt function for the directory structure of the project
"""

# coverage files
def vdb_cov_files():
    """Return one '<cov_type>.verilog.data.xml' file name per entry of cov_types.

    cov_types is read from module scope; the order of the result follows the
    order of cov_types.
    """
    return list(map("{}.verilog.data.xml".format, cov_types))

# core specific variables #
def CORE():
    """Return the core-specific config module that matches core_name.

    Looks core_name (module scope) up among the imported RC, CVA6 and BOOM
    config modules, then sanity-checks the chosen module.

    Raises:
        AssertionError: if core_name is not one of 'rc', 'cva6', 'boom', if the
            module's own core_name differs from the requested one, or if the
            module's ready attribute is falsy.
    """
    configs_by_core = {'rc': RC, 'cva6': CVA6, 'boom': BOOM}
    assert core_name in configs_by_core, f"unknown core:{core_name} encountered"

    selected = configs_by_core[core_name]
    # guard against a config module that declares a different core than requested
    assert core_name == selected.core_name, f"incorrect core config file imported, {core_name}, {selected.core_name}"
    assert selected.ready, f"core is not ready to run yet {selected.ready}"

    return selected

# core instance list for coverage
def core_instance_list():
    """Return the `l` attribute of the instance-list module for core_name.

    core_name is read from module scope and selects between rc_inst_list,
    cva6_inst_list and boom_inst_list.

    Raises:
        AssertionError: if core_name is not one of 'rc', 'cva6', 'boom'.
    """
    inst_modules = {
        'rc': rc_inst_list,
        'cva6': cva6_inst_list,
        'boom': boom_inst_list,
    }
    assert core_name in inst_modules, f"unknown core:{core_name} encountered"

    # only the selected module's list is read
    return inst_modules[core_name].l

# emu specific variables
def EMU():
    """Return the emulator config module for the ISA of the selected core.

    Reads CORE.isa from module scope, so CORE must already expose an `isa`
    attribute when this runs. Only the 'riscv' ISA is handled, and it maps to
    SPIKE.

    Raises:
        AssertionError: if the ISA is not 'riscv', or if the emulator module's
            ready attribute is falsy.
    """
    target_isa = CORE.isa

    # the emulator is chosen by ISA; 'riscv' is the only one supported here
    assert target_isa == 'riscv', f"unknown isa:{target_isa} encountered for core:{core_name}"
    emulator = SPIKE

    assert emulator.ready, f"emulator is not ready to run yet {emulator.ready}"
    return emulator

#inferred paths (update these if the soc or fuzzer git repo structure is changed)
def pt():
    """Build the dictionary of directories, files, file-name templates and
    file-name regexes used by the project.

    Reads these names from module scope: core_name, run_mode, run_id, the four
    use_cust_* options, CORE (for CORE.isa) and EMU (for EMU.emu_name). CORE and
    EMU must therefore already expose those attributes when this runs. The
    project root is taken from the THEHUZZ_ROOT environment variable.

    Layout produced:
        <root>/{benchmarks, docs, utils, thehuzz, software, outputs,
                input_seeds, setup_scripts}/
        <root>/outputs/<run_name>/...        per-run results
        <root>/outputs/trash_<run_name>/     files moved aside instead of deleted

    Returns:
        dict: keys ending in '_dir' / '_file' hold path strings, keys ending in
        '_t' hold string.Template objects, keys ending in '_re' hold regex
        pattern strings, and 'run_name' holds the run/database name.

    Raises:
        KeyError: if the THEHUZZ_ROOT environment variable is not set.
    """

    def _custom_or_default(custom_path, default_path):
        # an empty string means the user gave no override for this location
        return default_path if custom_path == "" else custom_path

    paths = {} # dictionary to store all paths

    emu_name_local = EMU.emu_name

    # name of the run/database
    paths['run_name'] = f"{core_name}_{run_mode}_{run_id}"

    # git repo path
    paths["root_dir"] = os.environ["THEHUZZ_ROOT"]
    root = paths["root_dir"]

    # root
    paths["benchmarks_dir"]    = os.path.join(root, "benchmarks/")
    paths["docs_dir"]          = os.path.join(root, "docs/")
    paths["utils_dir"]         = os.path.join(root, "utils/")
    paths["thehuzz_dir"]       = os.path.join(root, "thehuzz/")
    paths["sw_dir"]            = os.path.join(root, "software/")
    paths["outputs_dir"]       = os.path.join(root, "outputs/")
    paths["input_seeds_dir"]   = os.path.join(root, "input_seeds/")
    paths["setup_scripts_dir"] = os.path.join(root, "setup_scripts/")

    # utils dir
    paths["opt_sol_file"]  = os.path.join(paths["utils_dir"], f"cplex_cov_sol_{core_name}.json")
    paths["sim_bash_file"] = os.path.join(paths["utils_dir"], f"vcs_run_{core_name}.bash")
    paths["emu_bash_file"] = os.path.join(paths["utils_dir"], f"emu_run_{emu_name_local}.bash")

    # sw_dir
    paths["sw_run_dir"] = os.path.join(paths["sw_dir"], CORE.isa)

    # outputs dir
    outputs = paths["outputs_dir"]
    paths["outputs_run_dir"] = os.path.join(outputs, paths['run_name']) # dir where sim outs will be stored
    paths["trash_run_dir"]   = os.path.join(outputs, f"trash_{paths['run_name']}/") # dir where all useless files are moved (bcz moving is faster than deleting)

    # graph inputs/outputs: each location can be overridden by its use_cust_* option.
    # The cov files dir now honours use_cust_cov_files_dir (it used to be set to the
    # custom excel file path by mistake).
    paths['graph_cov_files_dir'] = _custom_or_default(
        use_cust_cov_files_dir, os.path.join(outputs, 'cov_files'))
    paths['graph_excel_file'] = _custom_or_default(
        use_cust_graph_excel_file, os.path.join(outputs, 'exp_data.xlsx'))
    paths['graph_plot_file'] = _custom_or_default(
        use_cust_graph_plot_file, os.path.join(outputs, 'exp_plot.pdf'))
    paths['graph_ref_cov_dict_file'] = _custom_or_default(
        use_cust_ref_cov_dict_file, os.path.join(outputs, f'{core_name}_ref_cov_dict.json'))

    # outputs/<run_name> dir
    run_dir = paths["outputs_run_dir"]
    paths["gen_progs_dir"]      = os.path.join(run_dir, "gen_progs") # dir name where i/p progs r generated
    paths["sim_store_dir"]      = os.path.join(run_dir, "sim_out") # dir name where sim logs are stored
    paths["all_progs_dir"]      = os.path.join(run_dir, "all_progs") # dir where fuzzer will store all testcases
    paths["merged_covs_dir"]    = os.path.join(run_dir, "merged_covs") # dir where profiler stores merged cov
    paths["merged_cov_file"]    = os.path.join(run_dir, "merged_cov.json") # file where merged cov is stored
    paths["cov_log_file"]       = os.path.join(run_dir, "cov_log.jsonl") # coverage log file
    paths["inputs_log_file"]    = os.path.join(run_dir, "inputs_log.txt") # file where logs about testcases is stored
    paths["fuzz_log_file"]      = os.path.join(run_dir, "fuzz_log.txt") # file where fuzz log is stored
    paths["cov_data_dict_file"] = os.path.join(run_dir, "cov_data_dict.json") # coverage data dictionary file
    paths["mismatches_summary_file"] = os.path.join(run_dir, "mismatches_summary.json") # file where a summary of all mismatches is recorded

    paths["hyp_params_file"]    = os.path.join(run_dir, "hyper_params.txt") # file where hyper parameters are stored

    # Regex patterns below are raw strings so that \d and \. reach the regex
    # engine without relying on Python passing unknown escapes through; the
    # pattern text itself is unchanged.

    # outputs/<run_name>/gen_progs dir
    paths['hex_file_t']    = Template("inst_file_$fno.hex")
    paths['hex_file_re']   = r"inst_file_(\d+).hex"
    paths['mem_file_t']    = Template("inst_file_$fno.mem")  # should be same as hex file name
    paths['riscv_file_t']  = Template("inst_file_$fno.riscv")

    # outputs/<run_name>/sim_out dir
    paths['trace_out_t']      = Template("rtl_trace_out_$fno.log")
    paths['trace_out_re']     = r"rtl_trace_out_(\d+).log"
    paths['bug_out_t']        = Template("rtl_bug_out_$fno.log")
    paths['cov_out_t']        = Template("rtl_cov_out_$fno.pickle")
    paths['emu_trace_out_t']  = Template("emu_trace_out_$fno.log")
    paths['comp_trace_out_t'] = Template("comp_trace_out_$fno.json")

    # outputs/<run_name>/all_progs_dir
    # Braces are required here: written as "$fno_itr_", Template would take
    # "fno_itr_" as the placeholder name and substitute(fno=..., itrno=...) would fail.
    paths['hex_file_itr_t']  = Template("inst_file_${fno}_itr_${itrno}.hex")
    paths['hex_file_itr_re'] = r"inst_file_(\d+)_itr_(\d+).hex"
    paths['hex_file_mut_t']  = Template("inst_file_${file_no}_itr_${file_itr}_mut_${mut}_inst_${inst_no}.hex")
    paths['hex_file_mut_re'] = r"inst_file_(\d+)_itr_(\d+)_mut_(\d+)_inst_(\d+).hex"
    paths['mem_file_mut_re'] = r"inst_file_(\d+)_itr_(\d+)_mut_(\d+)_inst_(\d+).mem"

    # inputs dir
    paths['seed_input_file_re'] = r"[^\.](.*).hex" # shouldnt start with . bcz they are swap files

    return paths

# fuzzer variables and other variables
# Fixed tuning constants: plain module-level values that are not registered in argVars.
# NOTE(review): the first two descriptions look as if they may be swapped relative to the names - confirm against the profiler
num_inst_in_prog_prof   = 20  # number of times the instruction will get mutated when profiling
num_itr_per_inst        = 8  # number of times each instruction is generated for profiling
num_inst_in_prog        = 80 # number of instructions in seed testcases
num_nops_at_start       = 15 # number of nop instructions before the instructions under test in a testcase
num_nops_at_end         = 15 # number of nop instructions after the instructions under test in a testcase
all_cov_types           = ['line', 'branch', 'cond', 'fsm', 'tgl', 'total'] # available types of coverage: the five metrics listed for cov_types plus 'total'
num_times_to_mut        = 10 # maximum number of times testcases are mutated during fuzzing

##############
# deprecated #
##############
    #pt["gen_progs_run_dir_name"]= f"gen_progs_{run_name}/"  # dir name where i/p progs r generated

    #argVars['clone_no']         = {'s' : 'cl' , 'v' : 0,        'c' : None,     'h' : "select current clone if using multiple instances of fuzzer parallely, else ignore it"}