|
1 | 1 | #!/usr/bin/env python |
2 | 2 |
|
| 3 | +import cloudpickle as cp |
| 4 | +from pydra.utils.hash import Cache, register_serializer |
| 5 | +from sklearn.pipeline import Pipeline |
| 6 | + |
| 7 | + |
@register_serializer
def bytes_repr_Pipeline(obj: Pipeline, cache: Cache):
    """Yield a byte representation of a scikit-learn ``Pipeline``.

    Registered with ``pydra.utils.hash`` through ``register_serializer`` so
    that ``Pipeline`` objects are serialized with cloudpickle.

    Parameters
    ----------
    obj : sklearn.pipeline.Pipeline
        The pipeline to serialize
    cache : Cache
        Cache object supplied by the caller; not used by this serializer

    Yields
    ------
    bytes
        The cloudpickle serialization of ``obj``
    """
    # ``cp.dumps`` returns the pickled bytes. ``cp.dump`` writes to a file
    # object and raises TypeError when called without one.
    yield cp.dumps(obj)
| 11 | + |
3 | 12 |
|
4 | 13 | def read_file(filename, x_indices=None, target_vars=None, group=None): |
5 | 14 | """Read a CSV data file |
@@ -126,7 +135,27 @@ def calc_metric(output, metrics): |
126 | 135 | return score, output |
127 | 136 |
|
128 | 137 |
|
129 | | -def get_feature_importance(permute, model, gen_feature_importance=True): |
| 138 | +def get_feature_importance( |
| 139 | + *, |
| 140 | + permute: bool, |
| 141 | + model: tuple[Pipeline, list, list], |
| 142 | + gen_feature_importance: bool = True, |
| 143 | +): |
| 144 | + """Compute feature importance for the model |
| 145 | +
|
| 146 | + Parameters |
| 147 | + ---------- |
| 148 | + permute : bool |
| 149 | + Whether or not to run the model in permuted mode |
| 150 | + model : tuple(sklearn.pipeline.Pipeline, list, list) |
| 151 | + The model to compute feature importance for |
| 152 | + gen_feature_importance : bool |
| 153 | + Whether or not to generate the feature importance |
| 154 | + Returns |
| 155 | + ------- |
| 156 | + list |
| 157 | + List of feature importance |
| 158 | + """ |
130 | 159 | if permute or not gen_feature_importance: |
131 | 160 | return [] |
132 | 161 | pipeline, train_index, test_index = model |
@@ -172,7 +201,7 @@ def get_feature_importance(permute, model, gen_feature_importance=True): |
172 | 201 | pipeline_steps.coefs_ |
173 | 202 | pipeline_steps.coef_ |
174 | 203 |
|
175 | | - Please add correct method in tasks.py or if inexistent, |
| 204 | + Please add correct method in tasks.py or if non-existent, |
176 | 205 | set gen_feature_importance to false in the spec file. |
177 | 206 |
|
178 | 207 | This is the error that was returned by sklearn:\n\t{e}\n |
|
0 commit comments