potash · potash · Oct 17, 2017 · Oct 18, 2017 · Oct 18, 2017 · Oct 24, 2017
diff --git a/lead/features/wic.py b/lead/features/wic.py
@@ -46,7 +46,7 @@ def __init__(self, spacedeltas, dates, parallel=False):
                 parallel=parallel)
 
     def get_aggregates(self, date, delta):
-        enroll = self.inputs[0].get_result()
+        enroll = self.inputs[0].result
         aggregates = [
             Aggregate('medical_risk', 'any', fname=False),
             Aggregate(['household_size', 'household_income'], 
@@ -89,7 +89,7 @@ def __init__(self, spacedeltas, dates, parallel=False):
                 parallel=parallel)
 
     def get_aggregates(self, date, delta):
-        births = self.inputs[0].get_result()
+        births = self.inputs[0].result
         aggregates = [
             Aggregate('length', 'max', fname=False),
             Aggregate('weight', 'max', fname=False),
@@ -125,7 +125,7 @@ def __init__(self, spacedeltas, dates, parallel=False):
                 parallel=parallel)
 
     def get_aggregates(self, date, delta):
-        prenatal = self.inputs[0].get_result()
+        prenatal = self.inputs[0].result
 
         aggregates = [
             Count(),

diff --git a/lead/model/data.py b/lead/model/data.py
@@ -94,9 +94,9 @@ def run(self, acs, left, aux=None):
         left = data.binarize(left, ['community_area_id', 'ward_id'], astype=self.dtype)
 
         logging.info('Joining aggregations')
-        X = left.join([a.get_result() for a in self.aggregation_joins] + [acs])
+        X = left.join([a.result for a in self.aggregation_joins] + [acs])
         # delete all aggregation inputs so that memory can be freed
-        for a in self.aggregation_joins: del a._result
+        for a in self.aggregation_joins: del a.result
 
         if not self.address:
             logging.info('Adding auxillary features')

diff --git a/lead/model/transform.py b/lead/model/transform.py
@@ -14,19 +14,23 @@ class LeadTransform(Step):
     performing feature selection and creating sample weights.
     """
     def __init__(self, inputs, outcome_expr, aggregations,
-            wic_sample_weight=0, exclude=[], include=[]):
+            outcome_where_expr=None, wic_sample_weight=0,
+            exclude=[], include=[]):
         """
         Args:
             inputs: list containing a LeadCrossValidate step
             outcome_expr: the query to perform on the auxillary information to produce an outcome variable
             aggregations: defines which of the SpacetimeAggregations to include
-            and which to drop
+                and which to drop
+            outcome_where_expr: optional boolean query evaluated on the auxiliary information;
+                the outcome is masked (via `where`) wherever it is false. Defaults to None: no masking
             wic_sample_weight: optional different sample weight for wic kids
         """
         Step.__init__(self,
                 inputs=inputs,
                 outcome_expr=outcome_expr,
                 aggregations=aggregations,
+                outcome_where_expr=outcome_where_expr,
                 wic_sample_weight=wic_sample_weight, 
                 exclude=exclude, include=include)
 
@@ -40,6 +44,8 @@ def run(self, X, aux, train, test):
 
         """
         y = aux.eval(self.outcome_expr)
+        if self.outcome_where_expr is not None:
+            y = y.where(aux.eval(self.outcome_where_expr))
 
         logging.info('Selecting aggregations')
         aggregations = self.get_input(LeadData).aggregations

diff --git a/lead/model/workflows.py b/lead/model/workflows.py
@@ -143,7 +143,8 @@ def bll6_models(estimators, cv_search={}, transform_search={}):
     transformd = dict(
         wic_sample_weight=[0],
         aggregations=aggregations.args,
-        outcome_expr=['max_bll0 >= 6']
+        outcome_expr='max_bll0 >= 6',
+        outcome_where_expr='max_bll0 == max_bll0'  # NaN != NaN, so this is equivalent to max_bll0.notnull()
     )
     transformd.update(transform_search)
     return models(estimators, cvd, transformd)