Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 21 additions & 22 deletions ipfn/ipfn.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/usr/bin/env python
from __future__ import print_function
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -41,7 +40,7 @@ def __init__(self, original, aggregates, dimensions, weight_col='total',
self.conv_rate = convergence_rate
self.max_itr = max_iteration
if verbose not in [0, 1, 2]:
raise(ValueError(f"wrong verbose input, must be either 0, 1 or 2 but got {verbose}"))
raise ValueError(f"wrong verbose input, must be either 0, 1 or 2 but got {verbose}")
self.verbose = verbose
self.rate_tolerance = rate_tolerance

Expand Down Expand Up @@ -74,7 +73,7 @@ def ipfn_np(self, m, aggregates, dimensions, weight_col='total'):
m = IPF.iteration()
"""

# Check that the inputs are numpay arrays of floats
# Check that the inputs are numpy arrays of floats
inc = 0
for aggregate in aggregates:
if not isinstance(aggregate, np.ndarray):
Expand Down Expand Up @@ -142,7 +141,7 @@ def ipfn_np(self, m, aggregates, dimensions, weight_col='total'):
ori_ijk = aggregates[inc][item]
m_slice = m[idx]
m_ijk = m_slice.sum()
# print('Current vs original', abs(m_ijk/ori_ijk - 1))
# print("Current vs original", abs(m_ijk/ori_ijk - 1))
if abs(m_ijk / ori_ijk - 1) > max_conv:
max_conv = abs(m_ijk / ori_ijk - 1)

Expand Down Expand Up @@ -254,34 +253,34 @@ def iteration(self):
"""
Runs the ipfn algorithm. Automatically detects whether it is working with a numpy ndarray or a pandas DataFrame.
"""

i = 0
conv = np.inf
old_conv = -np.inf
old_conv = np.inf
conv_list = []
converged = 1
m = self.original

# If the original data input is in pandas DataFrame format
# Prepare input data
if isinstance(self.original, pd.DataFrame):
ipfn_method = self.ipfn_df
elif isinstance(self.original, np.ndarray):
ipfn_method = self.ipfn_np
self.original = self.original.astype('float64')
else:
raise(ValueError(f'Data input instance not recognized. The input matrix is not a numpy array or pandas DataFrame'))
while ((i <= self.max_itr and conv > self.conv_rate) and (i <= self.max_itr and abs(conv - old_conv) > self.rate_tolerance)):
old_conv = conv
raise ValueError(f"Data input instance not recognized. The input matrix is not a numpy array or pandas DataFrame")

# Run iterations
for i in range(self.max_itr):
m, conv = ipfn_method(m, self.aggregates, self.dimensions, self.weight_col)
conv_list.append(conv)
i += 1
converged = 1
if i <= self.max_itr:
if (not conv > self.conv_rate) & (self.verbose > 1):
print('ipfn converged: convergence_rate below threshold')
elif not abs(conv - old_conv) > self.rate_tolerance:
print('ipfn converged: convergence_rate not updating or below rate_tolerance')
if conv <= self.conv_rate:
if self.verbose > 1:
print("ipfn converged: convergence_rate below threshold")
break
if abs(conv - old_conv) <= self.rate_tolerance:
print("ipfn converged: convergence_rate not updating or below rate_tolerance")
break
old_conv = conv
else:
print('Maximum iterations reached')
print("Maximum iterations reached")
converged = 0

# Handle the verbose
Expand All @@ -290,6 +289,6 @@ def iteration(self):
elif self.verbose == 1:
return m, converged
elif self.verbose == 2:
return m, converged, pd.DataFrame({'iteration': range(i), 'conv': conv_list}).set_index('iteration')
return m, converged, pd.DataFrame({'iteration': range(1, i+2), 'conv': conv_list}).set_index('iteration')
else:
raise(ValueError(f'wrong verbose input, must be either 0, 1 or 2 but got {self.verbose}'))
raise ValueError(f"wrong verbose input, must be either 0, 1 or 2 but got {self.verbose}")