Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 135 additions & 10 deletions iot/inverse_optimal_tax.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from statsmodels.nonparametric.kernel_regression import KernelReg
from scipy.interpolate import UnivariateSpline
from scipy.linalg import lstsq
from scipy.optimize import minimize


class IOT:
Expand Down Expand Up @@ -246,12 +247,8 @@ def compute_income_dist(
).values
/ data[weight_var].sum()
).sum()
# F = st.lognorm.cdf(z_line, s=(sigmasq) ** 0.5, scale=np.exp(mu))
# f = st.lognorm.pdf(z_line, s=(sigmasq) ** 0.5, scale=np.exp(mu))
# f = f / np.sum(f)
# f_prime = np.gradient(f, edge_order=2)

# analytical derivative of lognormal
# analytical expressions for F, f, f_prime
sigma = np.sqrt(sigmasq)
F = (1 / 2) * (
1
Expand All @@ -274,14 +271,142 @@ def compute_income_dist(
)
elif dist_type == "kde":
# uses the original full data for kde estimation
data2 = data.copy()
# for income > 500,000 replace actual values with draws from
# a pareto distribution
# data2.loc[data2[income_measure] > 500_000, income_measure] = st.pareto.rvs(
# 1.16, size=data2.loc[data2[income_measure] > 500_000].shape[0]
# )
f_function = st.gaussian_kde(
data[income_measure],
# bw_method=kde_bw,
data2[income_measure],
bw_method=kde_bw,
weights=data[weight_var],
)
f = f_function.pdf(z_line)

# Now splice a Pareto distribution to the KDE
# Step 1: Define the splicing point
splice_point = 350_000 # 500000
# Step 2: Evaluate the KDE at the splice point to find
# its value and derivative
kde_value_at_splice = f_function(splice_point)
# For the derivative, use a small delta to compute numerically
delta = 0.0001 * splice_point
kde_derivative_at_splice = (
f_function(splice_point + delta)
- f_function(splice_point - delta)
) / (2 * delta)

# Step 3: Define a Pareto distribution function that matches
# at the splice point
# The Pareto PDF is: α * x_m^α / x^(α+1) where x_m is the
# scale parameter and α is the shape parameter

# def find_pareto_parameters():
# def objective(params):
# alpha, scale = params
# # Calculate the Pareto PDF value at the splice point
# pareto_value = alpha * (scale**alpha) / (splice_point**(alpha+1))
# # Calculate the Pareto derivative at the splice point
# pareto_derivative = -alpha * (alpha+1) * (scale**alpha) / (splice_point**(alpha+2))

# # Compute the error in matching both the value and derivative
# value_error = (pareto_value - kde_value_at_splice)**2
# derivative_error = (pareto_derivative - kde_derivative_at_splice)**2

# # Return the combined error
# return value_error + derivative_error

# # Initial guess for alpha and scale
# # initial_guess = [2.0, splice_point/2]
# initial_guess = [1.3, splice_point/5]

# # Constraints: alpha > 0, scale > 0
# bounds = [(0.1, 10), (0.1*splice_point, splice_point)]

# # Minimize the objective function
# result = minimize(
# objective, initial_guess, bounds=bounds,
# method='L-BFGS-B')
# print("Minimization result:", result)
# if not result.success:
# raise ValueError("Optimization failed: " + result.message)
# if result.fun > 1e-6:
# raise ValueError("Optimization did not converge to a good solution.")
# return result.x

# # Get the optimal Pareto parameters
# alpha, scale = find_pareto_parameters()

# Directly solve for alpha
alpha = (
-(
kde_derivative_at_splice
* splice_point
/ kde_value_at_splice
)
- 1
)

# Now solve for scale using the first equation:
# kde_value = alpha * (scale^alpha) / (splice_point^(alpha+1))
# scale^alpha = kde_value * splice_point^(alpha+1) / alpha
scale = (
kde_value_at_splice * (splice_point ** (alpha + 1)) / alpha
) ** (1 / alpha)

print(f"Calculated alpha: {alpha}")
print(f"Calculated scale: {scale}")

# Verify the Pareto value at the splice point
pareto_value = (
alpha * (scale**alpha) / (splice_point ** (alpha + 1))
)
pareto_derivative = (
-alpha
* (alpha + 1)
* (scale**alpha)
/ (splice_point ** (alpha + 2))
)

print(f"KDE value at splice: {kde_value_at_splice}")
print(f"Pareto value at splice: {pareto_value}")
print(f"Difference: {pareto_value - kde_value_at_splice}")

print(f"KDE derivative at splice: {kde_derivative_at_splice}")
print(f"Pareto derivative at splice: {pareto_derivative}")
print(
f"Derivative difference: {pareto_derivative - kde_derivative_at_splice}"
)

# Step 4: Define the spliced PDF function
def spliced_pdf(x):
if np.isscalar(x):
if x <= splice_point:
return f_function(x)
else:
return alpha * (scale**alpha) / (x ** (alpha + 1))
else:
# Handle array inputs
x = np.asarray(x)
result = np.zeros_like(x, dtype=float)
kde_mask = x <= splice_point
pareto_mask = x > splice_point

if np.any(kde_mask):
result[kde_mask] = f_function(x[kde_mask])
if np.any(pareto_mask):
result[pareto_mask] = (
alpha
* (scale**alpha)
/ (x[pareto_mask] ** (alpha + 1))
)
return result

# f = f_function(z_line)
f = spliced_pdf(z_line)
f = f / np.sum(f) # ensure sum to one
F = np.cumsum(f)
f_prime = np.gradient(f, edge_order=2)
f_prime = np.gradient(f, z_line, edge_order=2)
elif dist_type == "Pln":

def pln_pdf(y, mu, sigma, alpha):
Expand Down Expand Up @@ -386,7 +511,7 @@ def sw_weights(self):
integral = np.trapz(
g_z * self.f, self.z
) # renormalize to integrate to 1
g_z = g_z / integral
g_z = g_z #/ integral

# use Lockwood and Weinzierl formula, which should be equivalent but using numerical differentiation
bracket_term = (
Expand Down
8 changes: 4 additions & 4 deletions iot/iot_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,10 @@ def plot(self, var="g_z"):
+ "=%{y:.3f}<extra></extra>"
)
else:
if var == "g_z_numerical":
start_idx = 10 # numerical approximation not great near 0
else:
start_idx = 0
# if var == "g_z_numerical":
# start_idx = 0 # numerical approximation not great near 0
# else:
start_idx = 0
y = []
for i in self.iot:
y.append(i.df()[var][start_idx:])
Expand Down
Loading