From c7da9ca74ccb87b16843433542be2ced7643bf96 Mon Sep 17 00:00:00 2001 From: haoyinyin Date: Thu, 8 Aug 2024 21:33:29 +0800 Subject: [PATCH] examine gini can be negative --- .../__pycache__/__init__.cpython-310.pyc | Bin 0 -> 200 bytes .../__pycache__/metrics.cpython-310.pyc | Bin 0 -> 3070 bytes .../zero_inflated_lognormal.cpython-310.pyc | Bin 0 -> 2212 bytes lifetime_value/metrics.py | 6 ++-- lifetime_value/metrics_test.py | 32 +++++++++++++++++- 5 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 lifetime_value/__pycache__/__init__.cpython-310.pyc create mode 100644 lifetime_value/__pycache__/metrics.cpython-310.pyc create mode 100644 lifetime_value/__pycache__/zero_inflated_lognormal.cpython-310.pyc diff --git a/lifetime_value/__pycache__/__init__.cpython-310.pyc b/lifetime_value/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..192f2d3c419599e10e75522d29b3567562644a79 GIT binary patch literal 200 zcmd1j<>g`k0`Wsz(%v#LFgylvkO5GP!2yViMSw&KLkeRKLkNUs1ky~w44TX@8G%xo zjJFuI{4|-O*mF}$iZYXnqr|IHi}K?$^U`t>OHxzfbMn*k@{4j4b5=4GF#|P#iC+dT zRx#WHa^HWN5Qtd#t7P9~e4n7`6 E05sM!P5=M^ literal 0 HcmV?d00001 diff --git a/lifetime_value/__pycache__/metrics.cpython-310.pyc b/lifetime_value/__pycache__/metrics.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d1638fa769c94814ead68c62d8f7a2ab5dacf0a GIT binary patch literal 3070 zcmbVOOK%)Y74E98>3R5h9g`a;JW5>M#Yo7o0E8?nTu6vmpa^hQql@O2yK2Vm^kZDr zoqKKBt3-kg3-%H6EVJi7AR+MwddmW3#~S6qcdBPR9w#e$G^ZY2b?Tf`-}z1#Yiqp# z&p-bDx6waWgWz8@xc)>Ke1X6FD})XPfo7VgEaL;ts2`@`K=`&bhU`5y8BR->XB8M8LYU%mP3DOu8t5R#HJ2OM9pMBT&S+O z9=!_e8czA|4ZVu--n4H!diwp1oxC1S26GE(U##!`^hk`|K?nZWl zyg*I_F@u;tb4*a{ehTrY;4=7QFajntf67rbPb9AmBpAx5`!H0C=2 zP|;Q^PdK#%t1uT*@5jcJu(Ws#TI0TAUz zF81!qM~QXid{tJVe~1OPsB%pRDvv(I0vR*}AgZ+7lTVHkCpGSCME50+l#}Jx#Y`bk z-JFXH2426G3R?5RW-3k2jP_t8$X-1+eI>juMVTT_5xzxHz6+s|cbCC5*hajM5owC9 z4)0JT#pBnMw3haa@yGL@pZB>XVyj#3NKFhiDVV z@T)bA8RTDM%leID@xsG$tGCJofJxAdLh;rTV3$~m>?WFiKwBtQ#8M@{42h|xKpi3{ zh>Z?{SL|`WRr7o@5bng5bvZVnfY z4{hem7kJf#klF8`|AC1_3f{zRbml}ln_o7)7`icwR-*91Pt#-9x{XIz}Un%OV`=BO)>SpSdSTh z6N~Fu8>;cxnlVbuaFqWVYaKC{V5|6T;@5d&Kf}b*tx##Z0?r5n%|KX4FHcw~Z7+fn zh3z?e7(8bvbKymZPc_GU7W#?SzHo_%zXD7IW05-jQG}GOO zM5ub|C(G2QO`N1CV&<7iQL`2Jix|XXu%W#tM@EjSJT8+Wmm|+*6L4Ze6}7}W^Nq?T zDF#Vi7P2(CM4?xybTj@rtm3Dpbn;L+lvL=TdV}6qP4)lk2mZjg$RNSzt5}GOdDYXvpca&OVVD%5t1=Gm>+reM>2}j7;8UI}S90qam-5=E~xtYAsJ~UZL;(HMF%1mQM#N2K0E>7aA ztXufj85_@+b(@OQwS&rWZv_ypLGtz2J5i%lfzK_Hd|+csQ)1@^nJqF%+JBjk5Xfj9u&YtmE0OZjI7Hm4h}mSgTH(PYo&)p4~lvHo5NoCKImt zv&j0+epcuzHTS9XIa($*R#}I4kn}u^7=Ise{!WCwx3_P1wmPCCc=u|SufO{ranJ$O literal 0 HcmV?d00001 diff --git a/lifetime_value/__pycache__/zero_inflated_lognormal.cpython-310.pyc b/lifetime_value/__pycache__/zero_inflated_lognormal.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c798b48003299a51c766e029b9d79efd440975d8 GIT binary patch literal 2212 zcmah~PjA~c6c;I3lI{GflO|1347v=hfzCkITeh|T-F3xo6+Y+w{`NB&qeV?lE-PNIP=nCkQX8gQy59<#f9+FB<3o~xOW_;WA2Rv zXGx@zBJZ8U;f(#thE0i*l`Xg$i##H@JZ}y7yYTJ7Crjv*F(f7Sl#x?9#gRF*W{uKd zl$p%>jmC7_I3rWgf)xm*UYXLlrmfc-%)HbzOEQ-^C1v(}?3U&k*)X=i)`l^)N(;hu zPX5>n(`byb%0(m)b;iToD`M{ia^Zb47Z_l#)$+V&Vld8luH-IEJj@qJcD-Nvp^6Rz znVj&4-lJEZ;<<#}fz4V&@BK;VXTB5jczxr}ha2aKo*%CV?%t8A!PI2Ld;8~@D0sIa(1X7+DrBOPV;+n|(FdKhh^mH+!j$_B9mhLEtb^u1Pru!L z2?=?*z)YYqbA5_w4i;$+E+N+&O{#l0$iOpao8Urrpj&I+C2Qm^q4YCCFX)A3ircUZ zNDTWtl@XqL(V8&0MBoaf4e)Udfin{^b!35ro>7f7qA^ut6vl{9Wy}D3L#syVwlQwU>k8Rg zx;mGxouMB(wZWdTVKBh@9gM{tF3)b28s%HY)Tvj7osz2c;YQggZPq@ev)iSEJODl| zozk{3bxW&sw+!IKayW&g>=oH)jHFP9{6AmxdEA{>R88FalOS zBDsHKL4o3`xq*&_1sD~`=>78xXy+yjdjcPU7hSWmCxL=k@Q*C;+p#DzqzzOa;eb+? zid*m#NR2M3tnh6jyPE=0FA)9$ADhaGLaBhZ7{0$9N{K}xkpU(>R7pSO0p6(JexYmE zbFJruGDs7t1iVtnFL)HV^w;}I9*Rj237qFj6hK`o3)NxeWZ*p;XJQE*uV}~ewNRDA zM>0u^ylNIXN25m^wm{*nOcgV~2|3@_FOTm+Xu8sLn*u4Xi%{h*ijcp$7|nCwuSY3_ zlf+2`aTGZ$OL>(Mzb(U<2X$%iR|Ovk4%!PntXsNxi48pLjRgpec0oh-pj+#7$QrfD zJz`y~nJ%%34S$>5Asu49x6KaBUAjpqao@X^xDQ*pOO+D@tcZfZ->A_DW%-T9e5iWI zJC$AEL`9Ex^xgTJS86&RVLwR|HSr-~qiUJqwIuy9hAhSBczCkzgA-Z1! cFUKi=f{2qCwYAzI7{sROu3J!1EwXa)FVcvC-2eap literal 0 HcmV?d00001 diff --git a/lifetime_value/metrics.py b/lifetime_value/metrics.py index 5bb7c7b..946561d 100644 --- a/lifetime_value/metrics.py +++ b/lifetime_value/metrics.py @@ -18,7 +18,7 @@ import numpy as np import pandas as pd -from sklearn import metrics +import sklearn.metrics as sk_metrics def cumulative_true( @@ -62,11 +62,11 @@ def gini_from_gain(df: pd.DataFrame) -> pd.DataFrame: def _normalized_rmse(y_true, y_pred): - return np.sqrt(metrics.mean_squared_error(y_true, y_pred)) / y_true.mean() + return np.sqrt(sk_metrics.mean_squared_error(y_true, y_pred)) / y_true.mean() def _normalized_mae(y_true, y_pred): - return metrics.mean_absolute_error(y_true, y_pred) / y_true.mean() + return sk_metrics.mean_absolute_error(y_true, y_pred) / y_true.mean() def _aggregate_fn(df): diff --git a/lifetime_value/metrics_test.py b/lifetime_value/metrics_test.py index 48a8b99..4835459 100644 --- a/lifetime_value/metrics_test.py +++ b/lifetime_value/metrics_test.py @@ -14,7 +14,13 @@ # ============================================================================ # Lint as: python3 # Dependency imports - +import sys +import os +import io +current_path = os.getcwd() +sys.path.append(os.getcwd()) +sys.path.append(os.path.join(current_path, "lifetime_value")) +print(sys.path) from lifetime_value import metrics import numpy as np import pandas as pd @@ -61,6 +67,30 @@ def test_decile_stats(self): rtol=1e-2, atol=1000)) + def test_gini_negative(self): + test_df2 = """a,0.1,0.115 +b,0.1,0.112 +c,0.1,0.1151 +d,0.9,0.01""" + df = pd.read_csv(io.StringIO(test_df2), header=None) + column_names = ['uid', 'label1', 'pred_scores'] + df.columns = column_names + df = df.sort_values(by='label1', ascending=False) + print(f"test_df: {df.head(10)}") + + + total_value = np.sum(df['label1']) + cumulative_true = np.cumsum(df['label1']) / total_value + gain_model = metrics.cumulative_true( + df['label1'], df['pred_scores']) + gain = pd.DataFrame({ + 'ground_truth': cumulative_true, + 'random_model': gain_model + }) + gini = metrics.gini_from_gain(gain) + print(f"test_gini: {gini.head(10)}") + assert gini.loc['random_model', 'raw'] < 0 + if __name__ == '__main__': unittest.main()