From 3289f0e60ef40c617f4486d1168fa3dbb3cdc8d6 Mon Sep 17 00:00:00 2001 From: Black_tea <13018564+Blacktea0@users.noreply.github.com> Date: Sun, 25 Sep 2022 23:58:03 +0800 Subject: [PATCH] submit hw01 --- HW01/RUJIAHAO798.jpg | Bin 0 -> 12122 bytes HW01/RUJIAHAO798.py | 253 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 253 insertions(+) create mode 100644 HW01/RUJIAHAO798.jpg create mode 100644 HW01/RUJIAHAO798.py diff --git a/HW01/RUJIAHAO798.jpg b/HW01/RUJIAHAO798.jpg new file mode 100644 index 0000000000000000000000000000000000000000..08ba953a29f7f3a6fbc6229e4150abe015faea47 GIT binary patch literal 12122 zcmeHt2T&B-x^52x4oNadPD9Q~qC{m#0ulvABxgr5hzJ7+C`itXL=gdzoJAPPLl7h* zLsF7t$sz+h_PMvtdnfFwbKbqL?ygsRb#-@D_v%&a`~QFa;ahz%b+G_YXv4H%00;yC zpvwceI0vq2I6QRlvhi`SXSWStH+tyeVdL%NaDfHF0b&S*5ORf>kdTmsn3#l&o`US^ zRWcS@I!byd8#fmx8z%>kpo}OFzZ5?Qr*9lC<>- zBO<1|M$f>=&BM#bFCcYYT1Hk*{^l)pn1-g7wvn-k>0L8(3p;xUM<-_&S8pF*KmSJo zkDrD|L_T{S6`hovlA4yDk%@X$@Vf9#QE|!J>YCcR`i91)_w5~>UEMvH-oBC1vGIw? zsV~!Czb!5;udIGwTi@N=KR7%({(15X1c3ho&gJz#0R10$E`fva@bSU;kl*lt@cb`N zFeN?#hxipLRRf5PCpD);C?SnnVt!Q{5meIf2d%BwFfkpM)K~7^-yr=3(Z2>L?EebU zUjh9U&&4!A3I<&&3`_|q14pg^ACQL^Lj33LPZ9iS1%L2`fJf4mx%K>+LbU{KeUc;w zQq=_(AR3uqdKPoLJgTunk~-c)OF6%bqya^J0f_&K=zlFkm`Q->kAy&^M5E_&Ur=Lr zSSKahr!uKmVzZ@_+rar30HdLwKF0HIWoL79;y0Cqr&9NAvvSkv7k(Ym>YAfDcpnwb zxX#s7YjX&#e95(~S_+e8Ta3OtT(|slCdlxRxnYFK#okK5@%EFdRl!_?$vE^#B#8mz z_YZ_!uhNgdH$`E#<>ThH-F)|P883(9PVKQ*d(v`Lf--t%W2a`mnn|l?*9+o) zUU$7=2p4r~CAtr#+wewuuV{%)8S`mnt1NzIHZIGpIJvsBcPYTA;58ag$^3-c7b{wy zBOW+`n-&k>&M;Q)7dJ&pPbWq?UWMAfTEqpdf%uzHusGRU@M*}EKd(L}`C6OQ9UI-SyV zH;)L)5Btndi)9UwA_o)i zHOM)sSE93@!-iUxE5>enU+du+33Aqd71@B&(Y|oy*p!aS@+1f&8{((wXYoF)~wdz7hV+irmt zh339Ynbv2$vG)~HVl%*z_;H!PMlI_0aiQNcuXh)6dH2%T3bp4odcnTIJQS7cWM<>A zoCBYveg$5y`XZFLEaMrbE+?o!Eb=R*Y&m)Y<|yM)-8rzU*VVCDf}bRz82^~5`T-T< zwVtTRtB{!)Rr+WB5>H~2J~%-Xk;JShUxKoCBmv&c&l|*HH^v-wWAq3a+J^QEg+-T0 z-~owO^*mkNrJ{)ICT>(Uetp!-HGbWxkGrnl^scT~HN@m96Jn-mdd3MV_5Pm2+rNpu zl864pX>B>TU|Ep8FQHJ#>4%Y1ofYNk3!uC>j44wX1$%z`=#!8OTMk^#F0y;ubEL%0 z9(Mt7TA4OexypE*$RzfpI^S~8&Z15>%$X|*wpt;7yN4`JPxlw^@WyYW{M6jb)Y%i` z%Vv=HiQH+~c5#cO9&CvT*P=`#jz7PI_#d;#Rt$<#ii zn`nny)z-kumJ|6FIJk-!Rwv%h5}^&;O*NY9RPOc*c@^DQVAMfwyeC4O`B{tdOf?Wt zWBiG)=q(U^RLaS^Q`eAjdO~GBB;0-Mpswzxa>dJ|)9Ij~uI1Ol8}wcX7CeVSna-OYY(ShVGuUCWBCjz^O z4myEwHY>dYA2vAc+>w@wZikmX#Vo6ewLW@L_hBnC;pH$6Eit>Uy(^l+5aMBJP+udO zcO%fhWc{uh;WicFYl@S?V}a+6yaM{*bAib z@_{(FwXzXoA6Tk1Wtdu}W6#&QxOtDQU*{m<&8q&Tv~bP03@@A@Tnps=WvsTsZG1U% za%!n_c?*X$(=~Y7Dwn#z_ipEnw{g6$N?L?ZsX{@5?~tG?m0TNDS+hXV`V;`E_)@GD zuW|O@7|De0eaVPf`~etDoacs+(dnc~=n7@3y=aV2VQu_)rss0qXLOxJhg`=v_L~P> z9AVdRg_NUAlYuczb@+q+to(RTG|n0JoCSfPWv{>LEqjh`V^-A8VjC>ZF; zK!`EfqmSVIH-Hdz(jI2p{;r5;(%rlTI~rl-iUd?+P^ioWP?p>`kM6}_vsOnH1G+uc zKD!$Pcd;c6KSY#?q5>EOx8~EFE#MgcarA~8l|GPd>~8OX=^%h? zC>q7h{d~E8>tHP!g4|g39CBbfdFEZJ`Slo2;vE3Q_qVWw9kUP=hiP%|+b1pol5#9H zaUphPEJFJ5m_W|a4<)`*!HzNlh_;VCo{1+TnznT*flx}x_F+8QWc-Cg=BHVsWNBb$ zERFEkdVg-N2Y1anVb59@z&k~0C{9|fHe7ONBTQ4#J3+$&c;Z*WT*~LyL-r&U%(-;Vg|%Z+|;k7ayXUrZJ%FxA8S`0;0!PB?uV%!SL}{6_mG%xsZ)cN9_wnnB5$%5i(> zIav27t0)(;2$36V|BWB}Z%(J9wuZNDvy$Jle*}$YIi1{jS{MfRK{-3ver)ebXCYH% zKKBs@7e+G4Mm5eRi3I;tl(&mE9qDK-NsN`|u8p3xPki!buG*e%cIZY7Yf+s{X=7S+ zJBtXE`R(X!3v)WPrP#JeF%9~yH5e9Bye4!LW3Q9>%JEM5olrDu6EmAvaDM`2lZSXt#@d!{ z{cM}_I~{x;8uos!cX~RSUPwL2S4|@4zp2)u-F&iN32E+S$uCagBQ4}-^8vUfHIBs>q84bJ_^e_G4iKjhhWM46^ZvaUb{iYh-A z6h>yF>~i%YaI;+(z#|&g$B6G5Ev&)*+@gD24fSGljydMpRcQ(Fp8+7xUSh2Ts`Vmj zG0rFxYRUwkEXE8~(F8y;s@E{v7IKA8y&^m*0WlW&IHdxbD_9cgoFoMaT`7ez!Eop* zc#{3x4yCA)q#*k$0}~R{UA47IHX0DAU!a3IS7Vx2<`fq;Vrz=@TnY&5deY^>lV);zQ-eJM({s=M zM<7bLFvLeXlFsNTDx1c};Q2>fr2ZmGbcTqPHoz_+IinZKx<~^a3pZo;JpDqdjqSV@ zo42ZzxiKiI_L)J5Lijz6nx@F7p%OXe!H~zz(FyOob@+HC67(K>h^u`j%;}Tkb|H~A zPmCj6Z{f2*-Z!Pbd;VctsNO|TvuoD;C@bbkVF11-YfVo!e57E>{e%$<}b1J#1DohlHgB$)3Q&UXGnZbDvs#}z>w*9m1~=- z1rA98uWePyI`Llnd<3%<0#Z{@*ala%=3t*Wmf=KCWxN zBbfcyXM9XP1s_+4c#;=Vo}Mi4$_{Jn1@Hs~$i%}gfL2|Asiy!r&l7TWaTvXmta4@d z`}}cl9{ACL`{~kW(1*t|XM4`ToFdcti4@@e=cj*)=}(jRs704DLqqbn_Ld=+JL%Ss1=d?JdEHq^!~!J{7O#-m?9!1>(~Bg^NT z!t7*0viWY6inLSM7AKvmm%Sgx5QwF2KOKpZo^ql>52Ctkk!ildT*JC(xk;bf`+Q`I zs=PV1zou^VLdPCB*0^DI*aIB~*V0t(*i|UT{BEfO_a>3o1>i@Q08xHsLDvi(H8pcIHr#GAfobsq#7vbBP3y;f zHl|*uP5hekd2wc^!DpR&&nN$T`4L(Dx(1@$1a20fz zL>llnj_K*G)z@OnlC366J%3qlEzh3Sg(oX#+N=bXg`O_4l;jqlpow|ui{D2%Z@Euy z(>U>7*Z%cpXKi%nJrY__ZdCa{w|7B$U92K{r*+~E*nus5#g0iv2^SbNb57GTr)AaJ znl^gKgqA|A9?zfCBO_m{e-?V>f|m~4GtSM(<9!G@Z7dROEEL|X*l<90rZ#W+pVUyA zt|(j;7`#&B6Mb@{WHF&O>FeN5I@krC;tk`bPhh`OijrZ#QLvY01%t`RBzGrio*&_aK3~KqgvrRZA-N_#I@2KnjpoX z&?26tt?>)MD&$p)X|w3Yu0$RD+Ne*g$cP+tT9kSy%_h}RP9eiHHy5%I9XJ*Z7sPIl z{@P~T(`vZ@7AiUpL|v$4$Ljmdys+cOU|lF{MQ>!*>M=PqF<9Q@B`H;d$(2??i+iNxdTX&$ zu31zr+)j-frGh$RME(fU^&)od&=BQVzCHv07nRF|pS*f4iKKHKJc(~AzNYG7o6(4& zK&wti$np9dQWp_vq!!wk_Ko`?>caybSc=o1m>ohtXXQArqU_gfGp}fczfo?F{{l~*YBKP6nSqGVC zj_&S^rLsrL*{5=1 z6iCOZpqeeTZNuqZ-0@yI^6W6?bL|vsb42$`8((>SZwGg&;j><@DD(91n)%M#q3iE@ zmm~SVIlqbJitT1#Q6*I*lmga&H}3zAh6z_wIw*Z(Fd8;pJe%cGctcxCAhaaaqSj`4`1&HXs~5$(mq-`9U&<0Y!00g=p&&HQ9M&>9Rr2~3DqSuqr3bm z$W&OHdh{RDHG?=%;>{vTro=%}-H>%YJlP%VM9T(RLag4`xW}Ga3Z>8BWHNW~$yZ3J zRC?~q@7{*gR=vLOCLiAI?-UJkmDv&tpI;$$a&HY1pOgY}U7uPbrD(ZAND2cM@+Bp9Ej!nq&dGq?F>iywonM(n! zra{#zva2j)g65;12nM6#B6kV1Lj?`zMHdr$5432$O#;zc^4i=p-8WmK2Ly(~JioBJ zbh_H(T#v&0M)KT~6*U>4ijr}&t^t-6wqpeY1zrr-OU)ic41}-tYuKwcmTza49_i<% z`d6XeX35RFCAStjF>!uaFZ^gt2n7y!N0knVsRB7?zNKQBMYnW&>#0jpSQ?MJJ##X9 zWH&Sf>pC)d-BQaAVJPPYI8E5sOq=vz9SWi)yATyFFC=jhiSXe}PLSWo(fNm1#(~fO z`l|cm_Iwb1U)o?Sj9<%AN@{c42*p&*5etpfOoT1FXm8sb7C)*b~zc$O^L0vZp zxmn7bt435f7DRYHa9Wdk8M9Y{>AM6G%g9Mw2fF`&Q-^_0&N$^{fif--H(xZsr~TRB zyk6bP7&oO@*yeAt*LR}|V^)R1HbChw0H=vE#o#^Dc|j84ta3u8VDk^1oiWYTWUB0* zll87t)u%l{ozh;<(;v8?J?&19F+->Kl1KCD-*5&%8>ORPuuFtAzf|ylyeIGYtTn`h zLdzBZ-Cci_8Qx|GfoH!Rw8%pA?8!D=WBu?wn}<+iM`8Bq&SLg@FIlCWln3v~?;3)bICDX5PoipK_L2aD- zixuhVq$(z(7be%aP$#)x_eRx@_c(-m^B(R+ z3+K0$Z|0=V8(wd@jQj-zD8yjbKs}Q*`3CCnnoaa;`IKeFz*0qv9c&N|&BDg>GEn4I zQgf*W^EZ|5ujf0*Pd*@qT6Osb1*G?o<2`&@{r!3s{duPdrCrtuqT9-jHC~v$&YB-; zVd9}7LHc@Qa$R$l7HmQl57W^b?i*F5W-tFC!T48H)mw*WQSz)AeXAUN?I5lNswMV#2Lb(a@F5#ID1N&GADPFc(v_Ftf+N40G_=QLmqz8ttb-AZ`Hga0YLnHpzj@#Q6zBi zm+}AqFpaw(f)YYVL$kx;MH!T4e(?Kh66@UPt8TZq$McEzL=1$4c;@a;M3l86Q`^>% z8^t}24bAV(R_($gbi{jA%tKeddw(($HHI@fq8z2h2t< zmtUm2?+J7JM4-&`qB3qT^*Zk{=fnVQtl7><)#>RtN~7Z@IGa>8lp#lpR5MyXWs!f~ zT;FPPO@9kMkXy6IA{EavirJHUE?+Dz+wR0Hahkjxu%&f%D-Tnc=45u?(IjAnAhby%$-2n;LQdiejYCuKR`iN)% zEjM?;oixJPknR-~Jw-l?-yVn$RKixr6Bq&ibO!Ed5i7~>=z&q}N t$acyAAx{lr{3u!)9}{{S;MeakhQCue{1a;Q&z}E=h5i?X1-|(5zW~+a!`A=+ literal 0 HcmV?d00001 diff --git a/HW01/RUJIAHAO798.py b/HW01/RUJIAHAO798.py new file mode 100644 index 00000000..6d763100 --- /dev/null +++ b/HW01/RUJIAHAO798.py @@ -0,0 +1,253 @@ +# Numerical Operations +import csv +import math +import os + +import numpy as np +# Reading/Writing Data +import pandas as pd +# Pytorch +import torch +import torch.nn as nn +from sklearn.feature_selection import SelectFromModel +from sklearn.linear_model import RidgeCV +from torch.utils.data import Dataset, DataLoader, random_split +# For Progress Bar +from tqdm import tqdm + + +# For plotting learning curve +# from torch.utils.tensorboard import SummaryWriter + +def same_seed(seed): + """Fixes random number generator seeds for reproducibility.""" + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(seed) + + +def train_valid_split(data_set, valid_ratio, seed): + """Split provided training data into training set and validation set""" + valid_set_size = int(valid_ratio * len(data_set)) + train_set_size = len(data_set) - valid_set_size + train_set, valid_set = random_split(data_set, [train_set_size, valid_set_size], + generator=torch.Generator().manual_seed(seed)) + return np.array(train_set), np.array(valid_set) + + +def predict(test_loader, model, device): + model.eval() # Set your model to evaluation mode. + preds = [] + for x in tqdm(test_loader): + x = x.to(device) + with torch.no_grad(): + pred = model(x) + preds.append(pred.detach().cpu()) + preds = torch.cat(preds, dim=0).numpy() + return preds + + +class COVID19Dataset(Dataset): + """ + x: Features. + y: Targets, if none, do prediction. + """ + + def __init__(self, x, y=None): + if y is None: + self.y = y + else: + self.y = torch.FloatTensor(y) + self.x = torch.FloatTensor(x) + + def __getitem__(self, idx): + if self.y is None: + return self.x[idx] + else: + return self.x[idx], self.y[idx] + + def __len__(self): + return len(self.x) + + +class MyModel(nn.Module): + def __init__(self, input_dim): + super(MyModel, self).__init__() + self.layers = nn.Sequential( + nn.Linear(input_dim, 16), + nn.ReLU(), + nn.Linear(16, 4), + nn.ReLU(), + nn.Linear(4, 2), + nn.ReLU(), + nn.Linear(2, 1) + ) + + def forward(self, x): + x = self.layers(x) + x = x.squeeze(1) # (B, 1) -> (B) + return x + + +def select_feat(train_data, valid_data, test_data, select_all=True): + """Selects useful features to perform regression""" + y_train, y_valid = train_data[:, -1], valid_data[:, -1] + raw_x_train, raw_x_valid, raw_x_test = train_data[:, :-1], valid_data[:, :-1], test_data + + if select_all: + feat_idx = list(range(raw_x_train.shape[1])) + else: + selection = SelectFromModel(RidgeCV()).fit(raw_x_train, y_train) + feat_idx = [i for i, support in enumerate(selection.get_support()) if support] + print('selected feature list: ', feat_idx) + + return raw_x_train[:, feat_idx], raw_x_valid[:, feat_idx], raw_x_test[:, feat_idx], y_train, y_valid + + +def trainer(train_loader, valid_loader, model, config, device): + criterion = nn.MSELoss(reduction='mean') # Define your loss function, do not modify this. + + # Define your optimization algorithm. + # optimizer = torch.optim.SGD(model.parameters(), lr=1e-5, momentum=0.1) + optimizer = torch.optim.Adam(model.parameters(), weight_decay=0.1, lr=1e-3) + + # writer = SummaryWriter() # Writer of tensoboard. + + if not os.path.isdir('./models'): + os.mkdir('./models') # Create directory of saving models. + + n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0 + + for epoch in range(n_epochs): + model.train() # Set your model to train mode. + loss_record = [] + + # tqdm is a package to visualize your training progress. + train_pbar = tqdm(train_loader, position=0, leave=True) + + for x, y in train_pbar: + optimizer.zero_grad() # Set gradient to zero. + x, y = x.to(device), y.to(device) # Move your data to device. + pred = model(x) + loss = criterion(pred, y) + loss.backward() # Compute gradient(backpropagation). + optimizer.step() # Update parameters. + step += 1 + loss_record.append(loss.detach().item()) + + # Display current epoch number and loss on tqdm progress bar. + train_pbar.set_description(f'Epoch [{epoch + 1}/{n_epochs}]') + train_pbar.set_postfix({'loss': loss.detach().item()}) + + mean_train_loss = sum(loss_record) / len(loss_record) + # writer.add_scalar('Loss/train', mean_train_loss, step) + + model.eval() # Set your model to evaluation mode. + loss_record = [] + for x, y in valid_loader: + x, y = x.to(device), y.to(device) + with torch.no_grad(): + pred = model(x) + loss = criterion(pred, y) + + loss_record.append(loss.item()) + + mean_valid_loss = sum(loss_record) / len(loss_record) + print(f'Epoch [{epoch + 1}/{n_epochs}]: Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f}') + # writer.add_scalar('Loss/valid', mean_valid_loss, step) + + if mean_valid_loss < best_loss: + best_loss = mean_valid_loss + torch.save(model.state_dict(), config['save_path']) # Save your best model + print('Saving model with loss {:.3f}...'.format(best_loss)) + early_stop_count = 0 + else: + early_stop_count += 1 + + if early_stop_count >= config['early_stop']: + print('\nModel is not improving, so we halt the training session.') + print('Final best loss: ', best_loss) + return best_loss + print('Final best loss: ', best_loss) + return best_loss + + +device = 'cuda' if torch.cuda.is_available() else 'cpu' +config = { + 'seed': 12345, # Your seed number, you can pick your lucky number. :) + 'select_all': False, # Whether to use all features. + 'valid_ratio': 0.2, # validation_size = train_size * valid_ratio + 'n_epochs': 10000, # Number of epochs. + 'batch_size': 512, + 'early_stop': 500, # If model has not improved for this many consecutive epochs, stop training. + 'save_path': './models/model.ckpt' # Your model will be saved here. +} + +# Set seed for reproducibility +same_seed(config['seed']) + +# train_data size: 2699 x 118 (id + 37 states + 16 features x 5 days) +# test_data size: 1078 x 117 (without last day's positive rate) +train_data, test_data = pd.read_csv('./covid.train.csv').values, pd.read_csv('./covid.test.csv').values +train_data, valid_data = train_valid_split(train_data, config['valid_ratio'], config['seed']) + +# Print out the data size. +print(f"""train_data size: {train_data.shape} +valid_data size: {valid_data.shape} +test_data size: {test_data.shape}""") + +# Select features +x_train, x_valid, x_test, y_train, y_valid = select_feat(train_data, valid_data, test_data, config['select_all']) + +# Print out the number of features. +print(f'number of features: {x_train.shape[1]}') + +train_dataset, valid_dataset, test_dataset = COVID19Dataset(x_train, y_train), \ + COVID19Dataset(x_valid, y_valid), \ + COVID19Dataset(x_test) + +# Pytorch data loader loads pytorch dataset into batches. +train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True) +valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True) +test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False, pin_memory=True) + +if __name__ == '__main__': + """ + Final result: + Score: 0.92776 + Public score: 0.87331 + """ + model = MyModel(input_dim=x_train.shape[1]).to(device) # put your model and data on the same computation device. + trainer(train_loader, valid_loader, model, config, device) + + # best_final_loss = math.inf + best_seed = config['seed'] + # for i in range(1000): + # config['seed'] = i + # model = MyModel(input_dim=x_train.shape[1]).to(device) + # best_loss = trainer(train_loader, valid_loader, model, config, device) + # if best_loss < best_final_loss: + # best_seed = i + + config['seed'] = best_seed + model = MyModel(input_dim=x_train.shape[1]).to(device) + trainer(train_loader, valid_loader, model, config, device) + + + def save_pred(preds, file): + """ Save predictions to specified file """ + with open(file, 'w') as fp: + writer = csv.writer(fp) + writer.writerow(['id', 'tested_positive']) + for i, p in enumerate(preds): + writer.writerow([i, p]) + + + print(config) + model = MyModel(input_dim=x_train.shape[1]).to(device) + model.load_state_dict(torch.load(config['save_path'])) + preds = predict(test_loader, model, device) + save_pred(preds, 'pred.csv')