From 5435344197276dc17c48aa9d1dfa9a244153965f Mon Sep 17 00:00:00 2001
From: xkjiang-srfv <52552899+xkjiang-srfv@users.noreply.github.com>
Date: Fri, 27 Aug 2021 20:42:09 +0800
Subject: [PATCH 01/11] Add files via upload

---
 ActivationPrune.py   | 134 +++++++++
 ActivationPrune.xlsx | Bin 0 -> 11602 bytes
 Conv2dNew.py         | 197 ++++++++++++++
 K_means.py           | 153 +++++++++++
 Op.py                |  46 ++++
 WeightPrune.py       | 183 +++++++++++++
 main.py              |  48 ++++
 model.py             | 636 +++++++++++++++++++++++++++++++++++++++++++
 train.py             | 288 ++++++++++++++++++++
 9 files changed, 1685 insertions(+)
 create mode 100644 ActivationPrune.py
 create mode 100644 ActivationPrune.xlsx
 create mode 100644 Conv2dNew.py
 create mode 100644 K_means.py
 create mode 100644 Op.py
 create mode 100644 WeightPrune.py
 create mode 100644 main.py
 create mode 100644 model.py
 create mode 100644 train.py

diff --git a/ActivationPrune.py b/ActivationPrune.py
new file mode 100644
index 0000000..bf511a3
--- /dev/null
+++ b/ActivationPrune.py
@@ -0,0 +1,134 @@
+import copy
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from model import *
+from train import *
+from Conv2dNew import Execution
+
+
+class Conv2dTest(nn.Conv2d):
+    def __init__(self,
+                 ratio,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias=True,
+                 padding_mode='zeros',
+                 ):
+        super(Conv2dTest, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups,
+                                         bias, padding_mode)
+        self.ratio = ratio
+
+    def forward(self, input):
+        E = Execution(self.ratio)
+        output = E.conv2d(input, self.weight, self.bias, self.stride, self.padding)
+        return output
+
+class LinearTest(nn.Linear):
+    def __init__(self,
+                 in_features,
+                 out_features,
+                 bias=True,
+                 ):
+        super(LinearTest, self).__init__(in_features, out_features, bias)
+
+    def forward(self, input):
+        output = F.linear(input, self.weight, self.bias)
+        return output
+
+def prepare(model, ratio, inplace=False):
+    # defined inside prepare: walks the module tree and swaps layers in place
+    def addActivationPruneOp(module):
+        nonlocal layer_cnt
+        for name, child in module.named_children():
+            if isinstance(child, nn.Conv2d):
+                activationPruneConv = Conv2dTest(
+                    ratio,
+                    child.in_channels,
+                    child.out_channels, child.kernel_size, stride=child.stride, padding=child.padding,
+                    dilation=child.dilation, groups=child.groups, bias=(child.bias is not None),
+                    padding_mode=child.padding_mode
+                )
+                if child.bias is not None:
+                    activationPruneConv.bias = child.bias
+                activationPruneConv.weight = child.weight
+                module._modules[name] = activationPruneConv
+                layer_cnt += 1
+            elif isinstance(child, nn.Linear):
+                activationPruneLinear = LinearTest(
+                    child.in_features, child.out_features,
+                    bias=(child.bias is not None)
+                )
+                if child.bias is not None:
+                    activationPruneLinear.bias = child.bias
+                activationPruneLinear.weight = child.weight
+                module._modules[name] = activationPruneLinear
+                layer_cnt += 1
+            else:
+                addActivationPruneOp(child)  # recurse into containers; layers such as MaxPool are left unchanged
+    layer_cnt = 0
+    if not inplace:
+        model = copy.deepcopy(model)
+    addActivationPruneOp(model)  # attach the input-feature-map pruning op to every conv/linear layer
+    return model
+
+def getModel(modelName):
+    if modelName == 'LeNet':
+        return getLeNet()  # build the plain model skeleton
+    elif modelName == 'AlexNet':
+        return getAlexnet()
+    elif modelName == 'VGG16':
+        return get_vgg16()
+    elif modelName == 'SqueezeNet':
+        return get_squeezenet()
+    elif modelName == 'ResNet':
+        return get_resnet18()
+
+def getDataSet(modelName, batchSize, imgSize):
+    if modelName == 'VGG16' or modelName == 'AlexNet' or modelName == 'ResNet' or modelName == 'SqueezeNet':
+        dataloaders, dataset_sizes = load_cifar10(batch_size=batchSize, pth_path='./data',
+                                                  img_size=imgSize)  # pick the dataset for this model
+    elif modelName == 'LeNet':
+        dataloaders, dataset_sizes = load_mnist(batch_size=batchSize, path='./data', img_size=imgSize)
+
+    return dataloaders, dataset_sizes
+
+def getPruneModel(model_name, weight_file_path, pattern, ratio):
+    model_origin = getModel(model_name)
+    if pattern == 'test' or pattern == 'retrain':
+        model_origin.load_state_dict(torch.load(weight_file_path))  # load trained weights into the skeleton
+    activationPruneModel = prepare(model_origin, ratio)
+
+    return activationPruneModel
+
+def activationPruneModelOp(model_name, batch_size, img_size, pattern, ratio, epoch):
+    dataloaders, dataset_sizes = getDataSet(model_name, batch_size, img_size)
+    criterion = nn.CrossEntropyLoss()
+
+    if pattern == 'retrain' or pattern == 'train':
+        weight_file_path = './pth/' + model_name + '/ratio=0' + '/Activation' + '/best.pth'
+        activationPruneModel = getPruneModel(model_name, weight_file_path, pattern, ratio)
+        optimizer = optim.SGD(activationPruneModel.parameters(), lr=0.01, momentum=0.9)
+        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.8)  # learning-rate decay schedule
+        train_model_jiang(activationPruneModel, dataloaders, dataset_sizes, ratio, 'activation', pattern,
+                          criterion=criterion, optimizer=optimizer, name=model_name,
+                          scheduler=scheduler, num_epochs=epoch, rerun=False)  # run the training loop
+    if pattern == 'test':
+        weight_file_path = './pth/' + model_name + '/ratio=' + str(ratio) + '/Activation/' + 'best.pth'
+        activationPruneModel = getPruneModel(model_name, weight_file_path, pattern, ratio)
+        test_model(activationPruneModel, dataloaders, dataset_sizes, criterion=criterion)
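For orientation, the sketch below shows the module-swapping pattern that prepare() relies on, in a self-contained form. PrunedConv is a hypothetical stand-in for Conv2dTest: it carries the pruning ratio but keeps the stock nn.Conv2d forward instead of calling Execution, so it runs without the rest of the repo.

import copy
import torch.nn as nn

class PrunedConv(nn.Conv2d):
    # Hypothetical stand-in: same constructor pattern as Conv2dTest above,
    # but with the stock convolution forward.
    def __init__(self, ratio, conv):
        super().__init__(conv.in_channels, conv.out_channels, conv.kernel_size,
                         stride=conv.stride, padding=conv.padding, dilation=conv.dilation,
                         groups=conv.groups, bias=conv.bias is not None,
                         padding_mode=conv.padding_mode)
        self.ratio = ratio
        self.weight = conv.weight              # reuse the trained parameters
        if conv.bias is not None:
            self.bias = conv.bias

def swap_convs(module, ratio):
    for name, child in module.named_children():
        if isinstance(child, nn.Conv2d):
            module._modules[name] = PrunedConv(ratio, child)
        else:
            swap_convs(child, ratio)           # recurse into containers

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 16, 3))
pruned = copy.deepcopy(model)                  # mirrors prepare(..., inplace=False)
swap_convs(pruned, ratio=0.1)
print(pruned)                                  # both convs are now PrunedConv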
diff --git a/ActivationPrune.xlsx b/ActivationPrune.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..e05380510ab4e69aeb3bdbcaa8d7a2ad2f7c1e8c
GIT binary patch
literal 11602

diff --git a/Conv2dNew.py b/Conv2dNew.py
new file mode 100644
--- /dev/null
+++ b/Conv2dNew.py
+        if self.bias is not None:
+            output = torch.einsum('ij,jk->ik', self.W_col, self.X_col) + torch.unsqueeze(self.bias, 1)
+        else:
+            output = torch.einsum('ij,jk->ik', self.W_col, self.X_col)
+        # Reshape into (n_filters, out_height, out_width, batch_size)
+        output = output.reshape(self.output_shape() + (batch_size, ))
+        # Redistribute axes so that batch size comes first
+        return output.permute(3, 0, 1, 2)
+
+    def output_shape(self):
+        channels, height, width = self.input_shape
+        pad_h, pad_w = determine_padding(self.filter_shape, output_shape=self.padding)
+        output_height = (height + np.sum(pad_h) - self.filter_shape[0]) / self.stride[0] + 1
+        output_width = (width + np.sum(pad_w) - self.filter_shape[1]) / self.stride[1] + 1
+        return self.n_filters, int(output_height), int(output_width)
+
+    def parameters(self):
+        return np.prod(self.W.shape) + np.prod(self.w0.shape)
+
+    def compressionRateStatistics(self, input, andSum, compareRatio):
+        pruneNumber = 0
+        zerosNumber = 0
+        for i in range(input.shape[1]):
+            if andSum[i] == 0:
+                zerosNumber += 1
+            if andSum[i] != 0 and andSum[i] <= compareRatio:
+                pruneNumber += 1
+        print('pruneNumberRatio=', pruneNumber / (input.shape[1]))
+        print('zerosNumberRatio=', zerosNumber / (input.shape[1]))
+
+    def accuracyTest(self, andSum):
+        for i in range(len(andSum)):
+            print(i, andSum[i])
+
+    def activationSlidePrune(self, input, compareRatio):
+        matrixOne = torch.ones(input.shape, device='cuda:0')
+
+        x = torch.clone(torch.detach(input))
+        andOp = torch.logical_and(matrixOne, x)
+        andSum = torch.sum(andOp, dim=0)  # per-column count of non-zero activations
+
+        # self.compressionRateStatistics(input, andSum, compareRatio)
+        # self.accuracyTest(andSum)
+
+        x1 = x.permute(1, 0)
+        x1[(andSum <= compareRatio), ] = 0  # zero every column at or below the threshold
+        x = x1.permute(1, 0)
+        return x
+
+# image = np.random.randint(0,255,size=(1,3,32,32)).astype(np.uint8)
+# input_shape=image.squeeze().shape
+# conv2d = Conv2D(16, (3,3), input_shape=input_shape, padding='same', stride=1)
+# conv2d.initialize(None)
+# output=conv2d.forward_pass(image,training=True)
+# print(output.shape)
\ No newline at end of file
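The core of activationSlidePrune is easier to see on a toy tensor. Below is a minimal CPU re-enactment (the method above assumes a CUDA device); the logical_and-plus-sum is just a per-column non-zero count:

import torch

def slide_prune(x, compare_ratio):
    # same role as the logical_and + sum above: count non-zeros per column
    nonzero_per_col = torch.count_nonzero(x, dim=0)
    x = x.clone()
    x[:, nonzero_per_col <= compare_ratio] = 0  # zero out weakly used columns
    return x

x = torch.tensor([[1., 0., 2., 0.],
                  [0., 0., 3., 1.],
                  [4., 0., 5., 0.]])
print(slide_prune(x, compare_ratio=1))
# columns 1 and 3 have at most one non-zero entry and are pruned;
# columns 0 and 2 survive unchanged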
diff --git a/K_means.py b/K_means.py
new file mode 100644
index 0000000..2a754db
--- /dev/null
+++ b/K_means.py
@@ -0,0 +1,153 @@
+# Clustering utilities
+
+import random
+import pandas as pd
+import numpy as np
+import copy
+import math
+
+
+# Similarity of every sample to every centroid
+def Dis(dataSet, centroids, k):
+    # Top up the centroids if the previous round produced fewer than k clusters
+    if len(centroids) < k:
+        centroids = np.append(centroids, random.sample(list(dataSet), k-len(centroids)), axis=0)
+
+    # Process the samples
+    clalist = []
+    for data in dataSet:
+        # np.tile(a, (k, 1)) stacks k copies of the sample so it can be compared with every centroid at once
+        diff = np.tile(data, (k, 1))
+        mul_Diff = np.multiply(diff, centroids)
+        mul_Dist = np.sum(mul_Diff, axis=1)  # row-wise sum: the inner product with each centroid
+        clalist.append(mul_Dist)
+    clalist = np.array(clalist)  # a len(dataSet) x k array of sample-to-centroid similarities
+    return clalist
+
+
+# Recompute the centroids
+def classify(dataSet, centroids, k):
+    # Similarity of every sample to every centroid
+    clalist = Dis(dataSet, centroids, k)
+    # Assign each sample to a centroid, then average the groups
+    minDistIndices = np.argmax(clalist, axis=1)  # argmax: index of the most similar centroid in each row
+    newCentroids = pd.DataFrame(dataSet).groupby(minDistIndices).mean()  # group the samples by assignment and take the mean
+    newCentroids = newCentroids.values
+
+    # Re-encode each new centroid as ranks 1..n, so it stays integer-valued like the samples
+    for centro in newCentroids:
+        # centro is a one-dimensional vector
+        sorted_data = np.argsort(centro)  # sort order
+        value = 1
+        for valueIndex in sorted_data:
+            centro[valueIndex] = value
+            value += 1
+
+    # Measure the change; the new clustering may have fewer than k groups
+    if len(newCentroids) != len(centroids):
+        changed = 1  # definitely changed
+    else:
+        changed = newCentroids - centroids  # may be all zeros
+
+    return changed, newCentroids
+
+
+# Pick the initial centers
+def euler_distance(point1: list, point2: list) -> float:
+    """
+    Originally the Euclidean distance between two points of any dimension:
+        distance = sqrt(sum((a - b) ** 2))
+    Here it is replaced by the inner product, to match the similarity
+    measure used in Dis().
+    """
+    distance = 0.0
+    for a, b in zip(point1, point2):
+        distance += a*b
+    return distance
+
+
+def get_closest_dist(point, centroids):
+    min_dist = math.inf  # start at infinity
+    for i, centroid in enumerate(centroids):
+        dist = euler_distance(centroid, point)
+        if dist < min_dist:
+            min_dist = dist
+    return min_dist
+
+
+def kpp_centers(data_set: list, k: int) -> list:
+    """
+    Return k objects from the data set to serve as initial centroids
+    (k-means++-style seeding).
+    """
+    cluster_centers = []
+    cluster_centers.append(random.choice(data_set))
+    d = [0 for _ in range(len(data_set))]
+    for _ in range(1, k):
+        total = 0.0
+        for i, point in enumerate(data_set):
+            d[i] = get_closest_dist(point, cluster_centers)  # distance to the nearest existing center
+            total += d[i]
+        total *= random.random()
+        for i, di in enumerate(d):  # roulette-wheel selection of the next center
+            total -= di
+            if total > 0:
+                continue
+            cluster_centers.append(data_set[i])
+            break
+    return cluster_centers
+
+
+# Cluster with k-means
+def kmeans(dataSet, k):
+    # Pre-encode dataSet as the importance (rank) matrix the similarity computation works on
+    valueSet = np.zeros(dataSet.shape, dtype=int)  # rank matrix
+    for index in range(len(dataSet)):
+        data = dataSet[index]
+        value = valueSet[index]
+        sorted_data = list(map(abs, data))  # absolute values
+        sorted_data = np.argsort(sorted_data)  # sort order
+        i = 1  # the smaller the value, the smaller the rank it is assigned
+        for valueIndex in sorted_data:
+            value[valueIndex] = i
+            i += 1
+
+    # Initial centroids
+    # centroids = random.sample(dataSet, k)
+    centroids = kpp_centers(valueSet, k)
+
+    # Update the centroids until nothing changes (at most 100 iterations)
+    i = 100
+    changed, newCentroids = classify(valueSet, centroids, k)
+    while np.any(changed != 0) and i > 0:
+        changed, newCentroids = classify(valueSet, newCentroids, k)
+        i = i-1
+        print("iteration {}".format(100-i))
+
+    centroids = sorted(newCentroids.tolist())  # tolist() turns the matrix into a list; sorted() orders it
+
+    clalist = Dis(valueSet, centroids, k)
+    minDistIndices = np.argmax(clalist, axis=1)
+    return minDistIndices
+
+
+def getCluster(input, clusters_num):
+    # Conv layers are clustered as 4-D tensors, linear layers as 2-D
+    if len(input.shape) == 2:  # linear layer
+        fcValues = input.detach().cpu().numpy()  # to numpy
+        # input.shape[1] is the number of values in one clustering unit
+        clusterIndex = kmeans(fcValues, clusters_num)  # cluster
+    elif len(input.shape) == 4:  # conv layer
+        kernel_size = input.shape[3]  # kernel size
+        preShape = input.shape[:2]  # first two dims of the 4-D tensor
+        inputCut = input.view(preShape[0]*preShape[1], kernel_size*kernel_size)  # flatten 4-D to 2-D: one row per kernel
+        convValues = inputCut.detach().cpu().numpy()  # to numpy
+        clusterIndex = kmeans(convValues, clusters_num)  # cluster
+        clusterIndex.resize(preShape)
+    else:
+        clusterIndex = None
+
+    return clusterIndex
\ No newline at end of file
diff --git a/Op.py b/Op.py
new file mode 100644
index 0000000..be0a88c
--- /dev/null
+++ b/Op.py
@@ -0,0 +1,46 @@
+from ActivationPrune import activationPruneModelOp
+from WeightPrune import weightPruneModelOp
+import os
+def makeDir(model_name, ratio, patternA):
+    if not os.path.exists('./pth/' + model_name + '/ratio=' + str(ratio)):
+        os.makedirs('./pth/' + model_name + '/ratio=' + str(ratio) + '/Activation')
+    if patternA != 'train':
+        os.makedirs('./pth/' + model_name + '/ratio=' + str(ratio) + '/ActivationWeight')
+        os.makedirs('./pth/' + model_name + '/ratio=' + str(ratio) + '/Weight')
+
+def Op(operation, model_name, batch_size, img_size, ratio, epochA, epochAW, weightParameter, LinearParameter):
+    if operation == 'trainInitialModel':  # train the baseline model, without any pruning
+        patternA = 'train'
+        ratio = 0
+        makeDir(model_name, ratio, patternA)
+        activationPruneModelOp(model_name, batch_size, img_size, patternA, ratio, epochA)
+
+    if operation == 'onlyActivationPruneWithRetrain':  # prune only the input feature maps, without clustered weight pruning
+        patternA = 'retrain'
+        makeDir(model_name, ratio, patternA)
+        activationPruneModelOp(model_name, batch_size, img_size, patternA, ratio, epochA)
+
+    if operation == 'onlyWeightPruneWithRetrain':
+        patternA = 'test'
+        patternW = 'train'
+        makeDir(model_name, ratio, patternA)
+        weightPruneModelOp(model_name, batch_size, img_size, ratio, patternW, epochAW, weightParameter, LinearParameter)
+
+    if operation == 'activationWeightPruneWithRetrain':
+        patternA = 'retrain'
+        patternW = 'retrain'
+        makeDir(model_name, ratio, patternA)
+        activationPruneModelOp(model_name, batch_size, img_size, patternA, ratio, epochA)
+        weightPruneModelOp(model_name, batch_size, img_size, ratio, patternW, epochAW, weightParameter, LinearParameter)
+
+    if operation == 'onlyActivationPruneTest':
+        patternA = 'test'
+        makeDir(model_name, ratio, patternA)
+        activationPruneModelOp(model_name, batch_size, img_size, patternA, ratio, epochA)
+
+    if operation == 'activationWeightPruneTest':
+        patternA = 'test'
+        patternW = 'test'
+        makeDir(model_name, ratio, patternA)
+        weightPruneModelOp(model_name, batch_size, img_size, ratio, patternW, epochAW, weightParameter, LinearParameter)
+
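What kmeans() actually clusters is the rank encoding built at its top: each kernel becomes the ranks of its absolute values, and similarity is the dot product of rank vectors. A small illustration with made-up numbers:

import numpy as np

kernels = np.array([[ 0.9, -0.1, 0.5 ],
                    [ 0.8,  0.05, 0.6 ],
                    [-0.1,  0.7,  0.02]])
ranks = np.zeros_like(kernels, dtype=int)
for r, row in enumerate(kernels):
    order = np.argsort(np.abs(row))        # indices from smallest |w| to largest
    for rank, idx in enumerate(order, start=1):
        ranks[r, idx] = rank               # smallest magnitude gets rank 1
print(ranks)
# [[3 1 2]
#  [3 1 2]
#  [2 3 1]] -> rows 0 and 1 share an importance pattern, row 2 does not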
diff --git a/WeightPrune.py b/WeightPrune.py
new file mode 100644
index 0000000..88abc06
--- /dev/null
+++ b/WeightPrune.py
@@ -0,0 +1,183 @@
+# -*- coding: utf-8 -*-
+import copy
+import torch
+import torchvision.transforms as transforms
+import torch.optim as optim
+from torch.utils.data import DataLoader
+import torch.nn.utils.prune as prune
+import pandas as pd
+import numpy as np
+from K_means import getCluster
+import torch.nn as nn
+from model import *
+from train import *
+from ActivationPrune import Conv2dTest, LinearTest
+from torch.nn.parameter import Parameter
+
+def scp_upgrade(kernel, old_scp):
+    old_scp += np.abs(kernel.cpu().detach().numpy())
+    return old_scp
+
+def scp_binaeryzation(scps, C):
+    # Zero the C smallest entries of every SCP; the rest become 1
+    if len(scps.shape) == 3:
+        for r in np.arange(0, scps.shape[0]):
+            series = pd.Series(scps[r].ravel())
+            rank_info = series.rank()
+            for i in np.arange(0, scps[r].shape[0]):
+                for j in np.arange(0, scps[r].shape[1]):
+                    index = i*scps[r].shape[1]+j  # row-major position in the raveled SCP
+                    if(rank_info[index] <= C):
+                        scps[r][i][j] = 0
+                    else:
+                        scps[r][i][j] = 1
+
+    elif len(scps.shape) == 2:
+        for r in np.arange(0, scps.shape[0]):
+            series = pd.Series(scps[r].ravel())
+            rank_info = series.rank()
+            for i in np.arange(0, scps[r].shape[0]):
+                index = i
+                if(rank_info[index] <= C):
+                    scps[r][i] = 0
+                else:
+                    scps[r][i] = 1
+
+class PatternPruningMethod(prune.BasePruningMethod):
+    PRUNING_TYPE = "unstructured"
+
+    def __init__(self, clusters_num, cut_num, pruning_type):
+        self.clusters_num = clusters_num
+        self.cut_num = cut_num
+        self.pruning_type = pruning_type
+        prune.BasePruningMethod.__init__(self)
+
+    def compute_mask(self, t, default_mask):
+        mask = default_mask.clone()  # clone a mask the size of this layer's filters
+        if self.pruning_type == 'conv':
+            scps = np.zeros(self.clusters_num*default_mask.shape[-1]*default_mask.shape[-1])  # one SCP per cluster, holding that cluster's pattern
+            scps.resize(self.clusters_num, default_mask.shape[-1], default_mask.shape[-1])
+
+            clusters = getCluster(t, self.clusters_num)  # cluster this layer's kernels
+
+            print(clusters)
+
+            for i in np.arange(0, clusters.shape[0]):  # sweep all kernels, accumulating every cluster's SCP
+                for j in np.arange(0, clusters.shape[1]):
+                    scp_upgrade(t[i][j], scps[clusters[i][j]])
+
+            scp_binaeryzation(scps, self.cut_num)  # binarize the SCPs into the actual patterns
+            print(scps)
+
+            for i in np.arange(0, clusters.shape[0]):  # build the final mask from each kernel's cluster pattern
+                for j in np.arange(0, clusters.shape[1]):
+                    mask[i][j] = torch.from_numpy(scps[clusters[i][j]])
+
+        elif self.pruning_type == 'full':
+
+            scps = np.zeros(self.clusters_num*default_mask.shape[-1])
+            scps.resize(self.clusters_num, default_mask.shape[-1])
+
+            clusters = getCluster(t, self.clusters_num)
+
+            print(clusters)
+
+            for i in np.arange(0, clusters.shape[0]):
+                scp_upgrade(t[i], scps[int(clusters[i])])
+
+            scp_binaeryzation(scps, self.cut_num)  # binarize the SCPs into the actual patterns
+            print(scps)
+
+            for i in np.arange(0, clusters.shape[0]):  # build the final mask from each row's cluster pattern
+                mask[i] = torch.from_numpy(scps[int(clusters[i])])
+
+        return mask
+
+def weightPrune(model, ratio, weightParameter, LinearParameter, inplace=False):
+    def activationWeightPruneOp(module):
+        for name, child in module.named_children():
+            if isinstance(child, nn.Conv2d):
+                print(child)
+                print(child.weight.shape)
+                print('clusters_num=6', 'cut_num=', child.weight.shape[-1] * child.weight.shape[-2] / weightParameter,
+                      'pruning_type=conv')
+                convPruning = PatternPruningMethod(clusters_num=6,
+                                                   cut_num=child.weight.shape[-1] * child.weight.shape[-2] / weightParameter,
+                                                   pruning_type='conv')
+                convPruning.apply(child, 'weight', 6, child.weight.shape[-1] * child.weight.shape[-2] / weightParameter, 'conv')
+
+                # also attach the input-feature-map pruning op
+                activationWeightPruneConv = Conv2dTest(
+                    ratio,
+                    child.in_channels,
+                    child.out_channels, child.kernel_size, stride=child.stride, padding=child.padding,
+                    dilation=child.dilation, groups=child.groups, bias=(child.bias is not None),
+                    padding_mode=child.padding_mode
+                )
+                if child.bias is not None:
+                    activationWeightPruneConv.bias = child.bias
+                activationWeightPruneConv.weight = Parameter(child.weight)
+                module._modules[name] = activationWeightPruneConv
+
+            elif isinstance(child, nn.Linear):
+                print(child)
+                print(child.weight.shape)
+                print('clusters_num=8', 'cut_num=', child.weight.shape[-1] / LinearParameter, 'pruning_type=full')
+                fullPruning = PatternPruningMethod(clusters_num=8, cut_num=child.weight.shape[-1] / LinearParameter,
+                                                   pruning_type='full')
+                fullPruning.apply(child, 'weight', 8, child.weight.shape[-1] / LinearParameter, 'full')
+            else:
+                activationWeightPruneOp(child)  # recurse into containers; layers such as MaxPool are left unchanged
+    if not inplace:
+        model = copy.deepcopy(model)
+    activationWeightPruneOp(model)  # attach the pruning ops to every layer
+    return model
+
+def getModel(modelName):
+    if modelName == 'LeNet':
+        return getLeNet()  # build the plain model skeleton
+    elif modelName == 'AlexNet':
+        return getAlexnet()
+    elif modelName == 'VGG16':
+        return get_vgg16()
+    elif modelName == 'SqueezeNet':
+        return get_squeezenet()
+    elif modelName == 'ResNet':
+        return get_resnet18()
+
+def getDataSet(modelName, batchSize, imgSize):
+    if modelName == 'VGG16' or modelName == 'AlexNet' or modelName == 'ResNet' or modelName == 'SqueezeNet':
+        dataloaders, dataset_sizes = load_cifar10(batch_size=batchSize, pth_path='./data',
+                                                  img_size=imgSize)  # pick the dataset for this model
+    elif modelName == 'LeNet':
+        dataloaders, dataset_sizes = load_mnist(batch_size=batchSize, path='./data', img_size=imgSize)
+
+    return dataloaders, dataset_sizes
+
+def weightPruneModelOp(model_name, batch_size, img_size, ratio, pattern, epoch, weightParameter, LinearParameter):
+    net = getModel(model_name)
+    dataloaders, dataset_sizes = getDataSet(model_name, batch_size, img_size)
+    criterion = nn.CrossEntropyLoss()
+    if pattern == 'retrain' or pattern == 'train':
+        if pattern == 'retrain':
+            getPth = './pth/' + model_name + '/ratio=' + str(ratio) + '/Activation' + '/best.pth'  # weights produced by activation-prune retraining
+        else:
+            getPth = './pth/' + model_name + '/ratio=0' + '/Activation' + '/best.pth'
+        net.load_state_dict(torch.load(getPth))
+        net = weightPrune(net, ratio, weightParameter, LinearParameter)
+        optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)  # built after pruning, so it tracks the pruned parameters
+        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.8)  # learning-rate decay schedule
+        train_model_jiang(net, dataloaders, dataset_sizes, ratio, 'weight', pattern, criterion=criterion, optimizer=optimizer, name=model_name,
+                          scheduler=scheduler, num_epochs=epoch, rerun=False)
+
+    if pattern == 'test':
+        getPth = './pth/' + model_name + '/ratio=' + str(ratio) + '/ActivationWeight/' + 'best.pth'
+        net = weightPrune(net, ratio, weightParameter, LinearParameter)
+        net.load_state_dict(torch.load(getPth))
+        test_model(net, dataloaders, dataset_sizes, criterion=criterion)
+
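To make the SCP step concrete, here is a sketch with two 2x2 kernels in the same cluster. It uses a 0-based argsort rank in place of the pandas rank above, which gives the same outcome: the cut_num smallest positions are zeroed.

import numpy as np

cluster = [np.array([[0.5, -0.1], [0.3, 0.9]]),
           np.array([[0.4,  0.2], [0.1, 0.8]])]
scp = np.zeros((2, 2))
for kernel in cluster:
    scp += np.abs(kernel)                  # scp_upgrade: accumulate magnitudes
cut_num = 2
ranks = scp.ravel().argsort().argsort()    # 0-based rank of each position
mask = (ranks >= cut_num).astype(float).reshape(scp.shape)
print(scp)   # [[0.9 0.3] [0.4 1.7]]
print(mask)  # [[1. 0.] [0. 1.]] -> every kernel in the cluster keeps this pattern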
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..e8cfdc7
--- /dev/null
+++ b/main.py
@@ -0,0 +1,48 @@
+from ActivationPrune import *
+from WeightPrune import weightPruneModelOp
+import os
+from Op import Op
+
+if __name__ == '__main__':
+    model_name = 'AlexNet'  # model to run
+    batch_size = 1  # images per training batch
+    img_size = 227  # input image size
+    ratio = 0.1  # pruning ratio for the input feature maps
+    epochA = 30  # epochs for activation-prune retraining, or for training the baseline (no pruning) model
+    epochAW = 40  # epochs for retraining after clustered kernel pruning
+    weightParameter = (4/1)
+    LinearParameter = 4
+    '''
+    Six operations on the model are supported:
+    1. operation = 'trainInitialModel': train the baseline model, with no pruning involved
+    2. operation = 'onlyActivationPruneWithRetrain': prune only the input feature maps, then retrain
+    3. operation = 'onlyWeightPruneWithRetrain': apply only clustered weight pruning, then retrain
+    4. operation = 'activationWeightPruneWithRetrain': prune the input feature maps and retrain, then cluster-prune the resulting model's weights and retrain again
+    5. operation = 'onlyActivationPruneTest': run inference with the activation-pruned model to test its accuracy
+    6. operation = 'activationWeightPruneTest': run inference with the activation- and weight-pruned model to test its accuracy
+    '''
+    operation = 'trainInitialModel'
+    Op(operation, model_name, batch_size, img_size, ratio, epochA, epochAW, weightParameter, LinearParameter)
+
+
+    # if not os.path.exists('./pth/'+model_name+'/ratio='+str(ratio)):
+    #     os.makedirs('./pth/'+model_name+'/ratio='+str(ratio)+'/Activation')
+    #     if patternA != 'train':
+    #         os.makedirs('./pth/' + model_name + '/ratio=' + str(ratio) + '/ActivationWeight')
+    #         os.makedirs('./pth/' + model_name + '/ratio=' + str(ratio) + '/Weight')
+    #
+    # # activationPruneModelOp(model_name, batch_size, img_size,patternA,ratio,epochA)
+    # if patternA != 'train' and not(patternA == 'test' and ratio == 0):
+    #     weightPruneModelOp(model_name, batch_size, img_size, ratio, patternW,epochAW,weightParameter,LinearParameter)
\ No newline at end of file
diff --git a/model.py b/model.py
new file mode 100644
index 0000000..aae0ea7
--- /dev/null
+++ b/model.py
@@ -0,0 +1,636 @@
+from collections import OrderedDict
+import torch.nn as nn
+import torch.utils.model_zoo as model_zoo
+import torch.nn.functional as F
+import math
+import torch
+
+class AlexNet(nn.Module):
+
+    def __init__(self, num_classes=10):
+        super(AlexNet, self).__init__()
+        self.features = nn.Sequential(
+            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=3, stride=2),
+            nn.Conv2d(96, 256, kernel_size=5, padding=2),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=3, stride=2),
+            nn.Conv2d(256, 384, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(384, 384, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(384, 256, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=3, stride=2),
+        )
+        self.classifier = nn.Sequential(
+            nn.Dropout(),
+            nn.Linear(256 * 6 * 6, 4096),
+            nn.ReLU(inplace=True),
+            nn.Dropout(),
+            nn.Linear(4096, 4096),
+            nn.ReLU(inplace=True),
+            nn.Linear(4096, num_classes),
+            # nn.Softmax()
+        )
+
+    def forward(self, x):
+        if hasattr(self, "first_input_prune"):
+            x = self.first_input_prune(x)
+        x = self.features(x)
+        x = x.view(x.size(0), 256 * 6 * 6)
+        x = self.classifier(x)
+        return x
+
+class LeNet(nn.Module):
+    def __init__(self, num_classes=10):
+        super(LeNet, self).__init__()
+        self.features = nn.Sequential(
+            nn.Conv2d(1, 6, kernel_size=5),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+            nn.Conv2d(6, 16, kernel_size=5),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+            nn.Conv2d(16, 120, kernel_size=5),
+            nn.ReLU(inplace=True)
+        )
+        self.classifier = nn.Sequential(
+            nn.Linear(120, 84),
+            nn.ReLU(inplace=True),
+            nn.Linear(84, num_classes)
+        )
+
+    def forward(self, x):
+        if hasattr(self, "first_input_prune"):
+            x = self.first_input_prune(x)
+        x = self.features(x)
+        x = x.view(x.size(0), -1)
+        x = self.classifier(x)
+        return x
+
+class VGG(nn.Module):
+
+    def __init__(self, features, num_classes=10):
+        super(VGG, self).__init__()
+        self.features = features
+        self.classifier = nn.Sequential(
+            nn.Linear(512 * 7 * 7, 4096),
+            nn.ReLU(inplace=True),
+
nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(4096, num_classes), + ) + self._initialize_weights() + + def forward(self, x): + if hasattr(self, "first_input_prune"): + x = self.first_input_prune(x) + x = self.features(x) + x = x.view(x.size(0), -1) + x = self.classifier(x) + return x + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + n = m.weight.size(1) + m.weight.data.normal_(0, 0.01) + m.bias.data.zero_() + + +class ResNet(nn.Module): + def __init__(self, block, layers, num_classes=10): + self.inplanes = 64 + super(ResNet, self).__init__() + + m = OrderedDict() + m['conv1'] = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) + m['bn1'] = nn.BatchNorm2d(64) + m['relu1'] = nn.ReLU(inplace=True) + m['maxpool'] = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.group1 = nn.Sequential(m) + + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + + self.avgpool = nn.Sequential(nn.AvgPool2d(7)) + + self.group2 = nn.Sequential( + OrderedDict([ + ('fc', nn.Linear(512 * block.expansion, num_classes)) + ]) + ) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.group1(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.group2(x) + + return x + + +class ResNetBasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(ResNetBasicBlock, self).__init__() + m = OrderedDict() + m['conv1'] = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + m['relu1'] = nn.ReLU(inplace=True) + m['conv2'] = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) + self.group1 = nn.Sequential(m) + self.relu = nn.Sequential(nn.ReLU(inplace=True)) + self.downsample = downsample + + def forward(self, x): + if self.downsample is not None: + residual = self.downsample(x) + else: + residual = x + out = self.group1(x) + residual + out = self.relu(out) + return out + +class Fire(nn.Module): + + def __init__(self, inplanes, squeeze_planes, + expand1x1_planes, expand3x3_planes): + super(Fire, self).__init__() + self.inplanes = inplanes + + self.group1 = nn.Sequential( + OrderedDict([ + 
('squeeze', nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)),
+                ('squeeze_activation', nn.ReLU(inplace=True))
+            ])
+        )
+
+        self.group2 = nn.Sequential(
+            OrderedDict([
+                ('expand1x1', nn.Conv2d(squeeze_planes, expand1x1_planes, kernel_size=1)),
+                ('expand1x1_activation', nn.ReLU(inplace=True))
+            ])
+        )
+
+        self.group3 = nn.Sequential(
+            OrderedDict([
+                ('expand3x3', nn.Conv2d(squeeze_planes, expand3x3_planes, kernel_size=3, padding=1)),
+                ('expand3x3_activation', nn.ReLU(inplace=True))
+            ])
+        )
+
+    def forward(self, x):
+        x = self.group1(x)
+        return torch.cat([self.group2(x), self.group3(x)], 1)
+
+
+class SqueezeNet(nn.Module):
+
+    def __init__(self, num_classes=1000):
+        super(SqueezeNet, self).__init__()
+        self.num_classes = num_classes
+        self.features = nn.Sequential(
+            nn.Conv2d(3, 96, kernel_size=7, stride=2),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
+            Fire(96, 16, 64, 64),
+            Fire(128, 16, 64, 64),
+            Fire(128, 32, 128, 128),
+            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
+            Fire(256, 32, 128, 128),
+            Fire(256, 48, 192, 192),
+            Fire(384, 48, 192, 192),
+            Fire(384, 64, 256, 256),
+            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
+            Fire(512, 64, 256, 256),
+        )
+        # Final convolution is initialized differently from the rest
+        final_conv = nn.Conv2d(512, num_classes, kernel_size=1)
+        self.classifier = nn.Sequential(
+            nn.Dropout(p=0.5),
+            final_conv,
+            nn.ReLU(inplace=True),
+            nn.AvgPool2d(13)
+        )
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                gain = 2.0
+                if m is final_conv:
+                    m.weight.data.normal_(0, 0.01)
+                else:
+                    fan_in = m.kernel_size[0] * m.kernel_size[1] * m.in_channels
+                    u = math.sqrt(3.0 * gain / fan_in)
+                    m.weight.data.uniform_(-u, u)
+                if m.bias is not None:
+                    m.bias.data.zero_()
+
+    def forward(self, x):
+        x = self.features(x)
+        x = self.classifier(x)
+        return x.view(x.size(0), self.num_classes)
+
+
+class Inception3(nn.Module):
+
+    def __init__(self, num_classes=1000, aux_logits=False, transform_input=False):
+        super(Inception3, self).__init__()
+        self.aux_logits = aux_logits
+        self.transform_input = transform_input
+        self.Conv2d_1a_3x3 = BasicConv2d(3, 32, kernel_size=3, stride=2)
+        self.Conv2d_2a_3x3 = BasicConv2d(32, 32, kernel_size=3)
+        self.Conv2d_2b_3x3 = BasicConv2d(32, 64, kernel_size=3, padding=1)
+        self.Conv2d_3b_1x1 = BasicConv2d(64, 80, kernel_size=1)
+        self.Conv2d_4a_3x3 = BasicConv2d(80, 192, kernel_size=3)
+        self.Mixed_5b = InceptionA(192, pool_features=32)
+        self.Mixed_5c = InceptionA(256, pool_features=64)
+        self.Mixed_5d = InceptionA(288, pool_features=64)
+        self.Mixed_6a = InceptionB(288)
+        self.Mixed_6b = InceptionC(768, channels_7x7=128)
+        self.Mixed_6c = InceptionC(768, channels_7x7=160)
+        self.Mixed_6d = InceptionC(768, channels_7x7=160)
+        self.Mixed_6e = InceptionC(768, channels_7x7=192)
+        if aux_logits:
+            self.AuxLogits = InceptionAux(768, num_classes)
+        self.Mixed_7a = InceptionD(768)
+        self.Mixed_7b = InceptionE(1280)
+        self.Mixed_7c = InceptionE(2048)
+        self.group1 = nn.Sequential(
+            OrderedDict([
+                ('fc', nn.Linear(2048, num_classes))
+            ])
+        )
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
+                import scipy.stats as stats
+                stddev = m.stddev if hasattr(m, 'stddev') else 0.1
+                X = stats.truncnorm(-2, 2, scale=stddev)
+                values = torch.Tensor(X.rvs(m.weight.data.numel()))
+                m.weight.data.copy_(values.reshape(m.weight.shape))
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+
+    def forward(self, x):
+        if 
self.transform_input: + x = x.clone() + x[0] = x[0] * (0.229 / 0.5) + (0.485 - 0.5) / 0.5 + x[1] = x[1] * (0.224 / 0.5) + (0.456 - 0.5) / 0.5 + x[2] = x[2] * (0.225 / 0.5) + (0.406 - 0.5) / 0.5 + # 299 x 299 x 3 + x = self.Conv2d_1a_3x3(x) + # 149 x 149 x 32 + x = self.Conv2d_2a_3x3(x) + # 147 x 147 x 32 + x = self.Conv2d_2b_3x3(x) + # 147 x 147 x 64 + x = F.max_pool2d(x, kernel_size=3, stride=2) + # 73 x 73 x 64 + x = self.Conv2d_3b_1x1(x) + # 73 x 73 x 80 + x = self.Conv2d_4a_3x3(x) + # 71 x 71 x 192 + x = F.max_pool2d(x, kernel_size=3, stride=2) + # 35 x 35 x 192 + x = self.Mixed_5b(x) + # 35 x 35 x 256 + x = self.Mixed_5c(x) + # 35 x 35 x 288 + x = self.Mixed_5d(x) + # 35 x 35 x 288 + x = self.Mixed_6a(x) + # 17 x 17 x 768 + x = self.Mixed_6b(x) + # 17 x 17 x 768 + x = self.Mixed_6c(x) + # 17 x 17 x 768 + x = self.Mixed_6d(x) + # 17 x 17 x 768 + x = self.Mixed_6e(x) + # 17 x 17 x 768 + if self.training and self.aux_logits: + aux = self.AuxLogits(x) + # 17 x 17 x 768 + x = self.Mixed_7a(x) + # 8 x 8 x 1280 + x = self.Mixed_7b(x) + # 8 x 8 x 2048 + x = self.Mixed_7c(x) + # 8 x 8 x 2048 + x = F.avg_pool2d(x, kernel_size=8) + # 1 x 1 x 2048 + x = F.dropout(x, training=self.training) + # 1 x 1 x 2048 + x = x.view(x.size(0), -1) + # 2048 + x = self.group1(x) + # 1000 (num_classes) + if self.training and self.aux_logits: + return x, aux + return x + + +class InceptionA(nn.Module): + + def __init__(self, in_channels, pool_features): + super(InceptionA, self).__init__() + self.branch1x1 = BasicConv2d(in_channels, 64, kernel_size=1) + + self.branch5x5_1 = BasicConv2d(in_channels, 48, kernel_size=1) + self.branch5x5_2 = BasicConv2d(48, 64, kernel_size=5, padding=2) + + self.branch3x3dbl_1 = BasicConv2d(in_channels, 64, kernel_size=1) + self.branch3x3dbl_2 = BasicConv2d(64, 96, kernel_size=3, padding=1) + self.branch3x3dbl_3 = BasicConv2d(96, 96, kernel_size=3, padding=1) + + self.branch_pool = BasicConv2d(in_channels, pool_features, kernel_size=1) + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch5x5 = self.branch5x5_1(x) + branch5x5 = self.branch5x5_2(branch5x5) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool] + return torch.cat(outputs, 1) + + +class InceptionB(nn.Module): + + def __init__(self, in_channels): + super(InceptionB, self).__init__() + self.branch3x3 = BasicConv2d(in_channels, 384, kernel_size=3, stride=2) + + self.branch3x3dbl_1 = BasicConv2d(in_channels, 64, kernel_size=1) + self.branch3x3dbl_2 = BasicConv2d(64, 96, kernel_size=3, padding=1) + self.branch3x3dbl_3 = BasicConv2d(96, 96, kernel_size=3, stride=2) + + def forward(self, x): + branch3x3 = self.branch3x3(x) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + branch_pool = F.max_pool2d(x, kernel_size=3, stride=2) + + outputs = [branch3x3, branch3x3dbl, branch_pool] + return torch.cat(outputs, 1) + + +class InceptionC(nn.Module): + + def __init__(self, in_channels, channels_7x7): + super(InceptionC, self).__init__() + self.branch1x1 = BasicConv2d(in_channels, 192, kernel_size=1) + + c7 = channels_7x7 + self.branch7x7_1 = BasicConv2d(in_channels, c7, kernel_size=1) + self.branch7x7_2 = BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3)) + 
self.branch7x7_3 = BasicConv2d(c7, 192, kernel_size=(7, 1), padding=(3, 0)) + + self.branch7x7dbl_1 = BasicConv2d(in_channels, c7, kernel_size=1) + self.branch7x7dbl_2 = BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7dbl_3 = BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7dbl_4 = BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7dbl_5 = BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3)) + + self.branch_pool = BasicConv2d(in_channels, 192, kernel_size=1) + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch7x7 = self.branch7x7_1(x) + branch7x7 = self.branch7x7_2(branch7x7) + branch7x7 = self.branch7x7_3(branch7x7) + + branch7x7dbl = self.branch7x7dbl_1(x) + branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool] + return torch.cat(outputs, 1) + + +class InceptionD(nn.Module): + + def __init__(self, in_channels): + super(InceptionD, self).__init__() + self.branch3x3_1 = BasicConv2d(in_channels, 192, kernel_size=1) + self.branch3x3_2 = BasicConv2d(192, 320, kernel_size=3, stride=2) + + self.branch7x7x3_1 = BasicConv2d(in_channels, 192, kernel_size=1) + self.branch7x7x3_2 = BasicConv2d(192, 192, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7x3_3 = BasicConv2d(192, 192, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7x3_4 = BasicConv2d(192, 192, kernel_size=3, stride=2) + + def forward(self, x): + branch3x3 = self.branch3x3_1(x) + branch3x3 = self.branch3x3_2(branch3x3) + + branch7x7x3 = self.branch7x7x3_1(x) + branch7x7x3 = self.branch7x7x3_2(branch7x7x3) + branch7x7x3 = self.branch7x7x3_3(branch7x7x3) + branch7x7x3 = self.branch7x7x3_4(branch7x7x3) + + branch_pool = F.max_pool2d(x, kernel_size=3, stride=2) + outputs = [branch3x3, branch7x7x3, branch_pool] + return torch.cat(outputs, 1) + + +class InceptionE(nn.Module): + + def __init__(self, in_channels): + super(InceptionE, self).__init__() + self.branch1x1 = BasicConv2d(in_channels, 320, kernel_size=1) + + self.branch3x3_1 = BasicConv2d(in_channels, 384, kernel_size=1) + self.branch3x3_2a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1)) + self.branch3x3_2b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0)) + + self.branch3x3dbl_1 = BasicConv2d(in_channels, 448, kernel_size=1) + self.branch3x3dbl_2 = BasicConv2d(448, 384, kernel_size=3, padding=1) + self.branch3x3dbl_3a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1)) + self.branch3x3dbl_3b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0)) + + self.branch_pool = BasicConv2d(in_channels, 192, kernel_size=1) + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch3x3 = self.branch3x3_1(x) + branch3x3 = [ + self.branch3x3_2a(branch3x3), + self.branch3x3_2b(branch3x3), + ] + branch3x3 = torch.cat(branch3x3, 1) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = [ + self.branch3x3dbl_3a(branch3x3dbl), + self.branch3x3dbl_3b(branch3x3dbl), + ] + branch3x3dbl = torch.cat(branch3x3dbl, 1) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool] + 
return torch.cat(outputs, 1)
+
+
+class InceptionAux(nn.Module):
+
+    def __init__(self, in_channels, num_classes):
+        super(InceptionAux, self).__init__()
+        self.conv0 = BasicConv2d(in_channels, 128, kernel_size=1)
+        self.conv1 = BasicConv2d(128, 768, kernel_size=5)
+        self.conv1.stddev = 0.01
+
+        fc = nn.Linear(768, num_classes)
+        fc.stddev = 0.001
+
+        self.group1 = nn.Sequential(
+            OrderedDict([
+                ('fc', fc)
+            ])
+        )
+
+    def forward(self, x):
+        # 17 x 17 x 768
+        x = F.avg_pool2d(x, kernel_size=5, stride=3)
+        # 5 x 5 x 768
+        x = self.conv0(x)
+        # 5 x 5 x 128
+        x = self.conv1(x)
+        # 1 x 1 x 768
+        x = x.view(x.size(0), -1)
+        # 768
+        x = self.group1(x)
+        # 1000
+        return x
+
+
+class BasicConv2d(nn.Module):
+
+    def __init__(self, in_channels, out_channels, **kwargs):
+        super(BasicConv2d, self).__init__()
+        self.group1 = nn.Sequential(
+            OrderedDict([
+                ('conv', nn.Conv2d(in_channels, out_channels, bias=False, **kwargs))
+                # ,('bn', nn.BatchNorm2d(out_channels, eps=0.001))
+            ])
+        )
+
+    def forward(self, x):
+        x = self.group1(x)
+        return F.relu(x, inplace=True)
+
+
+def vgg_make_layers(cfg, batch_norm=False):
+    layers = []
+    in_channels = 3
+    for v in cfg:
+        if v == 'M':
+            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
+        else:
+            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
+            if batch_norm:
+                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
+            else:
+                layers += [conv2d, nn.ReLU(inplace=True)]
+            in_channels = v
+    return nn.Sequential(*layers)
+
+
+def getLeNet(num_classes=10):
+    model = LeNet(num_classes)
+    return model
+
+def getAlexnet(num_classes=10):
+    model = AlexNet(num_classes)
+    return model
+
+def get_vgg16(num_classes=10):
+    vgg16_setting = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
+    model = VGG(vgg_make_layers(vgg16_setting), num_classes)
+    return model
+
+
+def get_resnet18(num_classes=10):
+    model = ResNet(ResNetBasicBlock, [2, 2, 2, 2], num_classes)
+    return model
+
+
+def get_squeezenet(num_classes=10):
+    model = SqueezeNet(num_classes)
+    return model
+
+
+def get_inception_v3(num_classes=10):
+    model = Inception3(num_classes)
+    return model
+
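A quick sanity check for the factories above, at the input sizes the rest of the repo assumes (32x32 single-channel for LeNet on MNIST, 227x227 RGB for AlexNet on resized CIFAR-10):

import torch
from model import getLeNet, getAlexnet

lenet = getLeNet()
alexnet = getAlexnet()
print(lenet(torch.randn(1, 1, 32, 32)).shape)      # torch.Size([1, 10])
print(alexnet(torch.randn(1, 3, 227, 227)).shape)  # torch.Size([1, 10])
print(sum(p.numel() for p in alexnet.parameters()), 'parameters')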
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..c1f5ecc
--- /dev/null
+++ b/train.py
@@ -0,0 +1,288 @@
+from __future__ import print_function, division
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.optim import lr_scheduler
+import numpy as np
+import torchvision
+from torchvision import datasets, models, transforms
+import matplotlib.pyplot as plt
+import time
+import os
+import copy
+from tqdm import tqdm
+from collections import OrderedDict
+
+def download_mnist(save_path):
+    torchvision.datasets.MNIST(root=save_path, train=True, download=True)
+    torchvision.datasets.MNIST(root=save_path, train=False, download=True)
+    return save_path
+
+def load_mnist(batch_size=64, path='', img_size=32):
+    # MNIST is always resized, so both branches build the same transforms
+    if img_size != 32:
+        transform = transforms.Compose(
+            [transforms.Resize((img_size, img_size)),
+             transforms.ToTensor()])
+        test_transform = transforms.Compose(
+            [transforms.Resize((img_size, img_size)),
+             transforms.ToTensor()])
+    else:
+        transform = transforms.Compose(
+            [transforms.Resize((img_size, img_size)),
+             transforms.ToTensor()])
+        test_transform = transforms.Compose(
+            [transforms.Resize((img_size, img_size)),
+             transforms.ToTensor()])
+    trainset = torchvision.datasets.MNIST(root=path, train=True, download=False, transform=transform)
+    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
+    testset = torchvision.datasets.MNIST(root=path, train=False, download=False, transform=test_transform)
+    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)
+    dataloaders = {"train": trainloader, "val": testloader}
+    dataset_sizes = {"train": 60000, "val": 10000}
+    return dataloaders, dataset_sizes
+
+def download_cifar10(save_path):
+    torchvision.datasets.CIFAR10(root=save_path, train=True, download=True)
+    torchvision.datasets.CIFAR10(root=save_path, train=False, download=True)
+    return save_path
+
+def load_cifar10(batch_size=64, pth_path='./data', img_size=32):
+    if img_size != 32:
+        transform = transforms.Compose(
+            [transforms.Resize((img_size, img_size)),
+             transforms.ToTensor()])
+        test_transform = transforms.Compose([transforms.Resize((img_size, img_size)),
+                                             transforms.ToTensor()])
+    else:
+        transform = transforms.Compose([transforms.Pad(padding=4),
+                                        transforms.RandomCrop(32),
+                                        transforms.RandomHorizontalFlip(), transforms.ToTensor()])
+        test_transform = transforms.Compose([transforms.ToTensor()])
+    trainset = torchvision.datasets.CIFAR10(root=pth_path, train=True, download=False, transform=transform)
+    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
+    testset = torchvision.datasets.CIFAR10(root=pth_path, train=False, download=False, transform=test_transform)
+    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)
+    dataloaders = {"train": trainloader, "val": testloader}
+    dataset_sizes = {"train": 50000, "val": 10000}
+    return dataloaders, dataset_sizes
+
+def download_cifar100(save_path):
+    torchvision.datasets.CIFAR100(root=save_path, train=True, download=True)
+    torchvision.datasets.CIFAR100(root=save_path, train=False, download=True)
+    return save_path
+
+def load_cifar100(batch_size, pth_path, img_size):
+    if img_size != 32:
+        transform = transforms.Compose(
+            [transforms.Resize((img_size, img_size)),
+             transforms.ToTensor()])
+        test_transform = transforms.Compose([transforms.Resize((img_size, img_size)),
+                                             transforms.ToTensor()])
+    else:
+        transform = transforms.Compose([transforms.Pad(padding=4),
+                                        transforms.RandomCrop(32),
+                                        transforms.RandomHorizontalFlip(), transforms.ToTensor()])
+        test_transform = transforms.Compose([transforms.ToTensor()])
+    trainset = torchvision.datasets.CIFAR100(root=pth_path, train=True, download=False, transform=transform)
+    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
+    testset = torchvision.datasets.CIFAR100(root=pth_path, train=False, download=False, transform=test_transform)
+    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)
+    dataloaders = {"train": trainloader, "val": testloader}
+    dataset_size = {"train": 50000, "val": 10000}
+    return dataloaders, dataset_size
+
+def test_model(model, dataloaders, dataset_sizes, criterion):
+    print("validation model:")
+    phase = "val"
+    model.cuda()
+    model.eval()
+    with torch.no_grad():
+        running_loss = 0.0
+        running_acc = 0.0
+        for inputs, labels in tqdm(dataloaders[phase]):
+            inputs, labels = inputs.cuda(), labels.cuda()
+            outputs = model(inputs)
+            _, preds = torch.max(outputs, 1)
+            loss = criterion(outputs, labels)
+            running_loss += loss.item() * inputs.size(0)
+            running_acc += torch.sum(preds == labels.data)
+        epoch_loss = running_loss/dataset_sizes[phase]
+        epoch_acc = running_acc / dataset_sizes[phase]
+        epoch_acc = epoch_acc.item()
+        print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
+    return epoch_acc, epoch_loss
+
+def WriteAccuracy(savePath, msg):
+    full_path = savePath + '/Accuracy.txt'
+    with open(full_path, 'a') as file:  # append one accuracy record per epoch
+        file.write(msg)
+
+def train_model_jiang(model, dataloaders, dataset_sizes, ratio, type, pattern, criterion, optimizer, name, scheduler=None, num_epochs=100, rerun=False):
+    if rerun == True:
+        print('resuming from checkpoint ./test_20.pth')
+        print(num_epochs)
+        since = time.time()
+        model.load_state_dict(torch.load('./test_20.pth'))
+        best_model_wts = copy.deepcopy(model.state_dict())
+        best_acc = 0.0
+
+        model.cuda()
+        for epoch in range(20, num_epochs):
+            print('Epoch {}/{}'.format(epoch + 1, num_epochs))
+            print('-' * 10)
+            print('the %d lr:%f' % (epoch + 1, optimizer.param_groups[0]['lr']))
+
+            # Each epoch has a training and validation phase
+            for phase in ['train', 'val']:
+                if phase == 'train':
+                    model.train()  # Set model to training mode
+                else:
+                    print('val stage')
+                    model.eval()  # Set model to evaluate mode
+
+                running_loss = 0.0
+                running_corrects = 0
+
+                # Iterate over data.
+                i = 0
+                for data in dataloaders[phase]:
+                    inputs, labels = data
+                    inputs = inputs.cuda()
+                    labels = labels.cuda()
+
+                    # zero the parameter gradients
+                    optimizer.zero_grad()
+
+                    # forward
+                    # track history if only in train
+                    with torch.set_grad_enabled(phase == 'train'):
+                        outputs = model(inputs)
+                        _, preds = torch.max(outputs, 1)
+                        loss = criterion(outputs, labels)
+                        print('[%d ,%5d] loss:%.3f' % (epoch + 1, i + 1, loss.item()))
+                        i += 1
+                        # backward + optimize only if in training phase
+                        if phase == 'train':
+                            loss.backward()
+                            optimizer.step()
+
+                    # statistics
+                    running_loss += loss.item() * inputs.size(0)
+                    running_corrects += torch.sum(preds == labels.data)
+                if phase == 'train' and scheduler is not None:
+                    scheduler.step()
+
+                epoch_loss = running_loss / dataset_sizes[phase]
+                epoch_acc = running_corrects.double() / dataset_sizes[phase]
+
+                print('{} Loss: {:.4f} Acc: {:.4f}'.format(
+                    phase, epoch_loss, epoch_acc))
+
+                # deep copy the model
+                if phase == 'val' and epoch_acc > best_acc:
+                    best_acc = epoch_acc
+                    best_model_wts = copy.deepcopy(model.state_dict())
+                    model.load_state_dict(best_model_wts)
+                    path = './test_{}.pth'.format(epoch+1)
+                    torch.save(model.state_dict(), path)
+
+        time_elapsed = time.time() - since
+        print('Training complete in {:.0f}m {:.0f}s'.format(
+            time_elapsed // 60, time_elapsed % 60))
+        print('Best val Acc: {:4f}'.format(best_acc))
+
+        # load best model weights
+        model.load_state_dict(best_model_wts)
+        path = './best.pth'
+        torch.save(model.state_dict(), path)
+
+    if rerun == False:
+        since = time.time()
+        best_model_wts = copy.deepcopy(model.state_dict())
+        best_acc = 0.0
+        if type == 'activation':
+            savePth = './pth/'+name+'/ratio='+str(ratio)+'/Activation'
+        else:
+            if pattern == 'retrain':
+                savePth = './pth/'+name+'/ratio='+str(ratio)+'/ActivationWeight'
+            elif pattern == 'train':
+                savePth = './pth/' + name + '/ratio=' + str(ratio) + '/Weight'
+        model.cuda()
+        for epoch in range(num_epochs):
+            print('Epoch {}/{}'.format(epoch+1, num_epochs))
+            print('-' * 10)
+            print('the %d lr:%f' % (epoch+1, optimizer.param_groups[0]['lr']))
+            # Each epoch has a training and validation phase
+            for phase in ['train', 'val']:
+                if phase == 'train':
+                    model.train()  # Set model to training mode
+                else:
+                    print('val stage')
+                    model.eval()  # Set model to evaluate mode
+                running_loss = 0.0
+                running_corrects = 0
+                # Iterate over data.
+                i = 0
+                for data in dataloaders[phase]:
+                    inputs, labels = data
+                    inputs = inputs.cuda()
+                    labels = labels.cuda()
+                    # zero the parameter gradients
+                    optimizer.zero_grad()
+                    # forward
+                    # track history if only in train
+                    with torch.set_grad_enabled(phase == 'train'):
+                        outputs = model(inputs)
+                        _, preds = torch.max(outputs, 1)
+                        loss = criterion(outputs, labels)
+                        print('[%d ,%5d] loss:%.3f' % (epoch+1, i+1, loss.item()))
+                        i += 1
+                        # backward + optimize only if in training phase
+                        if phase == 'train':
+                            loss.backward()
+                            optimizer.step()
+                    # statistics
+                    running_loss += loss.item() * inputs.size(0)
+                    running_corrects += torch.sum(preds == labels.data)
+                if phase == 'train' and scheduler is not None:
+                    scheduler.step()
+                epoch_loss = running_loss / dataset_sizes[phase]
+                epoch_acc = running_corrects.double() / dataset_sizes[phase]
+                print('{} Loss: {:.4f} Acc: {:.4f}'.format(
+                    phase, epoch_loss, epoch_acc))
+
+                # deep copy the model
+                if phase == 'val' and epoch_acc > best_acc:
+                    best_acc = epoch_acc
+                    best_model_wts = copy.deepcopy(model.state_dict())
+                    model.load_state_dict(best_model_wts)
+                    path = savePth+'/test_{}.pth'.format(epoch + 1)
+                    torch.save(model.state_dict(), path)
+                    WriteAccuracy(savePth, str(round(float(epoch_acc), 4)*100) + '%-' + 'epoch=' + str(epoch))
+
+        time_elapsed = time.time() - since
+        print('Training complete in {:.0f}m {:.0f}s'.format(
+            time_elapsed // 60, time_elapsed % 60))
+        print('Best val Acc: {:4f}'.format(best_acc))
+
+        # load best model weights
+        model.load_state_dict(best_model_wts)
+        path = savePth + '/best.pth'
+        torch.save(model.state_dict(), path)
+    return model

From 07d498a6213a59f2bd0f059ba93c709f0b61981b Mon Sep 17 00:00:00 2001
From: xkjiang-srfv <52552899+xkjiang-srfv@users.noreply.github.com>
Date: Sun, 29 Aug 2021 21:09:43 +0800
Subject: [PATCH 02/11] Delete Op.py

---
 Op.py | 46 ----------------------------------------------
 1 file changed, 46 deletions(-)
 delete mode 100644 Op.py

diff --git a/Op.py b/Op.py
deleted file mode 100644
index be0a88c..0000000
--- a/Op.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from ActivationPrune import activationPruneModelOp
-from WeightPrune import weightPruneModelOp
-import os
-def makeDir(model_name, ratio, patternA):
-    if not os.path.exists('./pth/' + model_name + '/ratio=' + str(ratio)):
-        os.makedirs('./pth/' + model_name + '/ratio=' + str(ratio) + '/Activation')
-    if patternA != 'train':
-        os.makedirs('./pth/' + model_name + '/ratio=' + str(ratio) + '/ActivationWeight')
-        os.makedirs('./pth/' + model_name + '/ratio=' + str(ratio) + '/Weight')
-
-def Op(operation, model_name, batch_size, img_size, ratio, epochA, epochAW, weightParameter, LinearParameter):
-    if operation == 'trainInitialModel':  # train the baseline model, without any pruning
-        patternA = 'train'
-        ratio = 0
-        makeDir(model_name, ratio, patternA)
-        activationPruneModelOp(model_name, batch_size, img_size, patternA, ratio, epochA)
-
-    if operation == 'onlyActivationPruneWithRetrain':  # prune only the input feature maps, without clustered weight pruning
-        patternA = 'retrain'
-        makeDir(model_name, ratio, patternA)
-        activationPruneModelOp(model_name, batch_size, img_size, patternA, ratio, epochA)
-
-    if operation == 'onlyWeightPruneWithRetrain':
-        patternA = 'test'
-        patternW = 'train'
-        makeDir(model_name, ratio, patternA)
-        weightPruneModelOp(model_name, batch_size, img_size, ratio, patternW, epochAW, weightParameter, LinearParameter)
epochAW, weightParameter,LinearParameter) - - if operation == 'activationWeightPruneWithRetrain': - patternA = 'retrain' - patternW = 'retrain' - makeDir(model_name, ratio, patternA) - activationPruneModelOp(model_name, batch_size, img_size, patternA, ratio, epochA) - weightPruneModelOp(model_name, batch_size, img_size, ratio, patternW, epochAW, weightParameter, LinearParameter) - - if operation == 'onlyActivationPruneTest': - patternA = 'test' - makeDir(model_name, ratio, patternA) - activationPruneModelOp(model_name, batch_size, img_size, patternA, ratio, epochA) - - if operation == 'activationWeightPruneTest': - patternA = 'test' - patternW = 'test' - makeDir(model_name, ratio, patternA) - weightPruneModelOp(model_name, batch_size, img_size, ratio, patternW, epochAW, weightParameter, LinearParameter) - From c06dc627479ae2ccd32d2e55dadc04562b18317a Mon Sep 17 00:00:00 2001 From: xkjiang-srfv <52552899+xkjiang-srfv@users.noreply.github.com> Date: Sun, 29 Aug 2021 21:10:45 +0800 Subject: [PATCH 03/11] Add files via upload --- Op.py | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 Op.py diff --git a/Op.py b/Op.py new file mode 100644 index 0000000..ddbd7a0 --- /dev/null +++ b/Op.py @@ -0,0 +1,47 @@ +from ActivationPrune import activationPruneModelOp +from WeightPrune import weightPruneModelOp +import os +def makeDir(model_name,ratio,patternA): + if not os.path.exists('./pth/' + model_name + '/ratio=' + str(ratio)): # + os.makedirs('./pth/' + model_name + '/ratio=' + str(ratio) + '/Activation') + if patternA != 'train': + os.makedirs('./pth/' + model_name + '/ratio=' + str(ratio) + '/ActivationWeight') + os.makedirs('./pth/' + model_name + '/ratio=' + str(ratio) + '/Weight') + +def Op(operation,model_name,batch_size,img_size,ratio,epochA,epochAW,weightParameter,LinearParameter): + if operation == 'trainInitialModel': # train the initial model + patternA = 'train' + ratio = 0 + makeDir(model_name,ratio,patternA) + activationPruneModelOp(model_name, batch_size, img_size,patternA,ratio,epochA) + + if operation == 'onlyActivationPruneWithRetrain': # only prune the input feature maps, without weight-clustering pruning + patternA = 'retrain' + makeDir(model_name,ratio,patternA) + activationPruneModelOp(model_name, batch_size, img_size,patternA,ratio,epochA) + + if operation == 'onlyWeightPruneWithRetrain': + patternA = 'test' + patternW = 'train' + ratio = 0 + makeDir(model_name,ratio,patternA) + weightPruneModelOp(model_name, batch_size, img_size, ratio, patternW, epochAW, weightParameter,LinearParameter) + + if operation == 'activationWeightPruneWithRetrain': + patternA = 'retrain' + patternW = 'retrain' + makeDir(model_name, ratio, patternA) + activationPruneModelOp(model_name, batch_size, img_size, patternA, ratio, epochA) + weightPruneModelOp(model_name, batch_size, img_size, ratio, patternW, epochAW, weightParameter, LinearParameter) + + if operation == 'onlyActivationPruneTest': + patternA = 'test' + makeDir(model_name, ratio, patternA) + activationPruneModelOp(model_name, batch_size, img_size, patternA, ratio, epochA) + + if operation == 'activationWeightPruneTest': + patternA = 'test' + patternW = 'test' + makeDir(model_name, ratio, patternA) + weightPruneModelOp(model_name, batch_size, img_size, ratio, patternW, epochAW, weightParameter, LinearParameter) + From 359c70eac378b246a1ee4323d06b7e6465a84f30 Mon Sep 17 00:00:00 2001 From: xkjiang-srfv <52552899+xkjiang-srfv@users.noreply.github.com> Date: Mon, 30 Aug 2021 15:29:37 +0800 Subject: [PATCH 04/11] Delete train.py --- train.py | 288 
------------------------------------------------------- 1 file changed, 288 deletions(-) delete mode 100644 train.py diff --git a/train.py b/train.py deleted file mode 100644 index c1f5ecc..0000000 --- a/train.py +++ /dev/null @@ -1,288 +0,0 @@ -from __future__ import print_function, division -import torch -import torch.nn as nn -import torch.optim as optim -from torch.optim import lr_scheduler -import numpy as np -import torchvision -from torchvision import datasets, models, transforms -import matplotlib.pyplot as plt -import time -import os -import copy -from tqdm import tqdm -from collections import OrderedDict - -def download_mnist(save_path): - torchvision.datasets.MNIST(root=save_path,train=True,download=True) - torchvision.datasets.MNIST(root=save_path,train=False,download=True) - return save_path - -def load_mnist(batch_size=64,path='',img_size=32): - if img_size != 32: - transform = transforms.Compose( - [transforms.Resize((img_size,img_size)), - transforms.ToTensor()]) - test_transform = transforms.Compose( - [transforms.Resize((img_size,img_size)), - transforms.ToTensor()] - ) - else: - transform = transforms.Compose( - [transforms.Resize((img_size,img_size)), - transforms.ToTensor()]) - test_transform = transforms.Compose( - [transforms.Resize((img_size,img_size)), - transforms.ToTensor()]) - trainset = torchvision.datasets.MNIST(root=path,train=True,download=False,transform=transform) - trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,shuffle=True, num_workers=2) - testset = torchvision.datasets.MNIST(root=path,train=False,download=False,transform=test_transform) - testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,shuffle=False, num_workers=2) - dataloaders = {"train":trainloader,"val":testloader} - dataset_sizes = {"train":60000,"val":10000} - return dataloaders,dataset_sizes - -def download_cifar10(save_path): - torchvision.datasets.CIFAR10(root=save_path,train=True,download=True) - torchvision.datasets.CIFAR10(root=save_path,train=False,download=True) - return save_path - -def load_cifar10(batch_size=64,pth_path='./data',img_size=32): - if img_size!=32: - transform = transforms.Compose( - [transforms.Resize((img_size,img_size)), - transforms.ToTensor()]) - test_transform = transforms.Compose([transforms.Resize((img_size,img_size)) - ,transforms.ToTensor()]) - else: - transform = transforms.Compose([transforms.Pad(padding = 4), - transforms.RandomCrop(32), - transforms.RandomHorizontalFlip(),transforms.ToTensor()]) - test_transform = transforms.Compose([transforms.ToTensor()]) - trainset = torchvision.datasets.CIFAR10(root=pth_path, train=True,download=False, transform=transform) - trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,shuffle=True, num_workers=2) - testset = torchvision.datasets.CIFAR10(root=pth_path, train=False,download=False, transform=test_transform) - testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,shuffle=False, num_workers=2) - dataloaders = {"train":trainloader,"val":testloader} - dataset_sizes = {"train":50000,"val":10000} - return dataloaders,dataset_sizes - -def download_cifar100(save_path): - torchvision.datasets.CIFAR100(root=save_path,train=True,download=True) - torchvision.datasets.CIFAR100(root=save_path,train=False,download=False) - return save_path - -def load_cifar100(batch_size,pth_path,img_size): - if img_size!=32: - transform = transforms.Compose( - [transforms.Resize((img_size,img_size)), - transforms.ToTensor()]) - test_transform = 
transforms.Compose([transforms.Resize((img_size,img_size)) - ,transforms.ToTensor()]) - else: - transform = transforms.Compose([transforms.Pad(padding = 4), - transforms.RandomCrop(32), - transforms.RandomHorizontalFlip(),transforms.ToTensor()]) - test_transform = transforms.Compose([transforms.ToTensor()]) - trainset = torchvision.datasets.CIFAR100(root=pth_path,train=True,download=False,transform=transform) - trainloader = torch.utils.data.DataLoader(trainset,batch_size=batch_size,shuffle=True,num_workers=2) - testset = torchvision.datasets.CIFAR100(root=pth_path,train=False,download=False,transform=test_transform) - testloader = torch.utils.data.DataLoader(testset,batch_size=batch_size,shuffle=True,num_workers=2) - dataloaders = {"train":trainloader,"val":testloader} - dataset_size ={"train":50000,"val":10000} - return dataloaders,dataset_size -def test_model(model,dataloaders,dataset_sizes,criterion): - print("validation model:") - phase = "val" - model.cuda() - model.eval() - with torch.no_grad(): - running_loss = 0.0 - running_acc = 0.0 - for inputs,labels in tqdm(dataloaders[phase]): - inputs,labels = inputs.cuda(),labels.cuda() - outputs = model(inputs) - _,preds = torch.max(outputs,1) - loss = criterion(outputs,labels) - running_loss += loss.item() * inputs.size(0) - running_acc += torch.sum(preds == labels.data) - epoch_loss = running_loss/dataset_sizes[phase] - epoch_acc = running_acc / dataset_sizes[phase] - epoch_acc = epoch_acc.item() - print('{} Loss: {:.4f} Acc: {:.4f}'.format( - phase, epoch_loss, epoch_acc)) - return epoch_acc,epoch_loss - -def WriteAccuracy(savePath, msg): - - full_path = savePath + '/Accuracy.txt' # a .doc Word file could also be created here - file = open(full_path, 'a') - file.write(msg) # msg is the text to append - # file.close() - -def train_model_jiang(model, dataloaders, dataset_sizes,ratio, type,pattern,criterion, optimizer, name,scheduler=None, num_epochs=100,rerun=False): - if rerun == True: - print('entered rerun branch') - print(num_epochs) - since = time.time() - model.load_state_dict(torch.load('./test_20.pth')) - best_model_wts = copy.deepcopy(model.state_dict()) - best_acc = 0.0 - - model.cuda() - for epoch in range(20, num_epochs): - print('Epoch {}/{}'.format(epoch + 1, num_epochs)) - print('-' * 10) - print('the %d lr:%f' % (epoch + 1, optimizer.param_groups[0]['lr'])) - - # Each epoch has a training and validation phase - for phase in ['train', 'val']: - if phase == 'train': - model.train() # Set model to training mode - else: - print('val stage') - model.eval() # Set model to evaluate mode - - running_loss = 0.0 - running_corrects = 0 - - # Iterate over data. 
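Editor's note on the rerun branch above (deleted here and re-added unchanged in the next patch): it hard-codes both the checkpoint file ('./test_20.pth') and the starting epoch (20). A minimal sketch of the same resume idea with the epoch as a parameter; resume_weights and start_epoch are illustrative names, not part of the repository:

    import torch

    def resume_weights(model, start_epoch):
        # train_model_jiang saves './test_{epoch+1}.pth' after each epoch that
        # improves validation accuracy, so this reloads the matching snapshot;
        # training would then continue with range(start_epoch, num_epochs).
        model.load_state_dict(torch.load('./test_{}.pth'.format(start_epoch)))
        return model
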
- i = 0 - loss_a = 0 - p = 0 - for data in dataloaders[phase]: - inputs, labels = data - inputs = inputs.cuda() - labels = labels.cuda() - - # zero the parameter gradients - optimizer.zero_grad() - - # forward - # track history if only in train - with torch.set_grad_enabled(phase == 'train'): - outputs = model(inputs) - _, preds = torch.max(outputs, 1) - loss = criterion(outputs, labels) - loss_a = loss.item() - print('[%d ,%5d] loss:%.3f' % (epoch + 1, i + 1, loss_a)) - loss_a = 0 - i += 1 - # backward + optimize only if in training phase - if phase == 'train': - loss.backward() - optimizer.step() - - # statistics - running_loss += loss.item() * inputs.size(0) - running_corrects += torch.sum(preds == labels.data) - if phase == 'train' and scheduler is not None: - scheduler.step() - - epoch_loss = running_loss / dataset_sizes[phase] - epoch_acc = running_corrects.double() / dataset_sizes[phase] - # epoch_loss = running_loss / p - # epoch_acc = running_corrects.double() / p - - print('{} Loss: {:.4f} Acc: {:.4f}'.format( - phase, epoch_loss, epoch_acc)) - - - # deep copy the model - if phase == 'val' and epoch_acc > best_acc: - best_acc = epoch_acc - best_model_wts = copy.deepcopy(model.state_dict()) - model.load_state_dict(best_model_wts) - path = './test_{}.pth'.format(epoch+1) - torch.save(model.state_dict(), path) - - time_elapsed = time.time() - since - print('Training complete in {:.0f}m {:.0f}s'.format( - time_elapsed // 60, time_elapsed % 60)) - print('Best val Acc: {:4f}'.format(best_acc)) - - # load best model weights - model.load_state_dict(best_model_wts) - path = './best.pth'.format(epoch + 1) - torch.save(model.state_dict(), path) - - if rerun == False: - since = time.time() - best_model_wts = copy.deepcopy(model.state_dict()) - best_acc = 0.0 - if type == 'activation': - savePth = './pth/'+name+'/ratio='+str(ratio)+'/Activation' - else: - if pattern == 'retrain': - savePth = './pth/'+name+'/ratio='+str(ratio)+'/ActivationWeight' - elif pattern == 'train': - savePth = './pth/' + name + '/ratio=' + str(ratio) + '/weight' - model.cuda() - for epoch in range(num_epochs): - print('Epoch {}/{}'.format(epoch+1, num_epochs)) - print('-' * 10) - print('the %d lr:%f'%(epoch+1,optimizer.param_groups[0]['lr'])) - # Each epoch has a training and validation phase - for phase in ['train', 'val']: - if phase == 'train': - model.train() # Set model to training mode - else: - print('val stage') - model.eval() # Set model to evaluate mode - running_loss = 0.0 - running_corrects = 0 - # Iterate over data. 
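Editor's note on the save-path selection a few lines above: with illustrative values name='AlexNet' and ratio=0.5, the three branches resolve to

    ./pth/AlexNet/ratio=0.5/Activation         # type == 'activation'
    ./pth/AlexNet/ratio=0.5/ActivationWeight   # pattern == 'retrain'
    ./pth/AlexNet/ratio=0.5/weight             # pattern == 'train'

The lowercase 'weight' in the last branch does not match the 'Weight' directory that makeDir in Op.py actually creates, so saving checkpoints on that path would fail; the version of train.py re-added in the next patch capitalizes it.
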
- i = 0 - # loss_a = 0 - # p = 0 - for data in dataloaders[phase]: - inputs,labels = data - inputs = inputs.cuda() - labels = labels.cuda() - # zero the parameter gradients - optimizer.zero_grad() - # forward - # track history if only in train - with torch.set_grad_enabled(phase == 'train'): - outputs = model(inputs) - _, preds = torch.max(outputs, 1) - loss = criterion(outputs, labels) - loss_a = loss.item() - print('[%d ,%5d] loss:%.3f'%(epoch+1,i+1,loss_a)) - # loss_a = 0 - i += 1 - # backward + optimize only if in training phase - if phase == 'train': - loss.backward() - optimizer.step() - # statistics - running_loss += loss.item() * inputs.size(0) - running_corrects += torch.sum(preds == labels.data) - if phase == 'train' and scheduler is not None: - scheduler.step() - epoch_loss = running_loss / dataset_sizes[phase] - epoch_acc = running_corrects.double() / dataset_sizes[phase] - # epoch_loss = running_loss / p - # epoch_acc = running_corrects.double() / p - print('{} Loss: {:.4f} Acc: {:.4f}'.format( - phase, epoch_loss, epoch_acc)) - - # deep copy the model - if phase == 'val' and epoch_acc > best_acc: - best_acc = epoch_acc - best_model_wts = copy.deepcopy(model.state_dict()) - model.load_state_dict(best_model_wts) - path = savePth+'/test_{}.pth'.format(epoch + 1) - torch.save(model.state_dict(), path) - WriteAccuracy(savePth, str((round(float(epoch_acc),4))*100) + '%-' +'epoch=' +str(epoch)) - - time_elapsed = time.time() - since - print('Training complete in {:.0f}m {:.0f}s'.format( - time_elapsed // 60, time_elapsed % 60)) - print('Best val Acc: {:4f}'.format(best_acc)) - - # load best model weights - model.load_state_dict(best_model_wts) - path = savePth + '/best.pth' - torch.save(model.state_dict(), path) - return model From 935aadb72d43decbc00f3b5679b0277e8f1560b8 Mon Sep 17 00:00:00 2001 From: xkjiang-srfv <52552899+xkjiang-srfv@users.noreply.github.com> Date: Mon, 30 Aug 2021 15:30:00 +0800 Subject: [PATCH 05/11] Add files via upload --- train.py | 289 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 289 insertions(+) create mode 100644 train.py diff --git a/train.py b/train.py new file mode 100644 index 0000000..3b726f0 --- /dev/null +++ b/train.py @@ -0,0 +1,289 @@ +from __future__ import print_function, division +import torch +import torch.nn as nn +import torch.optim as optim +from torch.optim import lr_scheduler +import numpy as np +import torchvision +from torchvision import datasets, models, transforms +import matplotlib.pyplot as plt +import time +import os +import copy +from tqdm import tqdm +from collections import OrderedDict + +def download_mnist(save_path): + torchvision.datasets.MNIST(root=save_path,train=True,download=True) + torchvision.datasets.MNIST(root=save_path,train=False,download=True) + return save_path + +def load_mnist(batch_size=64,path='',img_size=32): + if img_size != 32: + transform = transforms.Compose( + [transforms.Resize((img_size,img_size)), + transforms.ToTensor()]) + test_transform = transforms.Compose( + [transforms.Resize((img_size,img_size)), + transforms.ToTensor()] + ) + else: + transform = transforms.Compose( + [transforms.Resize((img_size,img_size)), + transforms.ToTensor()]) + test_transform = transforms.Compose( + [transforms.Resize((img_size,img_size)), + transforms.ToTensor()]) + trainset = torchvision.datasets.MNIST(root=path,train=True,download=False,transform=transform) + trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,shuffle=True, num_workers=2) + testset = 
torchvision.datasets.MNIST(root=path,train=False,download=False,transform=test_transform) + testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,shuffle=False, num_workers=2) + dataloaders = {"train":trainloader,"val":testloader} + dataset_sizes = {"train":60000,"val":10000} + return dataloaders,dataset_sizes + +def download_cifar10(save_path): + torchvision.datasets.CIFAR10(root=save_path,train=True,download=True) + torchvision.datasets.CIFAR10(root=save_path,train=False,download=True) + return save_path + +def load_cifar10(batch_size=64,pth_path='./data',img_size=32): + if img_size!=32: + transform = transforms.Compose( + [transforms.Resize((img_size,img_size)), + transforms.ToTensor()]) + test_transform = transforms.Compose([transforms.Resize((img_size,img_size)) + ,transforms.ToTensor()]) + else: + transform = transforms.Compose([transforms.Pad(padding = 4), + transforms.RandomCrop(32), + transforms.RandomHorizontalFlip(),transforms.ToTensor()]) + test_transform = transforms.Compose([transforms.ToTensor()]) + trainset = torchvision.datasets.CIFAR10(root=pth_path, train=True,download=False, transform=transform) + trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,shuffle=True, num_workers=2) + testset = torchvision.datasets.CIFAR10(root=pth_path, train=False,download=False, transform=test_transform) + testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,shuffle=False, num_workers=2) + dataloaders = {"train":trainloader,"val":testloader} + dataset_sizes = {"train":50000,"val":10000} + return dataloaders,dataset_sizes + +def download_cifar100(save_path): + torchvision.datasets.CIFAR100(root=save_path,train=True,download=True) + torchvision.datasets.CIFAR100(root=save_path,train=False,download=True) + return save_path + +def load_cifar100(batch_size,pth_path,img_size): + if img_size!=32: + transform = transforms.Compose( + [transforms.Resize((img_size,img_size)), + transforms.ToTensor()]) + test_transform = transforms.Compose([transforms.Resize((img_size,img_size)) + ,transforms.ToTensor()]) + else: + transform = transforms.Compose([transforms.Pad(padding = 4), + transforms.RandomCrop(32), + transforms.RandomHorizontalFlip(),transforms.ToTensor()]) + test_transform = transforms.Compose([transforms.ToTensor()]) + trainset = torchvision.datasets.CIFAR100(root=pth_path,train=True,download=False,transform=transform) + trainloader = torch.utils.data.DataLoader(trainset,batch_size=batch_size,shuffle=True,num_workers=2) + testset = torchvision.datasets.CIFAR100(root=pth_path,train=False,download=False,transform=test_transform) + testloader = torch.utils.data.DataLoader(testset,batch_size=batch_size,shuffle=False,num_workers=2) + dataloaders = {"train":trainloader,"val":testloader} + dataset_size = {"train":50000,"val":10000} + return dataloaders,dataset_size +def test_model(model,dataloaders,dataset_sizes,criterion): + print("validation model:") + phase = "val" + model.cuda() + model.eval() + with torch.no_grad(): + running_loss = 0.0 + running_acc = 0.0 + for inputs,labels in tqdm(dataloaders[phase]): + inputs,labels = inputs.cuda(),labels.cuda() + outputs = model(inputs) + _,preds = torch.max(outputs,1) + loss = criterion(outputs,labels) + running_loss += loss.item() * inputs.size(0) + running_acc += torch.sum(preds == labels.data) + epoch_loss = running_loss/dataset_sizes[phase] + epoch_acc = running_acc / dataset_sizes[phase] + epoch_acc = epoch_acc.item() + print('{} Loss: {:.4f} Acc: {:.4f}'.format( + phase, epoch_loss, epoch_acc)) 
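A minimal usage sketch for the loaders and test_model defined above (the function's return follows just below). The model variable is assumed to be one of the networks from model.py, and the loss choice is an assumption; the loaders use download=False, so the download helper runs first:

    import torch.nn as nn

    download_cifar10('./data')  # fetch once; load_cifar10 itself does not download
    dataloaders, dataset_sizes = load_cifar10(batch_size=64, pth_path='./data', img_size=32)
    criterion = nn.CrossEntropyLoss()  # assumed; test_model accepts any criterion
    val_acc, val_loss = test_model(model, dataloaders, dataset_sizes, criterion)
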
+ return epoch_acc,epoch_loss + +def WriteData(savePath, msg): + + full_path = savePath + '/Accuracy.txt' # a .doc Word file could also be created here + file = open(full_path, 'a') + file.write(msg) # msg is the text to append + # file.close() + +def train_model_jiang(model, dataloaders, dataset_sizes,ratio, type,pattern,criterion, optimizer, name,scheduler=None, num_epochs=100,rerun=False): + if rerun == True: + print('entered rerun branch') + print(num_epochs) + since = time.time() + model.load_state_dict(torch.load('./test_20.pth')) + best_model_wts = copy.deepcopy(model.state_dict()) + best_acc = 0.0 + + model.cuda() + for epoch in range(20, num_epochs): + print('Epoch {}/{}'.format(epoch + 1, num_epochs)) + print('-' * 10) + print('the %d lr:%f' % (epoch + 1, optimizer.param_groups[0]['lr'])) + + # Each epoch has a training and validation phase + for phase in ['train', 'val']: + if phase == 'train': + model.train() # Set model to training mode + else: + print('val stage') + model.eval() # Set model to evaluate mode + + running_loss = 0.0 + running_corrects = 0 + + # Iterate over data. + i = 0 + loss_a = 0 + p = 0 + for data in dataloaders[phase]: + inputs, labels = data + inputs = inputs.cuda() + labels = labels.cuda() + + # zero the parameter gradients + optimizer.zero_grad() + + # forward + # track history if only in train + with torch.set_grad_enabled(phase == 'train'): + outputs = model(inputs) + _, preds = torch.max(outputs, 1) + loss = criterion(outputs, labels) + loss_a = loss.item() + print('[%d ,%5d] loss:%.3f' % (epoch + 1, i + 1, loss_a)) + loss_a = 0 + i += 1 + # backward + optimize only if in training phase + if phase == 'train': + loss.backward() + optimizer.step() + + # statistics + running_loss += loss.item() * inputs.size(0) + running_corrects += torch.sum(preds == labels.data) + if phase == 'train' and scheduler is not None: + scheduler.step() + + epoch_loss = running_loss / dataset_sizes[phase] + epoch_acc = running_corrects.double() / dataset_sizes[phase] + # epoch_loss = running_loss / p + # epoch_acc = running_corrects.double() / p + + print('{} Loss: {:.4f} Acc: {:.4f}'.format( + phase, epoch_loss, epoch_acc)) + + + # deep copy the model + if phase == 'val' and epoch_acc > best_acc: + best_acc = epoch_acc + best_model_wts = copy.deepcopy(model.state_dict()) + model.load_state_dict(best_model_wts) + path = './test_{}.pth'.format(epoch+1) + torch.save(model.state_dict(), path) + + time_elapsed = time.time() - since + print('Training complete in {:.0f}m {:.0f}s'.format( + time_elapsed // 60, time_elapsed % 60)) + print('Best val Acc: {:.4f}'.format(best_acc)) + + # load best model weights + model.load_state_dict(best_model_wts) + path = './best.pth' + torch.save(model.state_dict(), path) + + if rerun == False: + since = time.time() + best_model_wts = copy.deepcopy(model.state_dict()) + best_acc = 0.0 + if type == 'activation': + savePth = './pth/'+name+'/ratio='+str(ratio)+'/Activation' + else: + if pattern == 'retrain': + savePth = './pth/'+name+'/ratio='+str(ratio)+'/ActivationWeight' + elif pattern == 'train': + savePth = './pth/' + name + '/ratio=' + str(ratio) + '/Weight' + model.cuda() + WriteData(savePth,'ratio='+str(ratio)+'\n') + for epoch in range(num_epochs): + print('Epoch {}/{}'.format(epoch+1, num_epochs)) + print('-' * 10) + print('the %d lr:%f'%(epoch+1,optimizer.param_groups[0]['lr'])) + # Each epoch has a training and validation phase + for phase in ['train', 'val']: + if phase == 'train': + model.train() # Set model to training mode + else: + print('val stage') + 
model.eval() # Set model to evaluate mode + running_loss = 0.0 + running_corrects = 0 + # Iterate over data. + i = 0 + # loss_a = 0 + # p = 0 + for data in dataloaders[phase]: + inputs,labels = data + inputs = inputs.cuda() + labels = labels.cuda() + # zero the parameter gradients + optimizer.zero_grad() + # forward + # track history if only in train + with torch.set_grad_enabled(phase == 'train'): + outputs = model(inputs) + _, preds = torch.max(outputs, 1) + loss = criterion(outputs, labels) + loss_a = loss.item() + print('[%d ,%5d] loss:%.3f'%(epoch+1,i+1,loss_a)) + # loss_a = 0 + i += 1 + # backward + optimize only if in training phase + if phase == 'train': + loss.backward() + optimizer.step() + # statistics + running_loss += loss.item() * inputs.size(0) + running_corrects += torch.sum(preds == labels.data) + if phase == 'train' and scheduler is not None: + scheduler.step() + epoch_loss = running_loss / dataset_sizes[phase] + epoch_acc = running_corrects.double() / dataset_sizes[phase] + # epoch_loss = running_loss / p + # epoch_acc = running_corrects.double() / p + print('{} Loss: {:.4f} Acc: {:.4f}'.format( + phase, epoch_loss, epoch_acc)) + + # deep copy the model + if phase == 'val' and epoch_acc > best_acc: + best_acc = epoch_acc + best_model_wts = copy.deepcopy(model.state_dict()) + model.load_state_dict(best_model_wts) + path = savePth+'/test_{}.pth'.format(epoch + 1) + torch.save(model.state_dict(), path) + WriteData(savePth, '{:.2f}%-epoch={}\n'.format(float(epoch_acc) * 100, epoch)) + + time_elapsed = time.time() - since + print('Training complete in {:.0f}m {:.0f}s'.format( + time_elapsed // 60, time_elapsed % 60)) + print('Best val Acc: {:.4f}'.format(best_acc)) + + # load best model weights + model.load_state_dict(best_model_wts) + path = savePth + '/best.pth' + torch.save(model.state_dict(), path) + return model From 8faa6275c35b9120e3e109f02436eeed1853051f Mon Sep 17 00:00:00 2001 From: xkjiang-srfv <52552899+xkjiang-srfv@users.noreply.github.com> Date: Mon, 30 Aug 2021 16:45:31 +0800 Subject: [PATCH 06/11] Delete model.py --- model.py | 636 ------------------------------------------------------- 1 file changed, 636 deletions(-) delete mode 100644 model.py diff --git a/model.py b/model.py deleted file mode 100644 index aae0ea7..0000000 --- a/model.py +++ /dev/null @@ -1,636 +0,0 @@ -from collections import OrderedDict -import torch.nn as nn -import torch.utils.model_zoo as model_zoo -import torch.nn.functional as F -import math -import torch - -class AlexNet(nn.Module): - - def __init__(self, num_classes=10): - super(AlexNet, self).__init__() - self.features = nn.Sequential( - nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2), - nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2), - nn.Conv2d(96, 256, kernel_size=5, padding=2), - nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2), - nn.Conv2d(256, 384, kernel_size=3, padding=1), - nn.ReLU(inplace=True), - nn.Conv2d(384, 384, kernel_size=3, padding=1), - nn.ReLU(inplace=True), - nn.Conv2d(384, 256, kernel_size=3, padding=1), - nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2), - ) - self.classifier = nn.Sequential( - nn.Dropout(), - nn.Linear(256 * 6 * 6, 4096), - nn.ReLU(inplace=True), - nn.Dropout(), - nn.Linear(4096, 4096), - nn.ReLU(inplace=True), - nn.Linear(4096, num_classes), - # nn.Softmax() - ) - - def forward(self, x): - if hasattr(self, "first_input_prune"): - x = self.first_input_prune(x) - x = self.features(x) - x = x.view(x.size(0), 256 * 6 * 6) - x = 
self.classifier(x) - return x -class LeNet(nn.Module): - def __init__(self, num_classes=10): - super(LeNet, self).__init__() - self.features = nn.Sequential( - nn.Conv2d(1, 6, kernel_size=5), - nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=2, stride=2), - nn.Conv2d(6, 16, kernel_size=5), - nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=2, stride=2), - nn.Conv2d(16, 120, kernel_size=5), - nn.ReLU(inplace=True) - ) - self.classifier = nn.Sequential( - nn.Linear(120, 84), - nn.ReLU(inplace=True), - nn.Linear(84, num_classes) - ) - - def forward(self, x): - if hasattr(self, "first_input_prune"): - x = self.first_input_prune(x) - x = self.features(x) - x = x.view(x.size(0), -1) - x = self.classifier(x) - return x - -class VGG(nn.Module): - - def __init__(self, features, num_classes=10): - super(VGG, self).__init__() - self.features = features - self.classifier = nn.Sequential( - nn.Linear(512 * 7 * 7, 4096), - nn.ReLU(inplace=True), - nn.Dropout(), - nn.Linear(4096, 4096), - nn.ReLU(inplace=True), - nn.Dropout(), - nn.Linear(4096, num_classes), - ) - self._initialize_weights() - - def forward(self, x): - if hasattr(self, "first_input_prune"): - x = self.first_input_prune(x) - x = self.features(x) - x = x.view(x.size(0), -1) - x = self.classifier(x) - return x - - def _initialize_weights(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. / n)) - if m.bias is not None: - m.bias.data.zero_() - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - elif isinstance(m, nn.Linear): - n = m.weight.size(1) - m.weight.data.normal_(0, 0.01) - m.bias.data.zero_() - - -class ResNet(nn.Module): - def __init__(self, block, layers, num_classes=10): - self.inplanes = 64 - super(ResNet, self).__init__() - - m = OrderedDict() - m['conv1'] = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) - m['bn1'] = nn.BatchNorm2d(64) - m['relu1'] = nn.ReLU(inplace=True) - m['maxpool'] = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - self.group1 = nn.Sequential(m) - - self.layer1 = self._make_layer(block, 64, layers[0]) - self.layer2 = self._make_layer(block, 128, layers[1], stride=2) - self.layer3 = self._make_layer(block, 256, layers[2], stride=2) - self.layer4 = self._make_layer(block, 512, layers[3], stride=2) - - self.avgpool = nn.Sequential(nn.AvgPool2d(7)) - - self.group2 = nn.Sequential( - OrderedDict([ - ('fc', nn.Linear(512 * block.expansion, num_classes)) - ]) - ) - - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. 
/ n)) - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - - def _make_layer(self, block, planes, blocks, stride=1): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, downsample)) - self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append(block(self.inplanes, planes)) - - return nn.Sequential(*layers) - - def forward(self, x): - x = self.group1(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - - x = self.avgpool(x) - x = x.view(x.size(0), -1) - x = self.group2(x) - - return x - - -class ResNetBasicBlock(nn.Module): - expansion = 1 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(ResNetBasicBlock, self).__init__() - m = OrderedDict() - m['conv1'] = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - m['relu1'] = nn.ReLU(inplace=True) - m['conv2'] = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) - self.group1 = nn.Sequential(m) - self.relu = nn.Sequential(nn.ReLU(inplace=True)) - self.downsample = downsample - - def forward(self, x): - if self.downsample is not None: - residual = self.downsample(x) - else: - residual = x - out = self.group1(x) + residual - out = self.relu(out) - return out - -class Fire(nn.Module): - - def __init__(self, inplanes, squeeze_planes, - expand1x1_planes, expand3x3_planes): - super(Fire, self).__init__() - self.inplanes = inplanes - - self.group1 = nn.Sequential( - OrderedDict([ - ('squeeze', nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)), - ('squeeze_activation', nn.ReLU(inplace=True)) - ]) - ) - - self.group2 = nn.Sequential( - OrderedDict([ - ('expand1x1', nn.Conv2d(squeeze_planes, expand1x1_planes, kernel_size=1)), - ('expand1x1_activation', nn.ReLU(inplace=True)) - ]) - ) - - self.group3 = nn.Sequential( - OrderedDict([ - ('expand3x3', nn.Conv2d(squeeze_planes, expand3x3_planes, kernel_size=3, padding=1)), - ('expand3x3_activation', nn.ReLU(inplace=True)) - ]) - ) - - def forward(self, x): - x = self.group1(x) - return torch.cat([self.group2(x), self.group3(x)], 1) - - -class SqueezeNet(nn.Module): - - def __init__(self, num_classes=1000): - super(SqueezeNet, self).__init__() - self.num_classes = num_classes - self.features = nn.Sequential( - nn.Conv2d(3, 96, kernel_size=7, stride=2), - nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), - Fire(96, 16, 64, 64), - Fire(128, 16, 64, 64), - Fire(128, 32, 128, 128), - nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), - Fire(256, 32, 128, 128), - Fire(256, 48, 192, 192), - Fire(384, 48, 192, 192), - Fire(384, 64, 256, 256), - nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), - Fire(512, 64, 256, 256), - ) - # Final convolution is initialized differently form the rest - final_conv = nn.Conv2d(512, num_classes, kernel_size=1) - self.classifier = nn.Sequential( - nn.Dropout(p=0.5), - final_conv, - nn.ReLU(inplace=True), - nn.AvgPool2d(13) - ) - - for m in self.modules(): - if isinstance(m, nn.Conv2d): - gain = 2.0 - if m is final_conv: - m.weight.data.normal_(0, 0.01) - else: - fan_in = m.kernel_size[0] * m.kernel_size[1] * m.in_channels - u = math.sqrt(3.0 * gain / fan_in) - m.weight.data.uniform_(-u, u) - if m.bias 
is not None: - m.bias.data.zero_() - - def forward(self, x): - x = self.features(x) - x = self.classifier(x) - return x.view(x.size(0), self.num_classes) - - -class Inception3(nn.Module): - - def __init__(self, num_classes=1000, aux_logits=False, transform_input=False): - super(Inception3, self).__init__() - self.aux_logits = aux_logits - self.transform_input = transform_input - self.Conv2d_1a_3x3 = BasicConv2d(3, 32, kernel_size=3, stride=2) - self.Conv2d_2a_3x3 = BasicConv2d(32, 32, kernel_size=3) - self.Conv2d_2b_3x3 = BasicConv2d(32, 64, kernel_size=3, padding=1) - self.Conv2d_3b_1x1 = BasicConv2d(64, 80, kernel_size=1) - self.Conv2d_4a_3x3 = BasicConv2d(80, 192, kernel_size=3) - self.Mixed_5b = InceptionA(192, pool_features=32) - self.Mixed_5c = InceptionA(256, pool_features=64) - self.Mixed_5d = InceptionA(288, pool_features=64) - self.Mixed_6a = InceptionB(288) - self.Mixed_6b = InceptionC(768, channels_7x7=128) - self.Mixed_6c = InceptionC(768, channels_7x7=160) - self.Mixed_6d = InceptionC(768, channels_7x7=160) - self.Mixed_6e = InceptionC(768, channels_7x7=192) - if aux_logits: - self.AuxLogits = InceptionAux(768, num_classes) - self.Mixed_7a = InceptionD(768) - self.Mixed_7b = InceptionE(1280) - self.Mixed_7c = InceptionE(2048) - self.group1 = nn.Sequential( - OrderedDict([ - ('fc', nn.Linear(2048, num_classes)) - ]) - ) - - for m in self.modules(): - if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): - import scipy.stats as stats - stddev = m.stddev if hasattr(m, 'stddev') else 0.1 - X = stats.truncnorm(-2, 2, scale=stddev) - values = torch.Tensor(X.rvs(m.weight.data.numel())) - m.weight.data.copy_(values.reshape(m.weight.shape)) - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - - def forward(self, x): - if self.transform_input: - x = x.clone() - x[0] = x[0] * (0.229 / 0.5) + (0.485 - 0.5) / 0.5 - x[1] = x[1] * (0.224 / 0.5) + (0.456 - 0.5) / 0.5 - x[2] = x[2] * (0.225 / 0.5) + (0.406 - 0.5) / 0.5 - # 299 x 299 x 3 - x = self.Conv2d_1a_3x3(x) - # 149 x 149 x 32 - x = self.Conv2d_2a_3x3(x) - # 147 x 147 x 32 - x = self.Conv2d_2b_3x3(x) - # 147 x 147 x 64 - x = F.max_pool2d(x, kernel_size=3, stride=2) - # 73 x 73 x 64 - x = self.Conv2d_3b_1x1(x) - # 73 x 73 x 80 - x = self.Conv2d_4a_3x3(x) - # 71 x 71 x 192 - x = F.max_pool2d(x, kernel_size=3, stride=2) - # 35 x 35 x 192 - x = self.Mixed_5b(x) - # 35 x 35 x 256 - x = self.Mixed_5c(x) - # 35 x 35 x 288 - x = self.Mixed_5d(x) - # 35 x 35 x 288 - x = self.Mixed_6a(x) - # 17 x 17 x 768 - x = self.Mixed_6b(x) - # 17 x 17 x 768 - x = self.Mixed_6c(x) - # 17 x 17 x 768 - x = self.Mixed_6d(x) - # 17 x 17 x 768 - x = self.Mixed_6e(x) - # 17 x 17 x 768 - if self.training and self.aux_logits: - aux = self.AuxLogits(x) - # 17 x 17 x 768 - x = self.Mixed_7a(x) - # 8 x 8 x 1280 - x = self.Mixed_7b(x) - # 8 x 8 x 2048 - x = self.Mixed_7c(x) - # 8 x 8 x 2048 - x = F.avg_pool2d(x, kernel_size=8) - # 1 x 1 x 2048 - x = F.dropout(x, training=self.training) - # 1 x 1 x 2048 - x = x.view(x.size(0), -1) - # 2048 - x = self.group1(x) - # 1000 (num_classes) - if self.training and self.aux_logits: - return x, aux - return x - - -class InceptionA(nn.Module): - - def __init__(self, in_channels, pool_features): - super(InceptionA, self).__init__() - self.branch1x1 = BasicConv2d(in_channels, 64, kernel_size=1) - - self.branch5x5_1 = BasicConv2d(in_channels, 48, kernel_size=1) - self.branch5x5_2 = BasicConv2d(48, 64, kernel_size=5, padding=2) - - self.branch3x3dbl_1 = BasicConv2d(in_channels, 64, kernel_size=1) - 
self.branch3x3dbl_2 = BasicConv2d(64, 96, kernel_size=3, padding=1) - self.branch3x3dbl_3 = BasicConv2d(96, 96, kernel_size=3, padding=1) - - self.branch_pool = BasicConv2d(in_channels, pool_features, kernel_size=1) - - def forward(self, x): - branch1x1 = self.branch1x1(x) - - branch5x5 = self.branch5x5_1(x) - branch5x5 = self.branch5x5_2(branch5x5) - - branch3x3dbl = self.branch3x3dbl_1(x) - branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) - branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) - - branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) - branch_pool = self.branch_pool(branch_pool) - - outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool] - return torch.cat(outputs, 1) - - -class InceptionB(nn.Module): - - def __init__(self, in_channels): - super(InceptionB, self).__init__() - self.branch3x3 = BasicConv2d(in_channels, 384, kernel_size=3, stride=2) - - self.branch3x3dbl_1 = BasicConv2d(in_channels, 64, kernel_size=1) - self.branch3x3dbl_2 = BasicConv2d(64, 96, kernel_size=3, padding=1) - self.branch3x3dbl_3 = BasicConv2d(96, 96, kernel_size=3, stride=2) - - def forward(self, x): - branch3x3 = self.branch3x3(x) - - branch3x3dbl = self.branch3x3dbl_1(x) - branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) - branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) - - branch_pool = F.max_pool2d(x, kernel_size=3, stride=2) - - outputs = [branch3x3, branch3x3dbl, branch_pool] - return torch.cat(outputs, 1) - - -class InceptionC(nn.Module): - - def __init__(self, in_channels, channels_7x7): - super(InceptionC, self).__init__() - self.branch1x1 = BasicConv2d(in_channels, 192, kernel_size=1) - - c7 = channels_7x7 - self.branch7x7_1 = BasicConv2d(in_channels, c7, kernel_size=1) - self.branch7x7_2 = BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3)) - self.branch7x7_3 = BasicConv2d(c7, 192, kernel_size=(7, 1), padding=(3, 0)) - - self.branch7x7dbl_1 = BasicConv2d(in_channels, c7, kernel_size=1) - self.branch7x7dbl_2 = BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)) - self.branch7x7dbl_3 = BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3)) - self.branch7x7dbl_4 = BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)) - self.branch7x7dbl_5 = BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3)) - - self.branch_pool = BasicConv2d(in_channels, 192, kernel_size=1) - - def forward(self, x): - branch1x1 = self.branch1x1(x) - - branch7x7 = self.branch7x7_1(x) - branch7x7 = self.branch7x7_2(branch7x7) - branch7x7 = self.branch7x7_3(branch7x7) - - branch7x7dbl = self.branch7x7dbl_1(x) - branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl) - branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl) - branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl) - branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl) - - branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) - branch_pool = self.branch_pool(branch_pool) - - outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool] - return torch.cat(outputs, 1) - - -class InceptionD(nn.Module): - - def __init__(self, in_channels): - super(InceptionD, self).__init__() - self.branch3x3_1 = BasicConv2d(in_channels, 192, kernel_size=1) - self.branch3x3_2 = BasicConv2d(192, 320, kernel_size=3, stride=2) - - self.branch7x7x3_1 = BasicConv2d(in_channels, 192, kernel_size=1) - self.branch7x7x3_2 = BasicConv2d(192, 192, kernel_size=(1, 7), padding=(0, 3)) - self.branch7x7x3_3 = BasicConv2d(192, 192, kernel_size=(7, 1), padding=(3, 0)) - self.branch7x7x3_4 = BasicConv2d(192, 192, kernel_size=3, stride=2) - - def forward(self, x): - branch3x3 = 
self.branch3x3_1(x) - branch3x3 = self.branch3x3_2(branch3x3) - - branch7x7x3 = self.branch7x7x3_1(x) - branch7x7x3 = self.branch7x7x3_2(branch7x7x3) - branch7x7x3 = self.branch7x7x3_3(branch7x7x3) - branch7x7x3 = self.branch7x7x3_4(branch7x7x3) - - branch_pool = F.max_pool2d(x, kernel_size=3, stride=2) - outputs = [branch3x3, branch7x7x3, branch_pool] - return torch.cat(outputs, 1) - - -class InceptionE(nn.Module): - - def __init__(self, in_channels): - super(InceptionE, self).__init__() - self.branch1x1 = BasicConv2d(in_channels, 320, kernel_size=1) - - self.branch3x3_1 = BasicConv2d(in_channels, 384, kernel_size=1) - self.branch3x3_2a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1)) - self.branch3x3_2b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0)) - - self.branch3x3dbl_1 = BasicConv2d(in_channels, 448, kernel_size=1) - self.branch3x3dbl_2 = BasicConv2d(448, 384, kernel_size=3, padding=1) - self.branch3x3dbl_3a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1)) - self.branch3x3dbl_3b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0)) - - self.branch_pool = BasicConv2d(in_channels, 192, kernel_size=1) - - def forward(self, x): - branch1x1 = self.branch1x1(x) - - branch3x3 = self.branch3x3_1(x) - branch3x3 = [ - self.branch3x3_2a(branch3x3), - self.branch3x3_2b(branch3x3), - ] - branch3x3 = torch.cat(branch3x3, 1) - - branch3x3dbl = self.branch3x3dbl_1(x) - branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) - branch3x3dbl = [ - self.branch3x3dbl_3a(branch3x3dbl), - self.branch3x3dbl_3b(branch3x3dbl), - ] - branch3x3dbl = torch.cat(branch3x3dbl, 1) - - branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) - branch_pool = self.branch_pool(branch_pool) - - outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool] - return torch.cat(outputs, 1) - - -class InceptionAux(nn.Module): - - def __init__(self, in_channels, num_classes): - super(InceptionAux, self).__init__() - self.conv0 = BasicConv2d(in_channels, 128, kernel_size=1) - self.conv1 = BasicConv2d(128, 768, kernel_size=5) - self.conv1.stddev = 0.01 - - fc = nn.Linear(768, num_classes) - fc.stddev = 0.001 - - self.group1 = nn.Sequential( - OrderedDict([ - ('fc', fc) - ]) - ) - - def forward(self, x): - # 17 x 17 x 768 - x = F.avg_pool2d(x, kernel_size=5, stride=3) - # 5 x 5 x 768 - x = self.conv0(x) - # 5 x 5 x 128 - x = self.conv1(x) - # 1 x 1 x 768 - x = x.view(x.size(0), -1) - # 768 - x = self.group1(x) - # 1000 - return x - - -class BasicConv2d(nn.Module): - - def __init__(self, in_channels, out_channels, **kwargs): - super(BasicConv2d, self).__init__() - self.group1 = nn.Sequential( - OrderedDict([ - ('conv', nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)) - # ,('bn', nn.BatchNorm2d(out_channels, eps=0.001)) - ]) - ) - - def forward(self, x): - x = self.group1(x) - return F.relu(x, inplace=True) - - -def vgg_make_layers(cfg, batch_norm=False): - layers = [] - in_channels = 3 - for v in cfg: - if v == 'M': - layers += [nn.MaxPool2d(kernel_size=2, stride=2)] - else: - conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) - if batch_norm: - layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] - else: - layers += [conv2d, nn.ReLU(inplace=True)] - in_channels = v - return nn.Sequential(*layers) - - - - -def getLeNet(num_classes=10): - model = LeNet(num_classes) - return model - -def getAlexnet(num_classes=10): - model = AlexNet(num_classes) - return model - -def get_vgg16(num_classes=10): - vgg16_setting = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 
512, 512, 'M', 512, 512, 512, 'M'] - model = VGG(vgg_make_layers(vgg16_setting), num_classes) - return model - - -def get_resnet18(num_classes=10): - model = ResNet(ResNetBasicBlock, [2, 2, 2, 2], num_classes) - return model - - -def get_squeezenet(num_classes=10): - model = SqueezeNet(num_classes) - return model - - -def get_inception_v3(num_classes=10): - model = Inception3(num_classes) - return model - - - - From feafeac7386d579377a09d36d17f88dbc1e80745 Mon Sep 17 00:00:00 2001 From: xkjiang-srfv <52552899+xkjiang-srfv@users.noreply.github.com> Date: Mon, 30 Aug 2021 16:45:59 +0800 Subject: [PATCH 07/11] Add files via upload --- model.py | 632 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 632 insertions(+) create mode 100644 model.py diff --git a/model.py b/model.py new file mode 100644 index 0000000..7a56d8f --- /dev/null +++ b/model.py @@ -0,0 +1,632 @@ +from collections import OrderedDict +import torch.nn as nn +import torch.utils.model_zoo as model_zoo +import torch.nn.functional as F +import math +import torch + +class AlexNet(nn.Module): + + def __init__(self, num_classes=10): + super(AlexNet, self).__init__() + self.features = nn.Sequential( + nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(96, 256, kernel_size=5, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(256, 384, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(384, 384, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(384, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + ) + self.classifier = nn.Sequential( + nn.Dropout(), + nn.Linear(256 * 6 * 6, 4096), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(inplace=True), + nn.Linear(4096, num_classes), + # nn.Softmax() + ) + + def forward(self, x): + if hasattr(self, "first_input_prune"): + x = self.first_input_prune(x) + x = self.features(x) + x = x.view(x.size(0), 256 * 6 * 6) + x = self.classifier(x) + return x +class LeNet(nn.Module): + def __init__(self, num_classes=10): + super(LeNet, self).__init__() + self.features = nn.Sequential( + nn.Conv2d(1, 6, kernel_size=5), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.Conv2d(6, 16, kernel_size=5), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.Conv2d(16, 120, kernel_size=5), + nn.ReLU(inplace=True) + ) + self.classifier = nn.Sequential( + nn.Linear(120, 84), + nn.ReLU(inplace=True), + nn.Linear(84, num_classes) + ) + + def forward(self, x): + if hasattr(self, "first_input_prune"): + x = self.first_input_prune(x) + x = self.features(x) + x = x.view(x.size(0), -1) + x = self.classifier(x) + return x + +class VGG(nn.Module): + + def __init__(self, features, num_classes=10): + super(VGG, self).__init__() + self.features = features + self.classifier = nn.Sequential( + nn.Linear(512 * 7 * 7, 4096), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(4096, num_classes), + ) + self._initialize_weights() + + def forward(self, x): + if hasattr(self, "first_input_prune"): + x = self.first_input_prune(x) + x = self.features(x) + x = x.view(x.size(0), -1) + x = self.classifier(x) + return x + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, 
math.sqrt(2. / n)) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + n = m.weight.size(1) + m.weight.data.normal_(0, 0.01) + m.bias.data.zero_() + + +class ResNet(nn.Module): + def __init__(self, block, layers, num_classes=10): + self.inplanes = 64 + super(ResNet, self).__init__() + + m = OrderedDict() + m['conv1'] = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) + m['bn1'] = nn.BatchNorm2d(64) + m['relu1'] = nn.ReLU(inplace=True) + m['maxpool'] = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.group1 = nn.Sequential(m) + + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + + self.avgpool = nn.Sequential(nn.AvgPool2d(7)) + + self.group2 = nn.Sequential( + OrderedDict([ + ('fc', nn.Linear(512 * block.expansion, num_classes)) + ]) + ) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.group1(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.group2(x) + + return x + + +class ResNetBasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(ResNetBasicBlock, self).__init__() + m = OrderedDict() + m['conv1'] = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + m['relu1'] = nn.ReLU(inplace=True) + m['conv2'] = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) + self.group1 = nn.Sequential(m) + self.relu = nn.Sequential(nn.ReLU(inplace=True)) + self.downsample = downsample + + def forward(self, x): + if self.downsample is not None: + residual = self.downsample(x) + else: + residual = x + out = self.group1(x) + residual + out = self.relu(out) + return out + +class Fire(nn.Module): + + def __init__(self, inplanes, squeeze_planes, + expand1x1_planes, expand3x3_planes): + super(Fire, self).__init__() + self.inplanes = inplanes + + self.group1 = nn.Sequential( + OrderedDict([ + ('squeeze', nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)), + ('squeeze_activation', nn.ReLU(inplace=True)) + ]) + ) + + self.group2 = nn.Sequential( + OrderedDict([ + ('expand1x1', nn.Conv2d(squeeze_planes, expand1x1_planes, kernel_size=1)), + ('expand1x1_activation', nn.ReLU(inplace=True)) + ]) + ) + + self.group3 = nn.Sequential( + OrderedDict([ + ('expand3x3', nn.Conv2d(squeeze_planes, expand3x3_planes, kernel_size=3, padding=1)), + ('expand3x3_activation', nn.ReLU(inplace=True)) + ]) + ) + + def 
forward(self, x): + x = self.group1(x) + return torch.cat([self.group2(x), self.group3(x)], 1) + + +class SqueezeNet(nn.Module): + + def __init__(self, num_classes=1000): + super(SqueezeNet, self).__init__() + self.num_classes = num_classes + self.features = nn.Sequential( + nn.Conv2d(3, 96, kernel_size=7, stride=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(96, 16, 64, 64), + Fire(128, 16, 64, 64), + Fire(128, 32, 128, 128), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(256, 32, 128, 128), + Fire(256, 48, 192, 192), + Fire(384, 48, 192, 192), + Fire(384, 64, 256, 256), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(512, 64, 256, 256), + ) + # Final convolution is initialized differently from the rest + final_conv = nn.Conv2d(512, num_classes, kernel_size=1) + self.classifier = nn.Sequential( + nn.Dropout(p=0.5), + final_conv, + nn.ReLU(inplace=True), + nn.AvgPool2d(13) + ) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + gain = 2.0 + if m is final_conv: + m.weight.data.normal_(0, 0.01) + else: + fan_in = m.kernel_size[0] * m.kernel_size[1] * m.in_channels + u = math.sqrt(3.0 * gain / fan_in) + m.weight.data.uniform_(-u, u) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x): + x = self.features(x) + x = self.classifier(x) + return x.view(x.size(0), self.num_classes) + + +class Inception3(nn.Module): + + def __init__(self, num_classes=1000, aux_logits=False, transform_input=False): + super(Inception3, self).__init__() + self.aux_logits = aux_logits + self.transform_input = transform_input + self.Conv2d_1a_3x3 = BasicConv2d(3, 32, kernel_size=3, stride=2) + self.Conv2d_2a_3x3 = BasicConv2d(32, 32, kernel_size=3) + self.Conv2d_2b_3x3 = BasicConv2d(32, 64, kernel_size=3, padding=1) + self.Conv2d_3b_1x1 = BasicConv2d(64, 80, kernel_size=1) + self.Conv2d_4a_3x3 = BasicConv2d(80, 192, kernel_size=3) + self.Mixed_5b = InceptionA(192, pool_features=32) + self.Mixed_5c = InceptionA(256, pool_features=64) + self.Mixed_5d = InceptionA(288, pool_features=64) + self.Mixed_6a = InceptionB(288) + self.Mixed_6b = InceptionC(768, channels_7x7=128) + self.Mixed_6c = InceptionC(768, channels_7x7=160) + self.Mixed_6d = InceptionC(768, channels_7x7=160) + self.Mixed_6e = InceptionC(768, channels_7x7=192) + if aux_logits: + self.AuxLogits = InceptionAux(768, num_classes) + self.Mixed_7a = InceptionD(768) + self.Mixed_7b = InceptionE(1280) + self.Mixed_7c = InceptionE(2048) + self.group1 = nn.Sequential( + OrderedDict([ + ('fc', nn.Linear(2048, num_classes)) + ]) + ) + + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): + import scipy.stats as stats + stddev = m.stddev if hasattr(m, 'stddev') else 0.1 + X = stats.truncnorm(-2, 2, scale=stddev) + values = torch.Tensor(X.rvs(m.weight.data.numel())) + m.weight.data.copy_(values.reshape(m.weight.shape)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def forward(self, x): + if self.transform_input: + x = x.clone() + x[0] = x[0] * (0.229 / 0.5) + (0.485 - 0.5) / 0.5 + x[1] = x[1] * (0.224 / 0.5) + (0.456 - 0.5) / 0.5 + x[2] = x[2] * (0.225 / 0.5) + (0.406 - 0.5) / 0.5 + # 299 x 299 x 3 + x = self.Conv2d_1a_3x3(x) + # 149 x 149 x 32 + x = self.Conv2d_2a_3x3(x) + # 147 x 147 x 32 + x = self.Conv2d_2b_3x3(x) + # 147 x 147 x 64 + x = F.max_pool2d(x, kernel_size=3, stride=2) + # 73 x 73 x 64 + x = self.Conv2d_3b_1x1(x) + # 73 x 73 x 80 + x = self.Conv2d_4a_3x3(x) + # 71 x 71 x 192 + x = 
F.max_pool2d(x, kernel_size=3, stride=2) + # 35 x 35 x 192 + x = self.Mixed_5b(x) + # 35 x 35 x 256 + x = self.Mixed_5c(x) + # 35 x 35 x 288 + x = self.Mixed_5d(x) + # 35 x 35 x 288 + x = self.Mixed_6a(x) + # 17 x 17 x 768 + x = self.Mixed_6b(x) + # 17 x 17 x 768 + x = self.Mixed_6c(x) + # 17 x 17 x 768 + x = self.Mixed_6d(x) + # 17 x 17 x 768 + x = self.Mixed_6e(x) + # 17 x 17 x 768 + if self.training and self.aux_logits: + aux = self.AuxLogits(x) + # 17 x 17 x 768 + x = self.Mixed_7a(x) + # 8 x 8 x 1280 + x = self.Mixed_7b(x) + # 8 x 8 x 2048 + x = self.Mixed_7c(x) + # 8 x 8 x 2048 + x = F.avg_pool2d(x, kernel_size=8) + # 1 x 1 x 2048 + x = F.dropout(x, training=self.training) + # 1 x 1 x 2048 + x = x.view(x.size(0), -1) + # 2048 + x = self.group1(x) + # 1000 (num_classes) + if self.training and self.aux_logits: + return x, aux + return x + + +class InceptionA(nn.Module): + + def __init__(self, in_channels, pool_features): + super(InceptionA, self).__init__() + self.branch1x1 = BasicConv2d(in_channels, 64, kernel_size=1) + + self.branch5x5_1 = BasicConv2d(in_channels, 48, kernel_size=1) + self.branch5x5_2 = BasicConv2d(48, 64, kernel_size=5, padding=2) + + self.branch3x3dbl_1 = BasicConv2d(in_channels, 64, kernel_size=1) + self.branch3x3dbl_2 = BasicConv2d(64, 96, kernel_size=3, padding=1) + self.branch3x3dbl_3 = BasicConv2d(96, 96, kernel_size=3, padding=1) + + self.branch_pool = BasicConv2d(in_channels, pool_features, kernel_size=1) + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch5x5 = self.branch5x5_1(x) + branch5x5 = self.branch5x5_2(branch5x5) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool] + return torch.cat(outputs, 1) + + +class InceptionB(nn.Module): + + def __init__(self, in_channels): + super(InceptionB, self).__init__() + self.branch3x3 = BasicConv2d(in_channels, 384, kernel_size=3, stride=2) + + self.branch3x3dbl_1 = BasicConv2d(in_channels, 64, kernel_size=1) + self.branch3x3dbl_2 = BasicConv2d(64, 96, kernel_size=3, padding=1) + self.branch3x3dbl_3 = BasicConv2d(96, 96, kernel_size=3, stride=2) + + def forward(self, x): + branch3x3 = self.branch3x3(x) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + branch_pool = F.max_pool2d(x, kernel_size=3, stride=2) + + outputs = [branch3x3, branch3x3dbl, branch_pool] + return torch.cat(outputs, 1) + + +class InceptionC(nn.Module): + + def __init__(self, in_channels, channels_7x7): + super(InceptionC, self).__init__() + self.branch1x1 = BasicConv2d(in_channels, 192, kernel_size=1) + + c7 = channels_7x7 + self.branch7x7_1 = BasicConv2d(in_channels, c7, kernel_size=1) + self.branch7x7_2 = BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7_3 = BasicConv2d(c7, 192, kernel_size=(7, 1), padding=(3, 0)) + + self.branch7x7dbl_1 = BasicConv2d(in_channels, c7, kernel_size=1) + self.branch7x7dbl_2 = BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7dbl_3 = BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7dbl_4 = BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7dbl_5 = BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3)) + + self.branch_pool = BasicConv2d(in_channels, 
192, kernel_size=1) + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch7x7 = self.branch7x7_1(x) + branch7x7 = self.branch7x7_2(branch7x7) + branch7x7 = self.branch7x7_3(branch7x7) + + branch7x7dbl = self.branch7x7dbl_1(x) + branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool] + return torch.cat(outputs, 1) + + +class InceptionD(nn.Module): + + def __init__(self, in_channels): + super(InceptionD, self).__init__() + self.branch3x3_1 = BasicConv2d(in_channels, 192, kernel_size=1) + self.branch3x3_2 = BasicConv2d(192, 320, kernel_size=3, stride=2) + + self.branch7x7x3_1 = BasicConv2d(in_channels, 192, kernel_size=1) + self.branch7x7x3_2 = BasicConv2d(192, 192, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7x3_3 = BasicConv2d(192, 192, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7x3_4 = BasicConv2d(192, 192, kernel_size=3, stride=2) + + def forward(self, x): + branch3x3 = self.branch3x3_1(x) + branch3x3 = self.branch3x3_2(branch3x3) + + branch7x7x3 = self.branch7x7x3_1(x) + branch7x7x3 = self.branch7x7x3_2(branch7x7x3) + branch7x7x3 = self.branch7x7x3_3(branch7x7x3) + branch7x7x3 = self.branch7x7x3_4(branch7x7x3) + + branch_pool = F.max_pool2d(x, kernel_size=3, stride=2) + outputs = [branch3x3, branch7x7x3, branch_pool] + return torch.cat(outputs, 1) + + +class InceptionE(nn.Module): + + def __init__(self, in_channels): + super(InceptionE, self).__init__() + self.branch1x1 = BasicConv2d(in_channels, 320, kernel_size=1) + + self.branch3x3_1 = BasicConv2d(in_channels, 384, kernel_size=1) + self.branch3x3_2a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1)) + self.branch3x3_2b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0)) + + self.branch3x3dbl_1 = BasicConv2d(in_channels, 448, kernel_size=1) + self.branch3x3dbl_2 = BasicConv2d(448, 384, kernel_size=3, padding=1) + self.branch3x3dbl_3a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1)) + self.branch3x3dbl_3b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0)) + + self.branch_pool = BasicConv2d(in_channels, 192, kernel_size=1) + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch3x3 = self.branch3x3_1(x) + branch3x3 = [ + self.branch3x3_2a(branch3x3), + self.branch3x3_2b(branch3x3), + ] + branch3x3 = torch.cat(branch3x3, 1) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = [ + self.branch3x3dbl_3a(branch3x3dbl), + self.branch3x3dbl_3b(branch3x3dbl), + ] + branch3x3dbl = torch.cat(branch3x3dbl, 1) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool] + return torch.cat(outputs, 1) + + +class InceptionAux(nn.Module): + + def __init__(self, in_channels, num_classes): + super(InceptionAux, self).__init__() + self.conv0 = BasicConv2d(in_channels, 128, kernel_size=1) + self.conv1 = BasicConv2d(128, 768, kernel_size=5) + self.conv1.stddev = 0.01 + + fc = nn.Linear(768, num_classes) + fc.stddev = 0.001 + + self.group1 = nn.Sequential( + OrderedDict([ + ('fc', fc) + ]) + ) + + def forward(self, x): + # 17 x 17 x 768 + x = F.avg_pool2d(x, kernel_size=5, stride=3) + # 5 
x 5 x 768
+        x = self.conv0(x)
+        # 5 x 5 x 128
+        x = self.conv1(x)
+        # 1 x 1 x 768
+        x = x.view(x.size(0), -1)
+        # 768
+        x = self.group1(x)
+        # 1000
+        return x
+
+
+class BasicConv2d(nn.Module):
+
+    def __init__(self, in_channels, out_channels, **kwargs):
+        super(BasicConv2d, self).__init__()
+        self.group1 = nn.Sequential(
+            OrderedDict([
+                ('conv', nn.Conv2d(in_channels, out_channels, bias=False, **kwargs))
+                # ,('bn', nn.BatchNorm2d(out_channels, eps=0.001))
+            ])
+        )
+
+    def forward(self, x):
+        x = self.group1(x)
+        return F.relu(x, inplace=True)
+
+def vgg_make_layers(cfg, batch_norm=False):
+    layers = []
+    in_channels = 3
+    for v in cfg:
+        if v == 'M':
+            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
+        else:
+            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
+            if batch_norm:
+                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
+            else:
+                layers += [conv2d, nn.ReLU(inplace=True)]
+            in_channels = v
+    return nn.Sequential(*layers)
+
+
+
+def getLeNet(num_classes=10):
+    model = LeNet(num_classes)
+    return model
+
+def getAlexnet(num_classes=10):
+    model = AlexNet(num_classes)
+    return model
+
+def get_vgg16(num_classes=10):
+    vgg16_setting = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
+    model = VGG(vgg_make_layers(vgg16_setting), num_classes)
+    return model
+
+
+def get_resnet18(num_classes=10):
+    model = ResNet(ResNetBasicBlock, [2, 2, 2, 2], num_classes)
+    return model
+
+
+def get_squeezenet(num_classes=10):
+    model = SqueezeNet(num_classes)
+    return model
+
+
+def get_inception_v3(num_classes=10):
+    model = Inception3(num_classes)
+    return model
+
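For a quick sanity check of the factory functions above, each model can be instantiated and run on dummy input. The snippet below is a minimal sketch, not part of the patch series: it assumes the classes defined earlier in model.py are importable, and it uses the canonical 299x299 input size for Inception3 (the kernel_size=8 average pool in its head expects an 8x8 feature map at that point).

    import torch
    from model import get_inception_v3

    model = get_inception_v3(num_classes=10)
    model.eval()  # in eval mode the auxiliary head is skipped, so forward returns a single tensor
    with torch.no_grad():
        logits = model(torch.randn(1, 3, 299, 299))
    print(logits.shape)  # torch.Size([1, 10])
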
From 168427c4320013fea17e5998ac55040127e8cbe6 Mon Sep 17 00:00:00 2001
From: xkjiang-srfv <52552899+xkjiang-srfv@users.noreply.github.com>
Date: Mon, 30 Aug 2021 16:46:42 +0800
Subject: [PATCH 08/11] Delete ActivationPrune.py

---
 ActivationPrune.py | 134 ---------------------------------------------
 1 file changed, 134 deletions(-)
 delete mode 100644 ActivationPrune.py

diff --git a/ActivationPrune.py b/ActivationPrune.py
deleted file mode 100644
index bf511a3..0000000
--- a/ActivationPrune.py
+++ /dev/null
@@ -1,134 +0,0 @@
-import copy
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from torch.autograd import Function
-import time
-from model import *
-from train import *
-import random
-# from .model import ResNetBasicBlock
-
-from math import sqrt
-import copy
-from time import time
-from Conv2dNew import Execution
-
-
-class Conv2dTest(nn.Conv2d):
-    def __init__(self,
-                 ratio,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride=1,
-                 padding=0,
-                 dilation=1,
-                 groups=1,
-                 bias=True,
-                 padding_mode='zeros',
-                 ):
-        super(Conv2dTest, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups,
-                                         bias, padding_mode)
-        self.ratio = ratio
-    def forward(self, input):
-        E = Execution(self.ratio)
-        output = E.conv2d(input, self.weight, self.bias, self.stride, self.padding)
-        return output
-
-class LinearTest(nn.Linear):
-    def __init__(self,
-                 in_features,
-                 out_features,
-                 bias=True,
-                 ):
-        super(LinearTest, self).__init__(in_features, out_features, bias)
-
-    def forward(self, input):
-        output = F.linear(input, self.weight, self.bias)
-        return output
-
-def prepare(model, ratio, inplace=False):
-    # moved into prepare
-    def addActivationPruneOp(module):
-        nonlocal layer_cnt
-        for name, child in module.named_children():
-            if isinstance(child, nn.Conv2d):
-                p_name = str(layer_cnt)
-                activationPruneConv = Conv2dTest(
-                    ratio,
-                    child.in_channels,
-                    child.out_channels, child.kernel_size, stride=child.stride, padding=child.padding,
-                    dilation=child.dilation, groups=child.groups, bias=(child.bias is not None),
-                    padding_mode=child.padding_mode
-                )
-                if child.bias is not None:
-                    activationPruneConv.bias = child.bias
-                activationPruneConv.weight = child.weight
-                module._modules[name] = activationPruneConv
-                layer_cnt += 1
-            elif isinstance(child, nn.Linear):
-                p_name = str(layer_cnt)
-                activationPruneLinear = LinearTest(
-                    child.in_features, child.out_features,
-                    bias=(child.bias is not None)
-                )
-                if child.bias is not None:
-                    activationPruneLinear.bias = child.bias
-                activationPruneLinear.weight = child.weight
-                module._modules[name] = activationPruneLinear
-                layer_cnt += 1
-            else:
-                addActivationPruneOp(child)  # recurse into children; layers such as MaxPool keep their original behavior
-    layer_cnt = 0
-    if not inplace:
-        model = copy.deepcopy(model)
-    addActivationPruneOp(model)  # attach an input-feature-map pruning op to every convolutional layer
-    return model
-
-def getModel(modelName):
-    if modelName == 'LeNet':
-        return getLeNet()  # load the original model architecture
-    elif modelName == 'AlexNet':
-        return getAlexnet()
-    elif modelName == 'VGG16':
-        return get_vgg16()
-    elif modelName == 'SqueezeNet':
-        return get_squeezenet()
-    elif modelName == 'ResNet':
-        return get_resnet18()
-
-def getDataSet(modelName, batchSize, imgSize):
-    if modelName == 'VGG16' or modelName == 'AlexNet' or modelName == 'ResNet' or modelName == 'SqueezeNet':
-        dataloaders, dataset_sizes = load_cifar10(batch_size=batchSize, pth_path='./data',
-                                                  img_size=imgSize)  # select the dataset
-    elif modelName == 'LeNet':
-        dataloaders, dataset_sizes = load_mnist(batch_size=batchSize, path='./data', img_size=imgSize)
-
-    return dataloaders, dataset_sizes
-
-def getPruneModel(model_name, weight_file_path, pattern, ratio):
-    model_orign = getModel(model_name)
-    if pattern == 'test' or pattern == 'retrain':
-        model_orign.load_state_dict(torch.load(weight_file_path))  # load the trained weights into the original architecture
-    activationPruneModel = prepare(model_orign, ratio)
-
-    return activationPruneModel
-
-def activationPruneModelOp(model_name, batch_size, img_size, pattern, ratio, epoch):
-    dataloaders, dataset_sizes = getDataSet(model_name, batch_size, img_size)
-    criterion = nn.CrossEntropyLoss()
-
-    if pattern == 'retrain' or pattern == 'train':
-        weight_file_path = './pth/' + model_name + '/ratio=0' + '/Activation' + '/best.pth'
-        activationPruneModel = getPruneModel(model_name, weight_file_path, pattern, ratio)
-        optimizer = optim.SGD(activationPruneModel.parameters(), lr=0.01, momentum=0.9)
-        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.8)  # learning-rate decay schedule
-        train_model_jiang(activationPruneModel, dataloaders, dataset_sizes, ratio, 'activation', pattern, criterion=criterion, optimizer=optimizer, name=model_name,
-                          scheduler=scheduler, num_epochs=epoch, rerun=False)  # train the model
-    if pattern == 'test':
-        weight_file_path = './pth/' + model_name + '/ratio=' + str(ratio) + '/Activation/' + 'best.pth'
-        activationPruneModel = getPruneModel(model_name, weight_file_path, pattern, ratio)
-        test_model(activationPruneModel, dataloaders, dataset_sizes, criterion=criterion)
-

From 15999a8b1bef19a07755d1c88be9386f038bbab6 Mon Sep 17 00:00:00 2001
From: xkjiang-srfv <52552899+xkjiang-srfv@users.noreply.github.com>
Date: Mon, 30 Aug 2021 16:47:09 +0800
Subject: [PATCH 09/11] Add files via upload

---
 ActivationPrune.py | 139 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)
 create mode 100644 ActivationPrune.py

diff --git a/ActivationPrune.py b/ActivationPrune.py
new file mode 100644
index 0000000..37e1836
--- /dev/null
+++ b/ActivationPrune.py
@@ -0,0 +1,139 @@
+import copy
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd import Function
+import time
+from model import *
+from train import *
+import random
+# from .model import ResNetBasicBlock
+
+from math import sqrt
+import copy
+from time import time
+from Conv2dNew import Execution
+
+
+
+class Conv2dTest(nn.Conv2d):
+    def __init__(self,
+                 ratio,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias=True,
+                 padding_mode='zeros',
+                 ):
+        super(Conv2dTest, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups,
+                                         bias, padding_mode)
+        self.ratio = ratio
+    def forward(self, input):
+        E = Execution(self.ratio)
+        output = E.conv2d(input, self.weight, self.bias, self.stride, self.padding)
+        return output
+
+class LinearTest(nn.Linear):
+    def __init__(self,
+                 in_features,
+                 out_features,
+                 bias=True,
+                 ):
+        super(LinearTest, self).__init__(in_features, out_features, bias)
+
+    def forward(self, input):
+        output = F.linear(input, self.weight, self.bias)
+        return output
+
+def prepare(model, ratio, inplace=False):
+    # moved into prepare
+    def addActivationPruneOp(module):
+        nonlocal layer_cnt
+        for name, child in module.named_children():
+            if isinstance(child, nn.Conv2d):
+                p_name = str(layer_cnt)
+                activationPruneConv = Conv2dTest(
+                    ratio,
+                    child.in_channels,
+                    child.out_channels, child.kernel_size, stride=child.stride, padding=child.padding,
+                    dilation=child.dilation, groups=child.groups, bias=(child.bias is not None),
+                    padding_mode=child.padding_mode
+                )
+                if child.bias is not None:
+                    activationPruneConv.bias = child.bias
+                activationPruneConv.weight = child.weight
+                module._modules[name] = activationPruneConv
+                layer_cnt += 1
+            elif isinstance(child, nn.Linear):
+                p_name = str(layer_cnt)
+                activationPruneLinear = LinearTest(
+                    child.in_features, child.out_features,
+                    bias=(child.bias is not None)
+                )
+                if child.bias is not None:
+                    activationPruneLinear.bias = child.bias
+                activationPruneLinear.weight = child.weight
+                module._modules[name] = activationPruneLinear
+                layer_cnt += 1
+            else:
+                addActivationPruneOp(child)  # recurse into children; layers such as MaxPool keep their original behavior
+    layer_cnt = 0
+    if not inplace:
+        model = copy.deepcopy(model)
+    addActivationPruneOp(model)  # attach an input-feature-map pruning op to every convolutional layer
+    return model
+
+def getModel(modelName):
+    if modelName == 'LeNet':
+        return getLeNet()  # load the original model architecture
+    elif modelName == 'AlexNet':
+        return getAlexnet()
+    elif modelName == 'VGG16':
+        return get_vgg16()
+    elif modelName == 'SqueezeNet':
+        return get_squeezenet()
+    elif modelName == 'ResNet':
+        return get_resnet18()
+    elif modelName == 'InceptionV3':
+        return get_inception_v3()
+    # if modelName == 'MobileNet':
+    #     return mobilenetv3_large()
+
+def getDataSet(modelName, batchSize, imgSize):
+    if modelName == 'VGG16' or modelName == 'AlexNet' or modelName == 'ResNet' or modelName == 'SqueezeNet' or modelName == 'InceptionV3':
+        dataloaders, dataset_sizes = load_cifar10(batch_size=batchSize, pth_path='./data',
+                                                  img_size=imgSize)  # select the dataset
+    elif modelName == 'LeNet':
+        dataloaders, dataset_sizes = load_mnist(batch_size=batchSize, path='./data', img_size=imgSize)
+
+    return dataloaders, dataset_sizes
+
+def getPruneModel(model_name, weight_file_path, pattern, ratio):
+    model_orign = getModel(model_name)
+    if pattern == 'test' or pattern == 'retrain':
+        model_orign.load_state_dict(torch.load(weight_file_path))  # load the trained weights into the original architecture
+    activationPruneModel = prepare(model_orign, ratio)
+
+    return activationPruneModel
+
+def activationPruneModelOp(model_name, batch_size, img_size, pattern, ratio, epoch):
+    dataloaders, dataset_sizes = getDataSet(model_name, batch_size, img_size)
+    criterion = nn.CrossEntropyLoss()
+
+    if pattern == 'retrain' or pattern == 'train':
+        weight_file_path = './pth/' + model_name + '/ratio=0' + '/Activation' + '/best.pth'
+        activationPruneModel = getPruneModel(model_name, weight_file_path, pattern, ratio)
+        optimizer = optim.SGD(activationPruneModel.parameters(), lr=0.000001, momentum=0.9)
+        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.8)  # learning-rate decay schedule
+        train_model_jiang(activationPruneModel, dataloaders, dataset_sizes, ratio, 'activation', pattern, criterion=criterion, optimizer=optimizer, name=model_name,
+                          scheduler=scheduler, num_epochs=epoch, rerun=False)  # train the model
+    if pattern == 'test':
+        weight_file_path = './pth/' + model_name + '/ratio=' + str(ratio) + '/Activation/' + 'best.pth'
+        activationPruneModel = getPruneModel(model_name, weight_file_path, pattern, ratio)
+        test_model(activationPruneModel, dataloaders, dataset_sizes, criterion=criterion)
+
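The effect of prepare() in the file just added can be checked by counting the swapped layers. The sketch below assumes ActivationPrune.py and model.py from this series are on the import path: prepare() deep-copies the network (unless inplace=True), replaces every nn.Conv2d with a ratio-aware Conv2dTest and every nn.Linear with a LinearTest, and leaves everything else (MaxPool, ReLU, ...) untouched.

    from ActivationPrune import prepare
    from model import getAlexnet

    model = getAlexnet(num_classes=10)
    pruned = prepare(model, ratio=0.5)
    # Conv2dTest subclasses nn.Conv2d, so match on the concrete type name.
    swapped = [type(m).__name__ for m in pruned.modules()
               if type(m).__name__ in ('Conv2dTest', 'LinearTest')]
    print(swapped)  # one entry per conv/linear layer of the original AlexNet
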
From f7c8a0010117e31a9b36274c1ee35b1480975340 Mon Sep 17 00:00:00 2001
From: xkjiang-srfv <52552899+xkjiang-srfv@users.noreply.github.com>
Date: Mon, 30 Aug 2021 16:50:10 +0800
Subject: [PATCH 10/11] Delete ActivationPrune.py

---
 ActivationPrune.py | 139 ---------------------------------------------
 1 file changed, 139 deletions(-)
 delete mode 100644 ActivationPrune.py

diff --git a/ActivationPrune.py b/ActivationPrune.py
deleted file mode 100644
index 37e1836..0000000
--- a/ActivationPrune.py
+++ /dev/null
@@ -1,139 +0,0 @@
-import copy
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from torch.autograd import Function
-import time
-from model import *
-from train import *
-import random
-# from .model import ResNetBasicBlock
-
-from math import sqrt
-import copy
-from time import time
-from Conv2dNew import Execution
-
-
-
-class Conv2dTest(nn.Conv2d):
-    def __init__(self,
-                 ratio,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride=1,
-                 padding=0,
-                 dilation=1,
-                 groups=1,
-                 bias=True,
-                 padding_mode='zeros',
-                 ):
-        super(Conv2dTest, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups,
-                                         bias, padding_mode)
-        self.ratio = ratio
-    def forward(self, input):
-        E = Execution(self.ratio)
-        output = E.conv2d(input, self.weight, self.bias, self.stride, self.padding)
-        return output
-
-class LinearTest(nn.Linear):
-    def __init__(self,
-                 in_features,
-                 out_features,
-                 bias=True,
-                 ):
-        super(LinearTest, self).__init__(in_features, out_features, bias)
-
-    def forward(self, input):
-        output = F.linear(input, self.weight, self.bias)
-        return output
-
-def prepare(model, ratio, inplace=False):
-    # moved into prepare
-    def addActivationPruneOp(module):
-        nonlocal layer_cnt
-        for name, child in module.named_children():
-            if isinstance(child, nn.Conv2d):
-                p_name = str(layer_cnt)
-                activationPruneConv = Conv2dTest(
-                    ratio,
-                    child.in_channels,
-                    child.out_channels, child.kernel_size, stride=child.stride, padding=child.padding,
-                    dilation=child.dilation, groups=child.groups, bias=(child.bias is not None),
-                    padding_mode=child.padding_mode
-                )
-                if child.bias is not None:
-                    activationPruneConv.bias = child.bias
-                activationPruneConv.weight = child.weight
-                module._modules[name] = activationPruneConv
-                layer_cnt += 1
-            elif isinstance(child, nn.Linear):
-                p_name = str(layer_cnt)
-                activationPruneLinear = LinearTest(
-                    child.in_features, child.out_features,
-                    bias=(child.bias is not None)
-                )
-                if child.bias is not None:
-                    activationPruneLinear.bias = child.bias
-                activationPruneLinear.weight = child.weight
-                module._modules[name] = activationPruneLinear
-                layer_cnt += 1
-            else:
-                addActivationPruneOp(child)  # recurse into children; layers such as MaxPool keep their original behavior
-    layer_cnt = 0
-    if not inplace:
-        model = copy.deepcopy(model)
-    addActivationPruneOp(model)  # attach an input-feature-map pruning op to every convolutional layer
-    return model
-
-def getModel(modelName):
-    if modelName == 'LeNet':
-        return getLeNet()  # load the original model architecture
-    elif modelName == 'AlexNet':
-        return getAlexnet()
-    elif modelName == 'VGG16':
-        return get_vgg16()
-    elif modelName == 'SqueezeNet':
-        return get_squeezenet()
-    elif modelName == 'ResNet':
-        return get_resnet18()
-    elif modelName == 'InceptionV3':
-        return get_inception_v3()
-    # if modelName == 'MobileNet':
-    #     return mobilenetv3_large()
-
-def getDataSet(modelName, batchSize, imgSize):
-    if modelName == 'VGG16' or modelName == 'AlexNet' or modelName == 'ResNet' or modelName == 'SqueezeNet' or modelName == 'InceptionV3':
-        dataloaders, dataset_sizes = load_cifar10(batch_size=batchSize, pth_path='./data',
-                                                  img_size=imgSize)  # select the dataset
-    elif modelName == 'LeNet':
-        dataloaders, dataset_sizes = load_mnist(batch_size=batchSize, path='./data', img_size=imgSize)
-
-    return dataloaders, dataset_sizes
-
-def getPruneModel(model_name, weight_file_path, pattern, ratio):
-    model_orign = getModel(model_name)
-    if pattern == 'test' or pattern == 'retrain':
-        model_orign.load_state_dict(torch.load(weight_file_path))  # load the trained weights into the original architecture
-    activationPruneModel = prepare(model_orign, ratio)
-
-    return activationPruneModel
-
-def activationPruneModelOp(model_name, batch_size, img_size, pattern, ratio, epoch):
-    dataloaders, dataset_sizes = getDataSet(model_name, batch_size, img_size)
-    criterion = nn.CrossEntropyLoss()
-
-    if pattern == 'retrain' or pattern == 'train':
-        weight_file_path = './pth/' + model_name + '/ratio=0' + '/Activation' + '/best.pth'
-        activationPruneModel = getPruneModel(model_name, weight_file_path, pattern, ratio)
-        optimizer = optim.SGD(activationPruneModel.parameters(), lr=0.000001, momentum=0.9)
-        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.8)  # learning-rate decay schedule
-        train_model_jiang(activationPruneModel, dataloaders, dataset_sizes, ratio, 'activation', pattern, criterion=criterion, optimizer=optimizer, name=model_name,
-                          scheduler=scheduler, num_epochs=epoch, rerun=False)  # train the model
-    if pattern == 'test':
-        weight_file_path = './pth/' + model_name + '/ratio=' + str(ratio) + '/Activation/' + 'best.pth'
-        activationPruneModel = getPruneModel(model_name, weight_file_path, pattern, ratio)
-        test_model(activationPruneModel, dataloaders, dataset_sizes, criterion=criterion)
-
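Conv2dTest delegates the actual pruned convolution to Execution from Conv2dNew.py, which these patches never touch. As a rough mental model only — the class below is a hypothetical stand-in, an assumption for illustration rather than the code in Conv2dNew.py — the call can be thought of as zeroing the smallest-magnitude fraction ratio of the input feature map and then running a standard convolution:

    import torch
    import torch.nn.functional as F

    class ExecutionSketch:  # hypothetical stand-in for Conv2dNew.Execution
        def __init__(self, ratio):
            self.ratio = ratio  # fraction of input activations assumed pruned

        def conv2d(self, input, weight, bias, stride, padding):
            k = int(input.numel() * self.ratio)
            if k > 0:
                # kthvalue on the flattened magnitudes gives the pruning threshold
                thresh = input.abs().flatten().kthvalue(k).values
                input = torch.where(input.abs() <= thresh,
                                    torch.zeros_like(input), input)
            return F.conv2d(input, weight, bias, stride=stride, padding=padding)
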
From d883b38d02f04d35e104be7acaf6268215cd321a Mon Sep 17 00:00:00 2001
From: xkjiang-srfv <52552899+xkjiang-srfv@users.noreply.github.com>
Date: Mon, 30 Aug 2021 16:50:55 +0800
Subject: [PATCH 11/11] Add files via upload

---
 ActivationPrune.py | 139 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)
 create mode 100644 ActivationPrune.py

diff --git a/ActivationPrune.py b/ActivationPrune.py
new file mode 100644
index 0000000..f0a3c98
--- /dev/null
+++ b/ActivationPrune.py
@@ -0,0 +1,139 @@
+import copy
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd import Function
+import time
+from model import *
+from train import *
+import random
+# from .model import ResNetBasicBlock
+
+from math import sqrt
+import copy
+from time import time
+from Conv2dNew import Execution
+
+
+
+class Conv2dTest(nn.Conv2d):
+    def __init__(self,
+                 ratio,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias=True,
+                 padding_mode='zeros',
+                 ):
+        super(Conv2dTest, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups,
+                                         bias, padding_mode)
+        self.ratio = ratio
+    def forward(self, input):
+        E = Execution(self.ratio)
+        output = E.conv2d(input, self.weight, self.bias, self.stride, self.padding)
+        return output
+
+class LinearTest(nn.Linear):
+    def __init__(self,
+                 in_features,
+                 out_features,
+                 bias=True,
+                 ):
+        super(LinearTest, self).__init__(in_features, out_features, bias)
+
+    def forward(self, input):
+        output = F.linear(input, self.weight, self.bias)
+        return output
+
+def prepare(model, ratio, inplace=False):
+    # moved into prepare
+    def addActivationPruneOp(module):
+        nonlocal layer_cnt
+        for name, child in module.named_children():
+            if isinstance(child, nn.Conv2d):
+                p_name = str(layer_cnt)
+                activationPruneConv = Conv2dTest(
+                    ratio,
+                    child.in_channels,
+                    child.out_channels, child.kernel_size, stride=child.stride, padding=child.padding,
+                    dilation=child.dilation, groups=child.groups, bias=(child.bias is not None),
+                    padding_mode=child.padding_mode
+                )
+                if child.bias is not None:
+                    activationPruneConv.bias = child.bias
+                activationPruneConv.weight = child.weight
+                module._modules[name] = activationPruneConv
+                layer_cnt += 1
+            elif isinstance(child, nn.Linear):
+                p_name = str(layer_cnt)
+                activationPruneLinear = LinearTest(
+                    child.in_features, child.out_features,
+                    bias=(child.bias is not None)
+                )
+                if child.bias is not None:
+                    activationPruneLinear.bias = child.bias
+                activationPruneLinear.weight = child.weight
+                module._modules[name] = activationPruneLinear
+                layer_cnt += 1
+            else:
+                addActivationPruneOp(child)  # recurse into children; layers such as MaxPool keep their original behavior
+    layer_cnt = 0
+    if not inplace:
+        model = copy.deepcopy(model)
+    addActivationPruneOp(model)  # attach an input-feature-map pruning op to every convolutional layer
+    return model
+
+def getModel(modelName):
+    if modelName == 'LeNet':
+        return getLeNet()  # load the original model architecture
+    elif modelName == 'AlexNet':
+        return getAlexnet()
+    elif modelName == 'VGG16':
+        return get_vgg16()
+    elif modelName == 'SqueezeNet':
+        return get_squeezenet()
+    elif modelName == 'ResNet':
+        return get_resnet18()
+    elif modelName == 'InceptionV3':
+        return get_inception_v3()
+    # if modelName == 'MobileNet':
+    #     return mobilenetv3_large()
+
+def getDataSet(modelName, batchSize, imgSize):
+    if modelName == 'VGG16' or modelName == 'AlexNet' or modelName == 'ResNet' or modelName == 'SqueezeNet' or modelName == 'InceptionV3':
+        dataloaders, dataset_sizes = load_cifar10(batch_size=batchSize, pth_path='./data',
+                                                  img_size=imgSize)  # select the dataset
+    elif modelName == 'LeNet':
+        dataloaders, dataset_sizes = load_mnist(batch_size=batchSize, path='./data', img_size=imgSize)
+
+    return dataloaders, dataset_sizes
+
+def getPruneModel(model_name, weight_file_path, pattern, ratio):
+    model_orign = getModel(model_name)
+    if pattern == 'test' or pattern == 'retrain':
+        model_orign.load_state_dict(torch.load(weight_file_path))  # load the trained weights into the original architecture
+    activationPruneModel = prepare(model_orign, ratio)
+
+    return activationPruneModel
+
+def activationPruneModelOp(model_name, batch_size, img_size, pattern, ratio, epoch):
+    dataloaders, dataset_sizes = getDataSet(model_name, batch_size, img_size)
+    criterion = nn.CrossEntropyLoss()
+
+    if pattern == 'retrain' or pattern == 'train':
+        weight_file_path = './pth/' + model_name + '/ratio=0' + '/Activation' + '/best.pth'
+        activationPruneModel = getPruneModel(model_name, weight_file_path, pattern, ratio)
+        optimizer = optim.SGD(activationPruneModel.parameters(), lr=0.01, momentum=0.9)
+        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.8)  # learning-rate decay schedule
+        train_model_jiang(activationPruneModel, dataloaders, dataset_sizes, ratio, 'activation', pattern, criterion=criterion, optimizer=optimizer, name=model_name,
+                          scheduler=scheduler, num_epochs=epoch, rerun=False)  # train the model
+    if pattern == 'test':
+        weight_file_path = './pth/' + model_name + '/ratio=' + str(ratio) + '/Activation/' + 'best.pth'
+        activationPruneModel = getPruneModel(model_name, weight_file_path, pattern, ratio)
+        test_model(activationPruneModel, dataloaders, dataset_sizes, criterion=criterion)
+
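Taken together, the final version of ActivationPrune.py is driven by a call like the one below (main.py in this series presumably wires up something similar). This is a usage sketch, not code from the patches: the batch size and image size are illustrative, and the paths are the ones hard-coded above, so pattern='retrain' expects ./pth/<model>/ratio=0/Activation/best.pth to already exist.

    from ActivationPrune import activationPruneModelOp

    # Retrain AlexNet on CIFAR-10 with 50% input-activation pruning, then evaluate
    # the best checkpoint saved under ./pth/AlexNet/ratio=0.5/Activation/.
    activationPruneModelOp('AlexNet', 128, 32, pattern='retrain', ratio=0.5, epoch=10)
    activationPruneModelOp('AlexNet', 128, 32, pattern='test', ratio=0.5, epoch=0)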