From 93658e4fe61759ab75c25559f8fff33305d2e4ec Mon Sep 17 00:00:00 2001 From: liying3 Date: Sun, 21 Sep 2014 22:02:46 -0400 Subject: [PATCH 1/2] addFile --- Part2/matrix/matrix.sln | 20 +++ Part2/matrix/matrix/matrix.vcxproj | 86 ++++++++++ Part2/matrix/matrix/matrix.vcxproj.filters | 22 +++ Part2/matrix/matrix/matrix_math.cu | 174 +++++++++++++++++++++ README_YingLi.md | 9 ++ 5 files changed, 311 insertions(+) create mode 100644 Part2/matrix/matrix.sln create mode 100644 Part2/matrix/matrix/matrix.vcxproj create mode 100644 Part2/matrix/matrix/matrix.vcxproj.filters create mode 100644 Part2/matrix/matrix/matrix_math.cu create mode 100644 README_YingLi.md diff --git a/Part2/matrix/matrix.sln b/Part2/matrix/matrix.sln new file mode 100644 index 0000000..6de51e4 --- /dev/null +++ b/Part2/matrix/matrix.sln @@ -0,0 +1,20 @@ + +Microsoft Visual Studio Solution File, Format Version 11.00 +# Visual Studio 2010 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "matrix", "matrix\matrix.vcxproj", "{9D12EC35-948A-4D33-A704-5AB4EF052E8B}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {9D12EC35-948A-4D33-A704-5AB4EF052E8B}.Debug|Win32.ActiveCfg = Debug|Win32 + {9D12EC35-948A-4D33-A704-5AB4EF052E8B}.Debug|Win32.Build.0 = Debug|Win32 + {9D12EC35-948A-4D33-A704-5AB4EF052E8B}.Release|Win32.ActiveCfg = Release|Win32 + {9D12EC35-948A-4D33-A704-5AB4EF052E8B}.Release|Win32.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/Part2/matrix/matrix/matrix.vcxproj b/Part2/matrix/matrix/matrix.vcxproj new file mode 100644 index 0000000..1bdb52e --- /dev/null +++ b/Part2/matrix/matrix/matrix.vcxproj @@ -0,0 +1,86 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + + {9D12EC35-948A-4D33-A704-5AB4EF052E8B} + matrix + + + + Application + true + MultiByte + + + Application + false + true + MultiByte + + + + + + + + + + + + + + + + Level3 + Disabled + ProgramDatabase + + + true + %(AdditionalLibraryDirectories) + cudart.lib;%(AdditionalDependencies) + + + + + + + + + Level3 + MaxSpeed + true + true + + + true + true + true + + + + + + + + + + + + Document + + + + + + + \ No newline at end of file diff --git a/Part2/matrix/matrix/matrix.vcxproj.filters b/Part2/matrix/matrix/matrix.vcxproj.filters new file mode 100644 index 0000000..f07abd3 --- /dev/null +++ b/Part2/matrix/matrix/matrix.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/Part2/matrix/matrix/matrix_math.cu b/Part2/matrix/matrix/matrix_math.cu new file mode 100644 index 0000000..9e669f6 --- /dev/null +++ b/Part2/matrix/matrix/matrix_math.cu @@ -0,0 +1,174 @@ +#include +#include +#include +#include +//#include + +using namespace std; + +#define WIDTH 5 +#define MSIZE 25 +#define numBlocks 1 +dim3 threadsPerBlock(WIDTH, WIDTH); + +__global__ void matAdd(float* Ad, float *Bd, float *Pd) +{ + int tx = threadIdx.x; + int ty = threadIdx.y; + + float a = Ad[ty * WIDTH + tx]; + float b = Bd[ty * WIDTH + tx]; + + Pd[ty * WIDTH + tx] = a + b; +} + +__global__ void matSub(float* Ad, float *Bd, float *Pd) +{ + int tx = threadIdx.x; + int ty = threadIdx.y; + + float a = Ad[ty * WIDTH + tx]; + float b = Bd[ty * WIDTH + tx]; + + Pd[ty * WIDTH + tx] = a - b; +} + +__global__ void matMul(float* Ad, float *Bd, float *Pd) +{ + int tx = threadIdx.x; + int ty = threadIdx.y; + + float pValue = 0.0f; + for (int k = 0; k < WIDTH; k++) + { + float a = Ad[ty * WIDTH + k]; + float b = Bd[k * WIDTH + tx]; + pValue += a * b; + } + + + Pd[ty * WIDTH + tx] = pValue; +} + + +void matSerialAdd(float *A, float *B, float *P) +{ + for (int r = 0; r < WIDTH; r++) + { + for (int c = 0; c < WIDTH; c++) + { + P[r * WIDTH + c] = A[r * WIDTH + c] + B[r * WIDTH + c]; + } + } +} + +void matSerialSub(float *A, float *B, float *P) +{ + for (int r = 0; r < WIDTH; r++) + { + for (int c = 0; c < WIDTH; c++) + { + P[r * WIDTH + c] = A[r * WIDTH + c] - B[r * WIDTH + c]; + } + } +} + +void matSerialMul(float *A, float *B, float *P) +{ + for (int r = 0; r < WIDTH; r++) + { + for (int c = 0; c < WIDTH; c++) + { + float pValue = 0.0f; + for (int k = 0; k < WIDTH; k++) + { + pValue += A[r * WIDTH + k] * B[k * WIDTH + c]; + } + P[r * WIDTH + c] = pValue; + } + } +} + + +int main() +{ + float *A = new float[MSIZE]; + float *B = new float[MSIZE]; + float *P = new float[MSIZE]; + float *serialP = new float[MSIZE]; + for (int i = 0; i < MSIZE; i++) + { + A[i] = i; + B[i] = i; + } + + //clock_t start; + //double durationGPU, durationCPU; + + //load A, B to device memory + int size = MSIZE * sizeof(float); + float *Ad, *Bd, *Pd; + + cudaMalloc((void**)&Ad, size); + cudaMemcpy(Ad, A, size, cudaMemcpyHostToDevice); + + cudaMalloc((void**)&Bd, size); + cudaMemcpy(Bd, B, size, cudaMemcpyHostToDevice); + + cudaMalloc((void**)&Pd, size); + + //add + //start = clock(); + matAdd<<< numBlocks, threadsPerBlock >>>(Ad, Bd, Pd); + cudaMemcpy(P, Pd, size, cudaMemcpyDeviceToHost); + //durationGPU = (clock() - start) / (double)CLOCKS_PER_SEC; + + //start = clock(); + matSerialAdd(A, B, serialP); + //durationCPU = (clock() - start) / (double)CLOCKS_PER_SEC; + + for (int i = 0; i < MSIZE; i++) + assert(P[i] == serialP[i]); + cout << "Matrix Addition Success!" << endl; + //cout << "CPU Timing: " << durationCPU << endl; + //cout << "GPU Timing: " << durationGPU << endl<>>(Ad, Bd, Pd); + cudaMemcpy(P, Pd, size, cudaMemcpyDeviceToHost); + //durationGPU = (clock() - start) / (double)CLOCKS_PER_SEC; + + //start = clock(); + matSerialSub(A, B, serialP); + //durationCPU = (clock() - start) / (double)CLOCKS_PER_SEC; + + for (int i = 0; i < MSIZE; i++) + assert(P[i] == serialP[i]); + std::cout << "Matrix Subtraction Success!" << std::endl; + //cout << "CPU Timing: " << durationCPU << endl; + //cout << "GPU Timing: " << durationGPU << endl<>>(Ad, Bd, Pd); + cudaMemcpy(P, Pd, size, cudaMemcpyDeviceToHost); + //durationGPU = (clock() - start) / (double)CLOCKS_PER_SEC; + + //start = clock(); + matSerialMul(A, B, serialP); + //durationCPU = (clock() - start) / (double)CLOCKS_PER_SEC; + + for (int i = 0; i < MSIZE; i++) + assert(P[i] == serialP[i]); + std::cout << "Matrix Dot Multiplication Success!" << std::endl; + //cout << "CPU Timing: " << durationCPU << endl; + //cout << "GPU Timing: " << durationGPU << endl< Date: Sun, 21 Sep 2014 22:09:36 -0400 Subject: [PATCH 2/2] udpate part1 --- Part1/PROJ_WIN/CIS565_PROJ_1.suo | Bin 14336 -> 18944 bytes .../CIS565_PROJ_1/CIS565_PROJ_1.vcxproj | 4 +- Part1/PROJ_WIN/CIS565_PROJ_1/vc100.pdb | Bin 446464 -> 471040 bytes Part1/PROJ_WIN/src/kernel.cu.deps | 1137 +++++++++-------- Part1/src/kernel.cu | 43 +- 5 files changed, 613 insertions(+), 571 deletions(-) diff --git a/Part1/PROJ_WIN/CIS565_PROJ_1.suo b/Part1/PROJ_WIN/CIS565_PROJ_1.suo index ad46c84a9a00137f71e370ba57e24d2f64f37088..d73b1db2f0e5ae5b443685b14c8243ecc6dbf975 100644 GIT binary patch literal 18944 zcmeHPdvH|M8NbUz5fr3?T52KkP!M*ruk0ouNOm_72&Bg#p&4RbR3y!Y5!=ob;drXO8WcVdr$W6W*>Kx z5TL`^`R+OQ+;i{uopZkPz0OJAJvRG;!!OM{F6E^)(q!r6&;`l94h6@aM#&iDC%3josqGXT>8p95S7xCk&4Fbe<{kY)oe z0n7o+1zZZa4Dflt<$!sBF95y>SO6fe%tu}YSOmBdun@p{!nriDBhVzp@k?Vq8l)KV zq|}euFrO|3*9eNqN2SsSji04;$X8TW>E#^wBJLaqtGCbga=;3J6<`BY1MC0?pak%gaPe<2%rPN8Seyi0lEQEz%BrB zD|#M79@opaAy4Y%H1dpI-i@3qiSO-2z7H^<-`|e>OM3Z{_Xpb3sei0}{+~;itv&IF zPf%S^`n#VTavGBbxY5TU=1dooyLYsv0&;t%E0~m1ax5KAN8_cl^%Qvb8Im!2$)A6Lte$L3+W9<7VQVTdN zAq7>LlR}v?P=+j~>>MQ*5YaoBPJ}Cn@4TW3f)=p+yy^1*6!L?oZQSP>3 zMWlhf8+YYoFF#QQ{Io@AYna7P`-U=;DIdQNG{~T#2cr_lcu*&nqGKua4`mbWBD44@ z`zafk^6^vuaqJVo8wCZilGrILCs~M>tI#a|>1ZqT51Du)pix>0`J_PmhamUba7|$5 zL*NWeKg*yh4e6`Zg;AT<%T17lH$a}XK~`?Y8(K<3B3mI9qmaTGe3!GT^@rcgzE7RQ zl>fd~YyQgb+yU^b{m`iH0+nIluR`ylpn)7!B`w7W*Ge8}mXNdw^%1p)DU5X|Xsm+t z<|@eGO$B$&sLMbyllCfgE!eOgLTKg3mG7pJr;taGOVUFVP6J0)K`%R13IrwMz8ppQ z{Kt`sp})=OZ$|AEHB#{e{!{smyvJ0$W%dbuip>;XNX7B@qffMmsP&2`@;`Z*_JUdb z)QRGY`S>@1OL~Eoc5s_M7IA5J(N(xYUw@ZWfS-O6Z5yV1{DtL@krM4aadiVfZ7kZe zX8TY7OsxM?u__94q*i+-tPyf}1boGHEvu_mjApTQp4%hi`+QSpW$_U;A4^>&-?Zdl zt^8)Ja(}7=Mih~w_u#yeYIWKSX)-!;&xxEaX67{*-l$xBTw`k)^`Egh+JH>7H)-)FfsqzZ)>6u~{5T%8g}Jhf7V*ka=zrSkM^x<8e{rl|@|*T$ zqoxbu8GRCAITjiZyFUN4Da_9Q6=*xCE5K6tMJyDnnv#I2yA>5(%>;9ytY4v8$p}qm$L0NHEuYbw5?L3 zgyZ=|8>|{xm-dV^59mII3*%&i2j^2I4Wl1EwCczi14b^#mzJ#WQ3)Mxm5zE)+=yBB zVjdeXvz%j=>GeDC3t+~r;0!(^f7tP>2l!P!A>TBj?82-rp7B}z@&|vi?xo*9ckGgH zwawZn%{)3!`SgNCTLy>j{r9oAjxZ#P7{#CGEcxa47A?B|%G>KW@mwWN;2zpK`LeK*J zpjd`K*oSKb^2Mgo!VQ#EX9}E?FQlml&!UQCk2A zoQe=_nc#vRMm8N;Fp!PnI%x6^mG>BZRKSgm;70nMnq8XPdgR?$`HvoN1X`Q&hdYkk zd)%n6hf;AjK?(OC7vqL|`exTZr5b%}rrh<`0xD>er-x(TN!6|>9)m;taZKxv`^Z&! zRA`i$yW;$ts(yH`%548vqHW*s=QVB~zD0qdQSMQm4sI^tDi#bl8tG8Q!qA14M7#xYMDv&%chQ{|vC`+Yi{| zEI$6UNoXsSO2Yo&+Gi@&Ry<3f@UniB-NhW%?xoE6QSDm_ydodDM)`y%$ka6QF@roz z1(vYc2r|9AdT1I-a>f$;h*t27CF92QUyH|O$*rr@{jx&uPqZfwVsNH@9eSWyj7eyZ z?4BK;^11Q!&dBF%BTn}WTF#sZN(2@-PTl>sb(s=iryPDQdL!N(nG-+vrf7ki#ZTMk`}(KK;n%kr^wuLY;;%wA^@&;h{b>6! zfTqmXLfiU{U^w%+(e`c8TAl5I0vZ;tMqR+@MD ziFYjjPm%2V7w)>v(=+Rqk50_k`qYcnM`HXK@>e&;g+`%t&wyqcT z`6o{Kw;tIOr}RDwojBz=r;*Wkw(d~vw{|reyXN4I!}U83{C9S0%BB@hX5OHmlcilN z8GEoD^RK}F(7doPbfOHNG<*7(f3q>kve1CT`0ZllARfG{KP<=po?;)->xrM9_c8!` zSEjl8coVK>f`%y^S2M+g91NeL3iQ)%Yd%{ z?grcgxEF9A;Ol?`0Jb}*=RD*5RlWQ*J%1SaBYOE!Ecdg3b0M} z7qlI(eqa$jqxW{YcD%A9o^N?-J2``(l6#^PqmhE6-ToT6fjS!VUm7$1{6VbDe3y2N zwg07A?VE4je$(!YTfaSb&9l9+*N;9=_Rbdqs2Y7Lr+>=r08#%j5`Gncgqfk_K{TSi zVJe5;t2HZn1^%1?DcaERU=Zcq`h7Y4Jj+xn@%;0ZXuxom+5R6wTk1`w^7u=8V8le{A$!hxggABUfv>?)U%UgKch5Kzw&SXB&p2yP?=rr z*0Ih%!(^b delta 2563 zcmcguUrbw77{9mNmP;w+U+G$4a6ude0dJw@|DTp#I&u6fIEQ*=)VAPd{g>WbWQbA| zA9yfv>$j*eF~0a?qbLxW+K4{{LZ+l?LV&d>S{XnNg!gJ$mU?d^dhd7NO>(5|6t;#QA{}Mt}$1`d_tM>4AZk_2I4#J3tN@lG6CRZrB zb9qkJ<%@VP@0?WDM*nhwkL9nrSVqGX;8OT{pa~TH1dBTNEQBCV+|`fg(Rny3Q5h|Z z!Uu3Z{}gLNNvG1K`6!Sn(rCmxWCQLzxC|DUW^FWs+D^i@;a=M4f$JtaZGfb4iZS5Q zeMQFcJc_|lsmjo1NEVLeQN=heQn`F$m#PJPMOOH^zy{HR1z0Hh1VpwSQpKhRV1Zed z#DNQ3C9d26UW7;`!B<-WA69=>*h|HzI2EFzR6kWf>EXw|2z=gA3*VQRG!&IK2^miq zRn?%F#=%q8moD38v{`PqY@D1$SB}J+G%TCP7$TAb={OXEdiWDL;+xDNm1CLpWzQn< zJ1QYfPt9O3YN-vl);s|3T1o{^w|H6(C1mkceFmgUDlJU{F zyP?NOQ#8pQ(j?c>hXj*L4X$HUv^ zu08x69jKfPMPf=w6hiVjB^F07E0_lu=@{S;u|A~{jv%CAv&p>r#J-R5m)pFAza*uF z#}{QzHE9or+5d^MjP+{|SbtHKg~ykv=6_AqOr{d^%xAJ8jLw}^6-1>2_1 zbiKfG$dYe*=Ij3g3cpgYTtAEjq7S|sa01WdYkj!r2emGiz=h)@@Do#)t_otIsw5AV zi#!8sRh{X^E!bjs`enG=xR-wx_gX|?gWoEwVCk(QuN1WrZ|qQqi`y6QE%zR5a+B#6 zfhF%FGa_S`9|cDaEVnGvQMlW(O2;7i^cY-c?D=t=`VxT+ifg(G0$3dhM^Yn=HopZk z)ke74w@9iaR_V;Ui>%Hz*r;s)o7<8zeM^tkzVAw_N0Ud@;B+lwzW_QWV9UhAu495F UDM(&Z`x8#AjEw#2uG66T8y@=^t^fc4 diff --git a/Part1/PROJ_WIN/CIS565_PROJ_1/CIS565_PROJ_1.vcxproj b/Part1/PROJ_WIN/CIS565_PROJ_1/CIS565_PROJ_1.vcxproj index 4c88226..6b18a53 100644 --- a/Part1/PROJ_WIN/CIS565_PROJ_1/CIS565_PROJ_1.vcxproj +++ b/Part1/PROJ_WIN/CIS565_PROJ_1/CIS565_PROJ_1.vcxproj @@ -30,7 +30,7 @@ - + @@ -114,6 +114,6 @@ - + \ No newline at end of file diff --git a/Part1/PROJ_WIN/CIS565_PROJ_1/vc100.pdb b/Part1/PROJ_WIN/CIS565_PROJ_1/vc100.pdb index b662f859b27e4fb8b4753032a64cb6eb2e33341b..c47108b40cee9bba16117f6247e5c29138250d07 100644 GIT binary patch delta 19307 zcmeI4d301o7Kd+l7Dxlx6k^y35I`h!2S^BR35%=}T+oBZnNH|57zs&}ghh|o(HRgK zMWZN;%H}AXQ5eJ^5eLSo90f-ibUf}0ctmkzM&dpyW`5Q0zDMzRbk&)q`;W2CsdR4L z`|f>p>#O%VH?O;4eYb{#-R~UOsg8(RT=;=Duc>6(Ckff@zFoHnQp1YKAkjaM8S*eS=n3JViOy0HFkH;vC)g{?l8;y;V%1JK8H_jv0oGJ zUt!C)akd!?*!{OW_`W!sms$8&m>*){2ojjHlZ7wX@%=1c%>g~G#*z!N@8QuO*(!^J zv&RNUWQ&`Khpi90nB8-I3g0uJWIhC;0WY&&mc`nNr!8$K< z4_lAp(J7I$oWati;w(K+bKJQRX*%nEDJ(N9St{dM&ctZm*(E%BEyoPlZWbU16*@)EgqA4b7*PEwKU{j8cJ9iO57Mq zI@r8xrMrDfcmf6HSCo&dt*xn@P+nWHu)H>SS$%n6PGp3>Xmxi^pou4JCRSAiXO~w^ ztgfrCogb*Ltf^+3NhfSBuBDOxGYp2 z2vQ{2lv9kR1xyDQOFH93%m_w+sgm9ZFs*(OpJ3@Jy79H;q10S4I5 zWsQ5RXBv^a9vQ6htx8?%?%F4n+NXznMD55;@AM!#At%s#koT{e_n^CT*gt+s>FAOv ze*aXzf4I+IKR2{!R=wXVS}+hWe!=hl5&k*B+Uj7HKM)G}{FRmd;m{Wa3kye$^83YV zlm-IxL*>qh5~oe-=F1#9Vw(`kyWsNsJ%Y;iGp$emt?8G3$A)Rj`w!6*s29=(Ix zMV&G*fF^YF@oMSFMY*gCUqlI+ojSWn7ywJk06O#G;B==hR#n(6fK3f1SzcdLJ8%%S z?x#eICc776+-!+t&GU5A3`a8kKTGfF!dg+or&(2`_mq?(ZDvYE>KFAv>KkOR2*_cK zT$$psCEyd#ZNpeQ9D4)HCX`8op)a#2O(c&P5Hb!H2Xr%K9N3N$s7jIv`gxKp?8Cjq z@d=$rXAtGUNAcBI7A56H`Ej9KFDz5O1R0Pi7xGecC(11yG7^>YTi`%N`7+ozQGP4N z%@&n%W12Ci+?L%!HRW>JOv;t;_=XP~9*0Y$W>|*jHmsElkC1OicY?>#so+@w2P$~( zfQ=J8cVgUZ(cs}}#u^@bb~WIU)7B}&n41zFZ&5`&;L!}r@Z62HlHn2ZN^~c9ES(CT zd*DC?&%Lm5f@c-R%@z$Fo@T7!v1fO(`pP4x&4edYx{bLT4z3aJI+Q^9M)UUT_87Lt?gzf~CrBea*FdV3W+6Wsbpf+LLY|((??8X`>`=V+9C8y09 zs3R?UKoyRR#}&$}7nXr~6zeAgCFITMPC!{Y6;NB?Kn2tvVB-YTR*ai1Dp1BWV-A!p zyBhD8P6f{&;Xnn?F4#E1vm4`P ziv|x*GuH6fvpYonW?M>Ib6ZXF#I#Ja;v_0cqT=+uXH}P#?u~=20w)&7EQK9KKK_p0 z0k+!cc}En+13k^KjNX@+m5iQ{zY;TbsH0js6?$L8feO8EVB>_|w-`5DH1v3yu}06H zU5)FOoHl)U3WIUE>FI@Kc)rJ4$?ypI2XrTRES(CTAK^d+&vDo|!SfTw%@!3NW12CC z$Ch0Uc;vL{!xIR`<;tTMmf>;bpA3(X-RMs6SUMFv32>l-ClNMI@FZc}Y*FDcrWtd1 zY}wU-M^2kIJidbRc)+6>mf>lSwUVzqLQX+!Nb{& zH9Yo3)qqD%n?5{6v*PleM=vbH(*56f)MTN(h zX3XKSWmf|pIc?hT_zH{SQDW2#%kcETTFLMTxhJ|4JeE!cPcJx7!P6TyPVn@>xY?q? z!_$m4JofBrz$2$kAD&?|;{uOfScaz`)=Gv)$o?20U`w^x-Lp%ky$Ry|4_=V62r4kC4wncY?>#so)s`2P$~-VB-YOP>h=` zDm=zCV-AlkyBhGwY14+s8;H+)9=)&(Pd?U4hDXT5(4FA1bSij;!+{E(5wLNBrvT$- ziv|y8H`egj7ggh7OF3;OJohaiDlNnRkvb9W7)SKCD~TR~yrmt{uIWU(F@8VBH*_Xi zGL`7nzCMDJ!09V|ex8&6!Hk22ZsZyyz*Z zdQozUxd`fiPmU;$%U>;eVT+3+k4v{;YVr;ivY4*Z4z_fv9sDdDsCMvkuyNYKdoXUc zs5{u0X3Tf6ExQ^IOUP+6;W_znu7=H84@>w8isAv3W>^MlFV;^6O33@roq)1*DxhA3 z0~JtzhK&diBCGJb%Gj$?ypI0J;-A zmQDrFD{!EK=T+D^!Sh#)n=KkVJk40cW6!PzJaXFf;h9w!k3WM^U(^Z9@Vt(-lHn2Z z8|Y5(SUMFvZ^D5Jp0{A*1kc+TH(OMAjA_Ok9$R)bZe?=XOn6Sdl}&hA>sID1jLWS| zGb{u3KGshLO2{9eI{{_sR6res0~Jt*VB-YThZr|oRG^IP#vCZyqH0`?Pe=P1@nhDXSe$IKiazOr;Gc>Vz=DtJDJjT1b_FmASJ@bENa4Uav$ zsV6`8x1%BX$)s@Wji@cupz<*aI}!DQPSmyp~%r2C7i_wc3BD0HdIG;phV+G z1?6c}Uv7f)k8pyAP(hiF%F6SCCSr-eO)3gz`31((c!T)RK9+Kw7f^$d3jy{h)Usyd*wujhKES+q8jiUD zAAfeiXbaYRD3rTL6Ria-y>7stSN%|mgOe9Z0n#K?#sE?~Fz|VnQ3T7f6~Q9^S$eWX z6vG})y0pjyv}RXB za`Vrd^5;!?q8j{vxG7PdK6O(nH{LVG#uGQ6Mff@u`^MXgjVHR;c;YKoM2l>^=F&Tp zcc!`eHm9slUg%oYuq`>U6RsgAzHi0&u9Z31+v8L3DoeD!wBR#a6~$lW;Mj cs_0+ZA{>oLb%mN-p|-T9^w6=i#%byQ0%ESbY5)KL delta 3477 zcmcK4dz_VH9suxj=5owXohg;5G~Kgqwd<1WHc3sa8o3ljlWsHJoQP1IDVkEKlx~!S zR4Uc6&6K2~o3P8eE5^OWt}M&i*nD5(^ zRSNgFlXV9g5Bh_eWL+|^8Y}DKKW|iZLUN(xMh6>D)+J+5-|S%QfAFFM)O1McPt6as z>95tq4^;eX{m*!Fs`i&Ou6$g{M;65YCAd2&+^ko2p$Q$&KlOTjwe~qQ)~#H|3)_PG|#Y^Eo@~Q+j*89 zJje6Az@PXtFY*$9;bmUoRbJzD-r!B%Vkf(Jn|Fv;IK9hz-0AuDqdx-}ND;*hVlYD( z$}p}b#&E9TT1Idk*E5nExRIL}#b|Ek7UE(5{}Ra<#&RpSF^-qr&R2Mq*La;bc$2r- z$u6Go!_QNH7n7O7RA%rb>v@U|Y-AHp6THlu!|wkU$yTb3D%r+(8e{4>qGS z`E;TtQL1wohf<9i@Vu~2dAcir7p~SeF zYZ%T5uH|~J;|4}@6E_AwVYFlvw{SCKDB)JdGLG9APbs%Efk{l{PVQhDcQci{n9LOJ zVLG#!#Z2aKANSfnG>^F~XBqeN2oEry1w6<@Jj_Ctu$VfHCf9Ubl_7(-Nah_+d_LdR-VgxdNZFHWEuNVeM9jC z?SIKzizhLWdGsOM_-aC~qq6)gCDSPHdok2y3*z6t{@e zn9MiEi^b;pj}gDrZYBQ*r)e*u6N?$b`^FRETRki{%z36HWO`2H`KEaNc}tYj5`DrocZIVBb{gYSOy>Gpl3j$d124+ z%;{f5nuYr(@(Oy^7#k~%g-c^;rLpwVSjNIwX2t3ik+zv-J!VA;!-@6ROjQy9MZ^_^YElength(); i++) + { + glm::vec4 r4 = their_pos[i] - my_pos; + glm::vec3 r(r4.x, r4.y, r4.z); + float s = (G * their_pos[i].w / pow(pow(glm::length(r),2) + pow(ZERO_ABSORPTION_EPSILON,2), 1.5)); + acc[0] += s * r.x; + acc[1] += s * r.y; + acc[2] += s * r.z; + } + glm::vec3 starR(-my_pos.x, -my_pos.y, -my_pos.z); + float starS = (G * starMass / pow(pow(glm::length(starR),2) + pow(ZERO_ABSORPTION_EPSILON,2), 1.5)); + acc[0] += starS * starR.x; + acc[1] += starS * starR.y; + acc[2] += starS * starR.z; + } + return acc; } // TODO : update the acceleration of each body __global__ void updateF(int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc) { // FILL IN HERE + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if(index < N) + { + acc[index] = accelerate(N, pos[index], pos); + } } // TODO : update velocity and position using a simple Euler integration scheme __global__ void updateS(int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc) { // FILL IN HERE + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if(index < N) + { + glm::vec3 vt = vel[index] + acc[index] * dt; + glm::vec3 p(pos[index].x, pos[index].y, pos[index].z); + p += (vel[index] + vt) * dt / 2.0f; + pos[index].x = p.x; + pos[index].y = p.y; + pos[index].z = p.z; + vel[index].x = vt.x; + vel[index].y = vt.y; + vel[index].z = vt.z; + } } // Update the vertex buffer object @@ -180,6 +217,10 @@ void initCuda(int N) void cudaNBodyUpdateWrapper(float dt) { // FILL IN HERE + updateF<<< blockSize, threadsPerBlock >>>(numObjects, dt, dev_pos, dev_vel, dev_acc); + cudaThreadSynchronize(); + updateS<<< blockSize, threadsPerBlock >>>(numObjects, dt, dev_pos, dev_vel, dev_acc); + cudaThreadSynchronize(); } void cudaUpdateVBO(float * vbodptr, int width, int height)