Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
d866b7e
Updated title
Jan 18, 2025
32966a4
test
jrazo8 Jan 18, 2025
ab6c9f1
testing2
jrazo8 Jan 18, 2025
05a0c72
test
jrazo8 Jan 18, 2025
384fa14
added summary of 1st research paper
jrazo8 Jan 19, 2025
b7e0f12
test_2
jrazo8 Jan 19, 2025
5157a5b
week_1
jrazo8 Jan 19, 2025
8b21db9
week 2
jrazo8 Jan 26, 2025
f266d14
adding references
jrazo8 Jan 26, 2025
1a49058
update weeks
jrazo8 Jan 26, 2025
6890905
updated summary
jrazo8 Jan 27, 2025
5186541
week 4
jrazo8 Feb 3, 2025
9af5121
intro
jrazo8 Feb 3, 2025
f4a2e5a
intro + lit review
jrazo8 Feb 10, 2025
9f14cc4
Adding more lit reviews
jrazo8 Feb 11, 2025
75a8ced
feedback update
jrazo8 Feb 13, 2025
cdc9ccd
changed the intro and lit review and added new references
jrazo8 Feb 17, 2025
e247a71
Added dataset link
jrazo8 Feb 21, 2025
6904207
update
jrazo8 Feb 21, 2025
a388e49
updated lit review
jrazo8 Feb 21, 2025
4ef74e1
removed some references
jrazo8 Feb 21, 2025
259b6f7
imported data
jrazo8 Feb 22, 2025
18fd84d
update
jrazo8 Feb 22, 2025
945fb77
updated methods section
jrazo8 Feb 22, 2025
b763eb4
updated methods section again
jrazo8 Feb 22, 2025
9c51521
updated references
jrazo8 Feb 22, 2025
cc84723
update
jrazo8 Feb 22, 2025
6210189
Added table
jrazo8 Feb 22, 2025
24b6620
Added knn model
jrazo8 Feb 22, 2025
a73445c
Added summary table
jrazo8 Feb 23, 2025
c93e2b2
added visualization
jrazo8 Feb 23, 2025
d621395
Updated visualization analysis
LabMage Feb 24, 2025
3faadeb
Merge pull request #1 from jrazo8/my-feature-branch
LabMage Feb 25, 2025
e1fab25
Updated feature with latest changes
LabMage Mar 1, 2025
404a4c0
Merge pull request #2 from jrazo8/my-feature-branch
LabMage Mar 1, 2025
f9e5f1d
Save progress 1
jrazo8 Mar 2, 2025
9b46476
Removed section we don't need
jrazo8 Mar 2, 2025
6b8367f
Merge pull request #3 from jrazo8/methods-branch
jrazo8 Mar 2, 2025
ba8f6a6
Changed subheadings
jrazo8 Mar 2, 2025
54e4c08
Merge remote-tracking branch 'origin/main'
jrazo8 Mar 2, 2025
e8cedf7
Added visual
jrazo8 Mar 3, 2025
6b2e69b
git push origin mainMerge branch 'methods-branch'
jrazo8 Mar 3, 2025
1fd90d7
update methods section
jrazo8 Mar 3, 2025
2e0d29f
Updated Data Exploration and Visualization
LabMage Mar 4, 2025
4a244eb
Merge pull request #4 from jrazo8/my-feature-branch
LabMage Mar 4, 2025
0574240
Updated project files including RStudio settings
LabMage Mar 10, 2025
0b16d3b
Updated project files including RStudio settings
LabMage Mar 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
4,796 changes: 4,796 additions & 0 deletions EDA.html

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion STA6257_Project.Rproj
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Version: 1.0
ProjectId: 750cfe7e-2bcb-4f93-af01-9511e2811dcc

RestoreWorkspace: Default
SaveWorkspace: Default
Expand Down
253,681 changes: 253,681 additions & 0 deletions cdc_data.csv

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions eda.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Exploratory Data Analysis,Count
Number of Nulls,0
Missing Data,0
Duplicate Rows,24206
Total Rows,253680
Binary file added images/.DS_Store
Binary file not shown.
Binary file added images/kNN_picture.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3,541 changes: 2,906 additions & 635 deletions index.html

Large diffs are not rendered by default.

1,189 changes: 1,108 additions & 81 deletions index.qmd

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions knn_tuning_results.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

194 changes: 142 additions & 52 deletions references.bib
Original file line number Diff line number Diff line change
@@ -1,62 +1,152 @@
% Journal article: topology applied to machine learning.
% NOTE(review): this record and the next one were recovered from two
% interleaved records; volume 4 is assumed to belong to both -- confirm.
@article{adams2021topology,
  author    = {Adams, Henry and Moy, Michael},
  title     = {Topology applied to machine learning: From global to local},
  journal   = {Frontiers in Artificial Intelligence},
  volume    = {4},
  pages     = {668302},
  year      = {2021},
  publisher = {Frontiers Media SA},
}

% Journal article: introduction to k-nearest neighbors.
@article{zhang2016introduction,
  author    = {Zhang, Zhongheng},
  title     = {Introduction to machine learning: k-nearest neighbors},
  journal   = {Annals of translational medicine},
  volume    = {4},
  number    = {11},
  year      = {2016},
  publisher = {AME Publications},
}

% Journal article: kNN classification with different numbers of nearest neighbors.
% The acronym in the title is braced so sentence-casing styles keep its capitals.
% NOTE(review): volume 29, number 5 presumably appeared in 2018 and 2017 is the
% early-access year -- confirm against the publisher record.
@article{zhang2017efficient,
  author    = {Zhang, Shichao and Li, Xuelong and Zong, Ming and Zhu, Xiaofeng and Wang, Ruili},
  title     = {Efficient {kNN} classification with different numbers of nearest neighbors},
  journal   = {{IEEE} Transactions on Neural Networks and Learning Systems},
  volume    = {29},
  number    = {5},
  pages     = {1774--1785},
  year      = {2017},
  publisher = {IEEE},
}

% Journal article: review of data classification with the k-nearest neighbour algorithm.
% The second author's initials are separated so name parsing keeps both of them.
@article{kataria2013review,
  author  = {Kataria, Aman and Singh, M. D.},
  title   = {A review of data classification using k-nearest neighbour algorithm},
  journal = {International Journal of Emerging Technology and Advanced Engineering},
  volume  = {3},
  number  = {6},
  pages   = {354--360},
  year    = {2013},
}

% Journal article: tutorial on PCA for coordination and variability.
% NOTE(review): no publisher is recorded for this entry; the publisher field
% that followed it is assumed to belong to the next record -- confirm.
@article{daffertshofer2004pca,
  author  = {Daffertshofer, Andreas and Lamoth, Claudine JC and Meijer, Onno G and Beek, Peter J},
  title   = {{PCA} in studying coordination and variability: a tutorial},
  journal = {Clinical biomechanics},
  volume  = {19},
  number  = {4},
  pages   = {415--428},
  year    = {2004},
}

% Journal article: review of kNN classification.
% NOTE(review): the title read "k NN"; it is presumably the single token kNN -- confirm.
@article{syriopoulos2023k,
  author    = {Syriopoulos, Panos K and Kalampalikis, Nektarios G and Kotsiantis, Sotiris B and Vrahatis, Michael N},
  title     = {{kNN} Classification: a review},
  journal   = {Annals of Mathematics and Artificial Intelligence},
  pages     = {1--33},
  year      = {2023},
  publisher = {Springer},
}

% Journal article: kNN classification algorithm for big data.
% The acronym in the title is braced so sentence-casing styles keep its capitals.
@article{deng2016efficient,
  author    = {Deng, Zhenyun and Zhu, Xiaoshu and Cheng, Debo and Zong, Ming and Zhang, Shichao},
  title     = {Efficient {kNN} classification algorithm for big data},
  journal   = {Neurocomputing},
  volume    = {195},
  pages     = {143--148},
  year      = {2016},
  publisher = {Elsevier},
}

% Journal article: review of generalized estimating equations for longitudinal data.
@article{wang2014,
  author    = {Wang, Ming},
  title     = {Generalized estimating equations in longitudinal data analysis: a review and recent developments},
  journal   = {Advances in Statistics},
  volume    = {2014},
  year      = {2014},
  publisher = {Hindawi},
}

% Software manual for R; the doubled braces keep the corporate author as one name.
@manual{R-base,
  author       = {{R Core Team}},
  title        = {R: A Language and Environment for Statistical Computing},
  organization = {R Foundation for Statistical Computing},
  address      = {Vienna, Austria},
  year         = {2019},
  url          = {https://www.R-project.org},
}

% Book: nonparametric curve estimation (Springer Series in Statistics).
@book{efr2008,
  author    = {Efromovich, S.},
  title     = {Nonparametric Curve Estimation: Methods, Theory, and Applications},
  series    = {Springer Series in Statistics},
  publisher = {Springer New York},
  year      = {2008},
  isbn      = {9780387226385},
  lccn      = {99013253},
  url       = {https://books.google.com/books?id=mdoLBwAAQBAJ},
}

% Journal article: principal component analysis.
% NOTE(review): year and publisher were recovered from lines that had run into
% a later record; they are assumed to belong here -- confirm.
@article{bro2014principal,
  author    = {Bro, Rasmus and Smilde, Age K},
  title     = {Principal component analysis},
  journal   = {Analytical methods},
  volume    = {6},
  number    = {9},
  pages     = {2812--2831},
  year      = {2014},
  publisher = {Royal Society of Chemistry},
}

% Journal article: diabetes classification based on KNN.
% The acronym in the title is braced so sentence-casing styles keep its capitals.
@article{ali2020diabetes,
  author  = {Ali, Ameer and Alrubei, Mohammed and Hassan, LF Mohammed and Al-Ja'afari, M and Abdulwahed, Saif},
  title   = {Diabetes classification based on {KNN}},
  journal = {IIUM Engineering Journal},
  volume  = {21},
  number  = {1},
  pages   = {175--181},
  year    = {2020},
}

% Journal article: diagnosis of diabetes mellitus with the k nearest neighbor algorithm.
@article{saxena2014diagnosis,
  author    = {Saxena, Krati and Khan, Zubair and Singh, Shefali},
  title     = {Diagnosis of diabetes mellitus using k nearest neighbor algorithm},
  journal   = {International Journal of Computer Science Trends and Technology (IJCST)},
  volume    = {2},
  number    = {4},
  pages     = {36--43},
  year      = {2014},
  publisher = {Citeseer},
}

% Conference paper: k-nearest neighbor methodology for diagnosing diabetes mellitus.
% The proceedings name is stored in title case with its acronym braced.
@inproceedings{panwar2016k,
  author       = {Panwar, Madhuri and Acharyya, Amit and Shafik, Rishad A and Biswas, Dwaipayan},
  title        = {K-nearest neighbor based methodology for accurate diagnosis of diabetes mellitus},
  booktitle    = {2016 Sixth International Symposium on Embedded Computing and System Design ({ISED})},
  pages        = {132--136},
  year         = {2016},
  organization = {IEEE},
}

% Journal article: type 2 diabetes prediction with the k-nearest neighbor algorithm.
@article{suriya2023type,
  author  = {Suriya, S and Muthu, J Joanish},
  title   = {Type 2 Diabetes Prediction using K-Nearest Neighbor Algorithm},
  journal = {Journal of Trends in Computer Science and Smart Technology},
  volume  = {5},
  number  = {2},
  pages   = {190--205},
  year    = {2023},
}

% Journal article: machine learning models for detecting and classifying type 2 diabetes.
@article{iparraguirre2023application,
  author    = {Iparraguirre-Villanueva, Orlando and Espinola-Linares, Karina and Flores Casta{\~n}eda, Rosalynn Ornella and Cabanillas-Carbonell, Michael},
  title     = {Application of machine learning models for early detection and accurate classification of type 2 diabetes},
  journal   = {Diagnostics},
  volume    = {13},
  number    = {14},
  pages     = {2383},
  year      = {2023},
  publisher = {MDPI},
}

% Conference paper: heart disease prediction with K-nearest neighbor classification.
% The proceedings name is stored in title case; its wording is otherwise unchanged.
@inproceedings{khateeb2017efficient,
  author    = {Khateeb, Nida and Usman, Muhammad},
  title     = {Efficient heart disease prediction system using K-nearest neighbor classification technique},
  booktitle = {Proceedings of the International Conference on Big Data and Internet of Thing},
  pages     = {21--26},
  year      = {2017},
}

% Journal article: predicting diabetic patients with KNN imputation and data mining.
% The acronym in the title is braced so sentence-casing styles keep its capitals.
@article{altamimi2024automated,
  author    = {Altamimi, Abdulaziz and Alarfaj, Aisha Ahmed and Umer, Muhammad and Alabdulqader, Ebtisam Abdullah and Alsubai, Shtwai and Kim, Tai-hoon and Ashraf, Imran},
  title     = {An automated approach to predict diabetic patients using {KNN} imputation and effective data mining techniques},
  journal   = {BMC Medical Research Methodology},
  volume    = {24},
  number    = {1},
  pages     = {221},
  year      = {2024},
  publisher = {Springer},
}

% Journal article: diagnosis and classification of diabetes with machine learning algorithms.
@article{theerthagiri2022diagnosis,
  author    = {Theerthagiri, Prasannavenkatesan and Ruby, A Usha and Vidya, J},
  title     = {Diagnosis and classification of the diabetes using machine learning algorithms},
  journal   = {SN Computer Science},
  volume    = {4},
  number    = {1},
  pages     = {72},
  year      = {2022},
  publisher = {Springer},
}

% Journal article: comparison of the KNN algorithm and its variants for disease prediction.
% The acronym in the title is braced so sentence-casing styles keep its capitals.
@article{uddin2022comparative,
  author    = {Uddin, Shahadat and Haque, Ibtisham and Lu, Haohui and Moni, Mohammad Ali and Gide, Ergun},
  title     = {Comparative performance analysis of K-nearest neighbour ({KNN}) algorithm and its different variants for disease prediction},
  journal   = {Scientific Reports},
  volume    = {12},
  number    = {1},
  pages     = {6256},
  year      = {2022},
  publisher = {Nature Publishing Group UK London},
}

% Book chapter: K-nearest neighbor classification.
% Each author is listed once, and the containing work is held in booktitle.
% NOTE(review): "Data mining in agriculture" is presumably the book this
% chapter appears in, not a journal -- confirm.
@incollection{mucherino2009k,
  author    = {Mucherino, Antonio and Papajorgji, Petraq J and Pardalos, Panos M},
  title     = {K-nearest neighbor classification},
  booktitle = {Data Mining in Agriculture},
  pages     = {83--106},
  year      = {2009},
  publisher = {Springer},
}