diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 359dfac..0000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.Rhistory b/.Rhistory deleted file mode 100644 index e082170..0000000 --- a/.Rhistory +++ /dev/null @@ -1 +0,0 @@ -ts diff --git a/.Rproj.user/0CDE63C2/jobs/50965170-output.json b/.Rproj.user/0CDE63C2/jobs/50965170-output.json deleted file mode 100644 index ba1b4c2..0000000 --- a/.Rproj.user/0CDE63C2/jobs/50965170-output.json +++ /dev/null @@ -1,98 +0,0 @@ -[1,"==> quarto preview index.qmd --to html --no-watch-inputs --no-browse\n\n"] -[1,"\u001B[1mpandoc \u001B[22m\n to: html\n output-file: index.html\n standalone: true\n title-prefix: Analytics at Scale\n section-divs: true\n html-math-method: mathjax\n wrap: none\n default-image-extension: png\n css:\n - styles.css\n toc: true\n \n\u001B[1mmetadata\u001B[22m\n document-css: false\n link-citations: true\n date-format: long\n lang: en\n editor: visual\n theme: cosmo\n title: Analytics at Scale\n subtitle: Workshopping best practices for big data analytics in epidemiology\n \n"] -[1,"Output created: _site/index.html\n\n"] -[1,"\u001B[1m\u001B[34mTerminating existing preview server....\u001B[39m\u001B[22m\u001B[1m\u001B[34mDONE\n\u001B[39m\u001B[22m\n"] -[1,"\u001B[1m\u001B[34mPreparing to preview\u001B[39m\u001B[22m\n"] -[1,"\u001B[1m\u001B[34m\r[1/1] snippets.qmd\u001B[39m\u001B[22m\n"] -[1,"\n"] -[1,"\u001B[32mWatching files for changes\u001B[39m\n\u001B[32mBrowse at \u001B[39m\u001B[4m\u001B[32mhttp://localhost:3122/\u001B[39m\u001B[24m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n\u001B[31m /tutorials/media/snippets.gif (404: Not Found)\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n\u001B[31m /tutorials/media/snippets.gif (404: Not Found)\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n\u001B[31m 
/tutorials/media/snippets.gif (404: Not Found)\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n"] -[1,"\u001B[31m /tutorials/media/snippets.gif (404: Not Found)\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[1m\u001B[34m\r[1/2] meeting-notes/2024-10-11.qmd\u001B[39m\u001B[22m\n"] -[1,"\u001B[1m\u001B[34m\r[2/2] meeting-notes.qmd\u001B[39m\u001B[22m\n"] -[1,"\n"] -[1,"Output created: ../_site/meeting-notes/2024-10-11.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-11.html\u001B[39m\n\u001B[32mGET: /meeting-notes/2024-10-11.html\u001B[39m\n"] -[1,"\u001B[1m\u001B[34m\r[1/2] meeting-notes/2024-10-11.qmd\u001B[39m\u001B[22m\n"] -[1,"\u001B[1m\u001B[34m\r[2/2] meeting-notes.qmd\u001B[39m\u001B[22m\n"] -[1,"\n"] -[1,"Output created: ../_site/meeting-notes/2024-10-11.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-11.html\u001B[39m\n\u001B[32mGET: /meeting-notes/2024-10-11.html\u001B[39m\n"] -[1,"\u001B[1m\u001B[34m\r[1/2] meeting-notes/2024-10-11.qmd\u001B[39m\u001B[22m\n"] -[1,"\u001B[1m\u001B[34m\r[2/2] meeting-notes.qmd\u001B[39m\u001B[22m\n"] -[1,"\n"] -[1,"Output created: ../_site/meeting-notes/2024-10-11.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-11.html\u001B[39m\n\u001B[32mGET: /meeting-notes/2024-10-11.html\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n"] -[1,"\u001B[31m /tutorials/media/snippets.gif (404: Not Found)\u001B[39m\n"] -[1,"\u001B[1m\u001B[34m\r[1/2] meeting-notes/2024-10-04.qmd\u001B[39m\u001B[22m\n"] -[1,"\u001B[1m\u001B[34m\r[2/2] meeting-notes.qmd\u001B[39m\u001B[22m\n"] 
-[1,"\n"] -[1,"Output created: ../_site/meeting-notes/2024-10-04.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-04.html\u001B[39m\n\u001B[32mGET: /meeting-notes/2024-10-04.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-04.html\u001B[39m\n"] -[1,"\u001B[1m\u001B[34m\r[1/2] meeting-notes/2024-10-04.qmd\u001B[39m\u001B[22m\n"] -[1,"\u001B[1m\u001B[34m\r[2/2] meeting-notes.qmd\u001B[39m\u001B[22m\n"] -[1,"\n"] -[1,"Output created: ../_site/meeting-notes/2024-10-04.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-04.html\u001B[39m\n\u001B[32mGET: /meeting-notes/2024-10-04.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[1m\u001B[34m\r[1/2] meeting-notes/2024-10-04.qmd\u001B[39m\u001B[22m\n"] -[1,"\u001B[1m\u001B[34m\r[2/2] meeting-notes.qmd\u001B[39m\u001B[22m\n"] -[1,"\n"] -[1,"Output created: ../_site/meeting-notes/2024-10-04.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-04.html\u001B[39m\n\u001B[32mGET: /meeting-notes/2024-10-04.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n"] -[1,"\u001B[31m /tutorials/media/snippets.gif (404: Not Found)\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-11.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-11.html\u001B[39m\n\u001B[32mGET: /meeting-notes/2024-10-04.html\u001B[39m\n"] 
-[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-04.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n"] -[1,"\u001B[31m /tutorials/media/snippets.gif (404: Not Found)\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[1mpandoc \u001B[22m\n to: html\n output-file: index.html\n standalone: true\n title-prefix: Analytics at Scale\n section-divs: true\n html-math-method: mathjax\n wrap: none\n default-image-extension: png\n css:\n - styles.css\n toc: true\n \n\u001B[1mmetadata\u001B[22m\n document-css: false\n link-citations: true\n date-format: long\n lang: en\n editor: visual\n theme: cosmo\n title: Analytics at Scale\n subtitle: Workshopping best practices for big data analytics in epidemiology\n \n"] -[1,"Output created: _site/index.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[1mpandoc \u001B[22m\n to: html\n output-file: index.html\n standalone: true\n title-prefix: Analytics at Scale\n section-divs: true\n html-math-method: mathjax\n wrap: none\n default-image-extension: png\n css:\n - styles.css\n toc: true\n \n\u001B[1mmetadata\u001B[22m\n document-css: false\n link-citations: true\n date-format: long\n lang: en\n editor: visual\n theme: cosmo\n title: Analytics at Scale\n subtitle: Workshopping best practices for big data analytics in epidemiology\n \n"] -[1,"Output created: _site/index.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[1mpandoc \u001B[22m\n to: html\n output-file: index.html\n standalone: true\n title-prefix: Analytics at Scale\n section-divs: true\n html-math-method: mathjax\n wrap: none\n default-image-extension: png\n css:\n - styles.css\n toc: true\n \n\u001B[1mmetadata\u001B[22m\n document-css: false\n link-citations: true\n 
date-format: long\n lang: en\n editor: visual\n theme: cosmo\n title: Analytics at Scale\n subtitle: Workshopping best practices for big data analytics in epidemiology\n \n"] -[1,"Output created: _site/index.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[1mpandoc \u001B[22m\n to: html\n output-file: index.html\n standalone: true\n title-prefix: Analytics at Scale\n section-divs: true\n html-math-method: mathjax\n wrap: none\n default-image-extension: png\n css:\n - styles.css\n toc: true\n \n\u001B[1mmetadata\u001B[22m\n document-css: false\n link-citations: true\n date-format: long\n lang: en\n editor: visual\n theme: cosmo\n title: Analytics at Scale\n subtitle: Workshopping best practices for big data analytics in epidemiology\n \n"] -[1,"Output created: _site/index.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[1mpandoc \u001B[22m\n to: html\n output-file: index.html\n standalone: true\n title-prefix: Analytics at Scale\n section-divs: true\n html-math-method: mathjax\n wrap: none\n default-image-extension: png\n css:\n - styles.css\n toc: true\n \n\u001B[1mmetadata\u001B[22m\n document-css: false\n link-citations: true\n date-format: long\n lang: en\n editor: visual\n theme: cosmo\n title: Analytics at Scale\n subtitle: Workshopping best practices for big data analytics in epidemiology\n \n"] -[1,"Output created: _site/index.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[1mpandoc \u001B[22m\n to: html\n output-file: index.html\n standalone: true\n title-prefix: Analytics at Scale\n section-divs: true\n html-math-method: mathjax\n wrap: none\n default-image-extension: png\n css:\n - styles.css\n toc: true\n \n\u001B[1mmetadata\u001B[22m\n document-css: false\n link-citations: true\n date-format: long\n 
lang: en\n editor: visual\n theme: cosmo\n title: Analytics at Scale\n subtitle: Workshopping best practices for big data analytics in epidemiology\n \n"] -[1,"Output created: _site/index.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[1mpandoc \u001B[22m\n to: html\n output-file: index.html\n standalone: true\n title-prefix: Analytics at Scale\n section-divs: true\n html-math-method: mathjax\n wrap: none\n default-image-extension: png\n css:\n - styles.css\n toc: true\n \n\u001B[1mmetadata\u001B[22m\n document-css: false\n link-citations: true\n date-format: long\n lang: en\n editor: visual\n theme: cosmo\n title: Analytics at Scale\n subtitle: Workshopping best practices for big data analytics in epidemiology\n \n"] -[1,"Output created: _site/index.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[1mpandoc \u001B[22m\n to: html\n output-file: index.html\n standalone: true\n title-prefix: Analytics at Scale\n section-divs: true\n html-math-method: mathjax\n wrap: none\n default-image-extension: png\n css:\n - styles.css\n toc: true\n \n\u001B[1mmetadata\u001B[22m\n document-css: false\n link-citations: true\n date-format: long\n lang: en\n editor: visual\n theme: cosmo\n title: Analytics at Scale\n subtitle: Workshopping best practices for big data analytics in epidemiology\n \n"] -[1,"Output created: _site/index.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n\u001B[32mGET: /\u001B[39m\n"] diff --git a/.Rproj.user/0CDE63C2/pcs/files-pane.pper b/.Rproj.user/0CDE63C2/pcs/files-pane.pper deleted file mode 100644 index d1a259e..0000000 --- a/.Rproj.user/0CDE63C2/pcs/files-pane.pper +++ /dev/null @@ -1,9 +0,0 @@ -{ - "sortOrder": [ - { - "columnIndex": 2, - "ascending": true - } - ], - "path": "~/Documents/analytics_at_scale/.github/workflows" -} \ No 
newline at end of file diff --git a/.Rproj.user/0CDE63C2/pcs/source-pane.pper b/.Rproj.user/0CDE63C2/pcs/source-pane.pper deleted file mode 100644 index b074a4f..0000000 --- a/.Rproj.user/0CDE63C2/pcs/source-pane.pper +++ /dev/null @@ -1,3 +0,0 @@ -{ - "activeTab": 1 -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/pcs/windowlayoutstate.pper b/.Rproj.user/0CDE63C2/pcs/windowlayoutstate.pper deleted file mode 100644 index f5a0adc..0000000 --- a/.Rproj.user/0CDE63C2/pcs/windowlayoutstate.pper +++ /dev/null @@ -1,14 +0,0 @@ -{ - "left": { - "splitterpos": 218, - "topwindowstate": "NORMAL", - "panelheight": 879, - "windowheight": 917 - }, - "right": { - "splitterpos": 237, - "topwindowstate": "NORMAL", - "panelheight": 879, - "windowheight": 917 - } -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/pcs/workbench-pane.pper b/.Rproj.user/0CDE63C2/pcs/workbench-pane.pper deleted file mode 100644 index 5699060..0000000 --- a/.Rproj.user/0CDE63C2/pcs/workbench-pane.pper +++ /dev/null @@ -1,5 +0,0 @@ -{ - "TabSet1": 1, - "TabSet2": 4, - "TabZoom": {} -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/rmd-outputs b/.Rproj.user/0CDE63C2/rmd-outputs deleted file mode 100644 index c4e7f90..0000000 --- a/.Rproj.user/0CDE63C2/rmd-outputs +++ /dev/null @@ -1,6 +0,0 @@ -~/Documents/analytics_at_scale/snippets.html - - - - - diff --git a/.Rproj.user/0CDE63C2/saved_source_markers b/.Rproj.user/0CDE63C2/saved_source_markers deleted file mode 100644 index 2b1bef1..0000000 --- a/.Rproj.user/0CDE63C2/saved_source_markers +++ /dev/null @@ -1 +0,0 @@ -{"active_set":"","sets":[]} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/19AA840D b/.Rproj.user/0CDE63C2/sources/prop/19AA840D deleted file mode 100644 index 1737d9e..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/19AA840D +++ /dev/null @@ -1,13 +0,0 @@ -{ - "rmdVisualMode": "false", - "rmdVisualWrapConfigured": "true", - "tempName": "Untitled1", - "source_window_id": 
"", - "Source": "Source", - "cursorPosition": "9,0", - "scrollLine": "0", - "docOutlineVisible": "1", - "rmdVisualCollapsedChunks": "", - "rmdVisualModeLocation": "295:0", - "docOutlineSize": "141" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/503D05BB b/.Rproj.user/0CDE63C2/sources/prop/503D05BB deleted file mode 100644 index eb1ff25..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/503D05BB +++ /dev/null @@ -1,6 +0,0 @@ -{ - "source_window_id": "", - "Source": "Source", - "cursorPosition": "42,45", - "scrollLine": "0" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/56B42243 b/.Rproj.user/0CDE63C2/sources/prop/56B42243 deleted file mode 100644 index 72cc636..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/56B42243 +++ /dev/null @@ -1,13 +0,0 @@ -{ - "tempName": "Untitled1", - "source_window_id": "", - "Source": "Source", - "cursorPosition": "0,0", - "scrollLine": "0", - "rmdVisualMode": "true", - "rmdVisualCollapsedChunks": "", - "rmdVisualModeLocation": "30:0", - "rmdVisualWrapConfigured": "true", - "docOutlineVisible": "1", - "docOutlineSize": "259" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/6BFBD54A b/.Rproj.user/0CDE63C2/sources/prop/6BFBD54A deleted file mode 100644 index e540033..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/6BFBD54A +++ /dev/null @@ -1,7 +0,0 @@ -{ - "tempName": "Untitled1", - "source_window_id": "", - "Source": "Source", - "cursorPosition": "33,3", - "scrollLine": "50" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/7377711F b/.Rproj.user/0CDE63C2/sources/prop/7377711F deleted file mode 100644 index bdf4b18..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/7377711F +++ /dev/null @@ -1,11 +0,0 @@ -{ - "rmdVisualMode": "false", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "1", - "rmdVisualCollapsedChunks": "", - "cursorPosition": "13,69", - "scrollLine": 
"0", - "rmdVisualModeLocation": "202:0" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/A0008A94 b/.Rproj.user/0CDE63C2/sources/prop/A0008A94 deleted file mode 100644 index 40ec751..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/A0008A94 +++ /dev/null @@ -1,11 +0,0 @@ -{ - "rmdVisualMode": "true", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "1", - "rmdVisualCollapsedChunks": "", - "cursorPosition": "0,0", - "scrollLine": "0", - "rmdVisualModeLocation": "2:0" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/B5CF2C5C b/.Rproj.user/0CDE63C2/sources/prop/B5CF2C5C deleted file mode 100644 index 9215d1f..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/B5CF2C5C +++ /dev/null @@ -1,11 +0,0 @@ -{ - "rmdVisualMode": "false", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "0", - "rmdVisualCollapsedChunks": "", - "cursorPosition": "2,47", - "scrollLine": "0", - "rmdVisualModeLocation": "121:6" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/BF877C30 b/.Rproj.user/0CDE63C2/sources/prop/BF877C30 deleted file mode 100644 index 40ec751..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/BF877C30 +++ /dev/null @@ -1,11 +0,0 @@ -{ - "rmdVisualMode": "true", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "1", - "rmdVisualCollapsedChunks": "", - "cursorPosition": "0,0", - "scrollLine": "0", - "rmdVisualModeLocation": "2:0" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/C25DB8A1 b/.Rproj.user/0CDE63C2/sources/prop/C25DB8A1 deleted file mode 100644 index d8354c2..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/C25DB8A1 +++ /dev/null @@ -1,6 +0,0 @@ -{ - "source_window_id": "", - "Source": "Source", - "cursorPosition": "25,30", - "scrollLine": "2" -} \ No newline at end of file diff --git 
a/.Rproj.user/0CDE63C2/sources/prop/C752720B b/.Rproj.user/0CDE63C2/sources/prop/C752720B deleted file mode 100644 index cc3d37d..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/C752720B +++ /dev/null @@ -1,9 +0,0 @@ -{ - "rmdVisualMode": "true", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "1", - "rmdVisualCollapsedChunks": "", - "rmdVisualModeLocation": "2:0" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/C9422BD5 b/.Rproj.user/0CDE63C2/sources/prop/C9422BD5 deleted file mode 100644 index 6e20cec..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/C9422BD5 +++ /dev/null @@ -1,11 +0,0 @@ -{ - "rmdVisualMode": "false", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "0", - "rmdVisualCollapsedChunks": "", - "rmdVisualModeLocation": "158:0", - "cursorPosition": "25,78", - "scrollLine": "0" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/DB772C11 b/.Rproj.user/0CDE63C2/sources/prop/DB772C11 deleted file mode 100644 index cac365c..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/DB772C11 +++ /dev/null @@ -1,9 +0,0 @@ -{ - "rmdVisualMode": "true", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "1", - "rmdVisualCollapsedChunks": "", - "rmdVisualModeLocation": "120:0" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/INDEX b/.Rproj.user/0CDE63C2/sources/prop/INDEX deleted file mode 100644 index b7b0f5d..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/INDEX +++ /dev/null @@ -1,12 +0,0 @@ -~%2FDocuments%2Fanalytics_at_scale%2F.github%2Fworkflows%2Fquarto-publish.yml="503D05BB" -~%2FDocuments%2Fanalytics_at_scale%2F_quarto.yml="C25DB8A1" -~%2FDocuments%2Fanalytics_at_scale%2Fabout.qmd="C752720B" -~%2FDocuments%2Fanalytics_at_scale%2Findex.qmd="C9422BD5" 
-~%2FDocuments%2Fanalytics_at_scale%2Fmeeting-notes%2F2024-10-04.qmd="B5CF2C5C" -~%2FDocuments%2Fanalytics_at_scale%2Fmeeting-notes%2F2024-10-11.qmd="7377711F" -~%2FDocuments%2Fanalytics_at_scale%2Fmeeting-notes%2F2024.10.11.qmd="A0008A94" -~%2FDocuments%2Fanalytics_at_scale%2Fmeeting-notes%2F24.10.04.qmd="BF877C30" -~%2FDocuments%2Fanalytics_at_scale%2Fmeeting-notes.qmd="19AA840D" -~%2FDocuments%2Fanalytics_at_scale%2Fsnippets.qmd="56B42243" -~%2FDocuments%2Fanalytics_at_scale%2Fsnippets.qmd.R="6BFBD54A" -~%2FDocuments%2Fanalytics_at_scale%2Ftutorials%2Fsnippets.qmd="DB772C11" diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/0AED968E b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/0AED968E deleted file mode 100644 index 9f5ffc4..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/0AED968E +++ /dev/null @@ -1,31 +0,0 @@ -{ - "id": "0AED968E", - "path": "~/Documents/analytics_at_scale/meeting-notes/2024-10-04.qmd", - "project_path": "meeting-notes/2024-10-04.qmd", - "type": "quarto_markdown", - "hash": "2331898432", - "contents": "", - "dirty": false, - "created": 1729796157579.0, - "source_on_save": true, - "relative_order": 4, - "properties": { - "rmdVisualMode": "false", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "0", - "rmdVisualCollapsedChunks": "", - "cursorPosition": "2,47", - "scrollLine": "0", - "rmdVisualModeLocation": "121:6" - }, - "folds": "", - "lastKnownWriteTime": 1729801518, - "encoding": "UTF-8", - "collab_server": "", - "source_window": "", - "last_content_update": 1729801518515, - "read_only": false, - "read_only_alternatives": [] -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/162D8A48-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/162D8A48-contents deleted file mode 100644 index e125d31..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/162D8A48-contents +++ /dev/null @@ -1,163 +0,0 @@ ---- -title: 
"RStudio Snippets Tutorial" -author: "Jake Peters" -date: 10/11/24 -format: html -editor: - markdown: - wrap: 72 ---- - -# Introduction - -This tutorial demonstrates how to use both built-in and custom RStudio -snippets within an R script. By leveraging snippets, -you can streamline your coding process, reduce repetitive tasks, and -maintain consistency across your projects. - -## Table of Contents - -1. Introduction to RStudio Snippets -2. Using Built-in Snippets -3. Creating and Using Custom Snippets - - -## Introduction to RStudio Snippets - -*Snippets* in RStudio are predefined blocks of code that can be -quickly inserted into your scripts or markdown files. They enhance -productivity by minimizing repetitive typing and ensuring consistency -across your coding projects. - -In this tutorial, we'll explore how to utilize RStudio's built-in -snippets and create custom snippets tailored to your specific workflow. - ------------------------------------------------------------------------- - -## Using Built-in Snippets - -RStudio comes equipped with several built-in snippets that can be -effortlessly used in `.qmd` files. Below are some commonly used built-in -snippets along with examples of their usage. - -#### Fun - -Type `fun` and press `Tab`. - -``` r -fun my_function <- function(arg1, arg2) { -# Function body -} -``` - -#### For - -Type `for` and press `Tab`. - -``` r -for (variable in vector) { - -} -``` - -#### While - -Type `while` and press `Tab`. - -``` r -while (condition) { - -} -``` - -#### Apply - -Type `lapply` and press `Tab`. - -``` r -apply(array, margin, ...) -``` - -#### Lapply - -Type `lapply` and press `Tab`. - -``` r -lapply(list, function) -``` - -#### ts - -Type `ts` and press `Tab`. - -``` r -# Fri Oct 11 09:53:23 2024 ------------------------------ -``` - -## Using Custom Snippets - -Go to `Tools` \> `Global Options` \> `Snippets` \> `Edit`. 
- -Then paste the following at the bottom of the page and hit `Apply` - -``` r -snippet dev - "nih-nci-dceg-connect-dev" -``` - -![Gif showing the workflow of creating/using your own snippet](media/snippets.gif) - -::: {.callout-note} -Note that all code after the `snippet ` key word must be indented. -::: - -## More custom snippets to try - -```r -snippet header - ## Description ================================================================= - # Title: ${1:Script Title} - # Author: ${2:Your Name} - # Date: ${3:`r format(Sys.Date(), "%Y-%m-%d")`} - # Objective: ${4:Script Purpose} - # GH-Issue: ${5:Github Issue} - -snippet query - bq_auth() # Authenticate with BigQuery - con <- DBI::dbConnect(bigrquery::bigquery(), - project=project, dataset=dataset, billing=billing)) - - # Specify just the data we need with a query - dataset <- "FlatConnect" - table <- "participants_JP" - tier <- "nih-nci-dceg-connect-prod-6d04" - sql <- glue::glue( - """ - SELECT - Connect_ID, - token - FROM `{project}.{dataset}.{table}` - """) - - # Query the data and store as reference object -data <- dbGetQuery(con, SQL) - -snippet dev - "nih-nci-dceg-connect-dev" - -snippet stg - "nih-nci-dceg-connect-stg-5519" - -snippet prod - "nih-nci-dceg-connect-prod-6d04" - -snippet exclusions - WHERE - p.d_821247024 = '197316935' -- 'Verif Status' IS 'Verified' - AND p.d_747006172 != '353358909' -- 'Withdraw consent' IS NOT "Yes" - AND p.d_100767870 = '104430631' -- 'All Base Surv Complete' = 'No' - AND p.d_685002411_d_994064239 != '353358909' -- 'Ref Base Survs' IS NOT "Yes" - AND p.d_987563196 != '353358909' -- 'Dead' IS NOT 'Yes' -```` - - diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/4F13DDE1 b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/4F13DDE1 deleted file mode 100644 index e12b200..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/4F13DDE1 +++ /dev/null @@ -1,26 +0,0 @@ -{ - "id": "4F13DDE1", - "path": "~/Documents/analytics_at_scale/_quarto.yml", - "project_path": 
"_quarto.yml", - "type": "yaml", - "hash": "2293911025", - "contents": "", - "dirty": false, - "created": 1728654014367.0, - "source_on_save": false, - "relative_order": 2, - "properties": { - "source_window_id": "", - "Source": "Source", - "cursorPosition": "25,30", - "scrollLine": "2" - }, - "folds": "", - "lastKnownWriteTime": 1729800548, - "encoding": "UTF-8", - "collab_server": "", - "source_window": "", - "last_content_update": 1729800548269, - "read_only": false, - "read_only_alternatives": [] -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/4F13DDE1-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/4F13DDE1-contents deleted file mode 100644 index f7b1925..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/4F13DDE1-contents +++ /dev/null @@ -1,38 +0,0 @@ -project: - type: website - -website: - title: "Analytics at Scale" - - navbar: - search: true - tools: - - icon: github - menu: - - text: Source Code - href: https://github.com/analyticsphere/analytics-at-scale - - text: Analyticshpere - href: https://github.com/analyticsphere - - sidebar: - style: "floating" - search: true - contents: - - href: index.qmd - text: Home - - meeting-notes.qmd - - section: "Tutorials" - contents: - - tutorials/snippets.qmd - - text: "Count by Period w/ DBI " - url: https://analyticsphere.github.io/c4cp/articles/count_by_period_vignette.html - - -format: - html: - theme: cosmo - css: styles.css - toc: true - -editor: visual - diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/6057111B-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/6057111B-contents deleted file mode 100644 index 27546a0..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/6057111B-contents +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: Kickoff -date: "2024-10-24" ---- diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/6D5FAD6F b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/6D5FAD6F deleted file mode 100644 index 
4bce691..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/6D5FAD6F +++ /dev/null @@ -1,26 +0,0 @@ -{ - "id": "6D5FAD6F", - "path": "~/Documents/analytics_at_scale/.github/workflows/quarto-publish.yml", - "project_path": ".github/workflows/quarto-publish.yml", - "type": "yaml", - "hash": "0", - "contents": "", - "dirty": false, - "created": 1729862007230.0, - "source_on_save": false, - "relative_order": 8, - "properties": { - "source_window_id": "", - "Source": "Source", - "cursorPosition": "42,45", - "scrollLine": "0" - }, - "folds": "", - "lastKnownWriteTime": 1729862022, - "encoding": "UTF-8", - "collab_server": "", - "source_window": "", - "last_content_update": 1729862022079, - "read_only": false, - "read_only_alternatives": [] -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/6D5FAD6F-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/6D5FAD6F-contents deleted file mode 100644 index 338ca78..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/6D5FAD6F-contents +++ /dev/null @@ -1,66 +0,0 @@ -on: - push: - branches: main - -name: Render and Publish - -# you need these permissions to publish to GitHub pages -permissions: - contents: write - pages: write - -jobs: - build-deploy: - runs-on: ubuntu-latest - steps: - - name: Check out repository - uses: actions/checkout@v4 - - - name: Set up Quarto - uses: quarto-dev/quarto-actions/setup@v2 - with: - # To install LaTeX to build PDF book - tinytex: true - # uncomment below and fill to pin a version - # version: SPECIFIC-QUARTO-VERSION-HERE - - # add software dependencies here and any libraries - - # From https://github.com/actions/setup-python - # - name: Setup Python - # uses: actions/setup-python@v3 - - # From https://github.com/r-lib/actions/tree/v2-branch/setup-r - # - name: Setup R - # uses: r-lib/actions/setup-r@v2 - - # From https://github.com/julia-actions/setup-julia - # - name: Setup Julia - # uses: julia-actions/setup-julia@v1 - - # See more 
at https://github.com/quarto-dev/quarto-actions/blob/main/examples/example-03-dependencies.md - - # To publish to Netlify, RStudio Connect, or GitHub Pages, uncomment - # the appropriate block below - - # - name: Publish to Netlify (and render) - # uses: quarto-dev/quarto-actions/publish@v2 - # with: - # target: netlify - # NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} - - # - name: Publish to RStudio Connect (and render) - # uses: quarto-dev/quarto-actions/publish@v2 - # with: - # target: connect - # CONNECT_SERVER: enter-the-server-url-here - # CONNECT_API_KEY: ${{ secrets.CONNECT_API_KEY }} - - # NOTE: If Publishing to GitHub Pages, set the permissions correctly (see top of this yaml) - - name: Publish to GitHub Pages (and render) - uses: quarto-dev/quarto-actions/publish@v2 - with: - target: gh-pages - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # this secret is always available for github actions - \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/76052A04-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/76052A04-contents deleted file mode 100644 index 3d95ecc..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/76052A04-contents +++ /dev/null @@ -1 +0,0 @@ -"nih-nci-dceg-connect-dev" \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/C45B3481 b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/C45B3481 deleted file mode 100644 index fdabc52..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/C45B3481 +++ /dev/null @@ -1,33 +0,0 @@ -{ - "id": "C45B3481", - "path": "~/Documents/analytics_at_scale/meeting-notes.qmd", - "project_path": "meeting-notes.qmd", - "type": "quarto_markdown", - "hash": "4104937591", - "contents": "", - "dirty": false, - "created": 1729796064645.0, - "source_on_save": true, - "relative_order": 3, - "properties": { - "rmdVisualMode": "false", - "rmdVisualWrapConfigured": "true", - "tempName": "Untitled1", - "source_window_id": "", - 
"Source": "Source", - "cursorPosition": "9,0", - "scrollLine": "0", - "docOutlineVisible": "1", - "rmdVisualCollapsedChunks": "", - "rmdVisualModeLocation": "295:0", - "docOutlineSize": "141" - }, - "folds": "", - "lastKnownWriteTime": 1729799650, - "encoding": "UTF-8", - "collab_server": "", - "source_window": "", - "last_content_update": 1729799650238, - "read_only": false, - "read_only_alternatives": [] -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/C45B3481-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/C45B3481-contents deleted file mode 100644 index ab4a89b..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/C45B3481-contents +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: "Meeting Notes" -listing: - type: table - fields: [date, title, subtitle] - sort: "date desc" - categories: false - contents: "meeting-notes/*.qmd" ---- diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/D4827836 b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/D4827836 deleted file mode 100644 index 8f1009b..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/D4827836 +++ /dev/null @@ -1,29 +0,0 @@ -{ - "id": "D4827836", - "path": "~/Documents/analytics_at_scale/tutorials/snippets.qmd", - "project_path": "tutorials/snippets.qmd", - "type": "quarto_markdown", - "hash": "0", - "contents": "", - "dirty": false, - "created": 1729799654509.0, - "source_on_save": false, - "relative_order": 7, - "properties": { - "rmdVisualMode": "true", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "1", - "rmdVisualCollapsedChunks": "", - "rmdVisualModeLocation": "120:0" - }, - "folds": "", - "lastKnownWriteTime": 1729798425, - "encoding": "UTF-8", - "collab_server": "", - "source_window": "", - "last_content_update": 1729798425, - "read_only": false, - "read_only_alternatives": [] -} \ No newline at end of file diff --git 
a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/D4827836-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/D4827836-contents deleted file mode 100644 index 1df2b04..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/D4827836-contents +++ /dev/null @@ -1,162 +0,0 @@ ---- -title: "RStudio Snippets" -author: "Jake Peters" -date: 10/11/24 -format: html -editor: - markdown: - wrap: 72 ---- - -# Introduction - -This tutorial demonstrates how to use both built-in and custom RStudio -snippets within an R script. By leveraging snippets, you can streamline -your coding process, reduce repetitive tasks, and maintain consistency -across your projects. - -## Table of Contents - -1. Introduction to RStudio Snippets -2. Using Built-in Snippets -3. Creating and Using Custom Snippets - -## Introduction to RStudio Snippets - -*Snippets* in RStudio are predefined blocks of code that can be quickly -inserted into your scripts or markdown files. They enhance productivity -by minimizing repetitive typing and ensuring consistency across your -coding projects. - -In this tutorial, we'll explore how to utilize RStudio's built-in -snippets and create custom snippets tailored to your specific workflow. - ------------------------------------------------------------------------- - -## Using Built-in Snippets - -RStudio comes equipped with several built-in snippets that can be -effortlessly used in `.qmd` files. Below are some commonly used built-in -snippets along with examples of their usage. - -#### Fun - -Type `fun` and press `Tab`. - -``` r -fun my_function <- function(arg1, arg2) { -# Function body -} -``` - -#### For - -Type `for` and press `Tab`. - -``` r -for (variable in vector) { - -} -``` - -#### While - -Type `while` and press `Tab`. - -``` r -while (condition) { - -} -``` - -#### Apply - -Type `lapply` and press `Tab`. - -``` r -apply(array, margin, ...) -``` - -#### Lapply - -Type `lapply` and press `Tab`. 
- -``` r -lapply(list, function) -``` - -#### ts - -Type `ts` and press `Tab`. - -``` r -# Fri Oct 11 09:53:23 2024 ------------------------------ -``` - -## Using Custom Snippets - -Go to `Tools` \> `Global Options` \> `Snippets` \> `Edit`. - -Then paste the following at the bottom of the page and hit `Apply` - -``` r -snippet dev - "nih-nci-dceg-connect-dev" -``` - -![Gif showing the workflow of creating/using your own -snippet](media/snippets.gif) - -::: callout-note -Note that all code after the `snippet ` key word must -be indented. -::: - -## More custom snippets to try - -``` r -snippet header - ## Description ================================================================= - # Title: ${1:Script Title} - # Author: ${2:Your Name} - # Date: ${3:`r format(Sys.Date(), "%Y-%m-%d")`} - # Objective: ${4:Script Purpose} - # GH-Issue: ${5:Github Issue} - -snippet query - bq_auth() # Authenticate with BigQuery - con <- DBI::dbConnect(bigrquery::bigquery(), - project=project, dataset=dataset, billing=billing)) - - # Specify just the data we need with a query - dataset <- "FlatConnect" - table <- "participants_JP" - tier <- "nih-nci-dceg-connect-prod-6d04" - sql <- glue::glue( - """ - SELECT - Connect_ID, - token - FROM `{project}.{dataset}.{table}` - """) - - # Query the data and store as reference object -data <- dbGetQuery(con, SQL) - -snippet dev - "nih-nci-dceg-connect-dev" - -snippet stg - "nih-nci-dceg-connect-stg-5519" - -snippet prod - "nih-nci-dceg-connect-prod-6d04" - -snippet exclusions - WHERE - p.d_821247024 = '197316935' -- 'Verif Status' IS 'Verified' - AND p.d_747006172 != '353358909' -- 'Withdraw consent' IS NOT "Yes" - AND p.d_100767870 = '104430631' -- 'All Base Surv Complete' = 'No' - AND p.d_685002411_d_994064239 != '353358909' -- 'Ref Base Survs' IS NOT "Yes" - AND p.d_987563196 != '353358909' -- 'Dead' IS NOT 'Yes' -``` diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/DACF14F8-contents 
b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/DACF14F8-contents deleted file mode 100644 index 6475a6f..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/DACF14F8-contents +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Meeting Notes" ---- - -::: {.callout-note icon=false collapse=true} - -## 24.10.04 - Kick-off - -Using callouts is an effective way to highlight content that your reader give special consideration or attention. - -::: - - -::: {.callout-note icon=false collapse=true} - -## 24.10.04 - Kick-off - -**Attendees:** JP, KD, BC, LO - -**Topic:** What do we want from this workshop? - -**Notes:** - - item - - item - - item -::: \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F27986C2 b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F27986C2 deleted file mode 100644 index 856927f..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F27986C2 +++ /dev/null @@ -1,31 +0,0 @@ -{ - "id": "F27986C2", - "path": "~/Documents/analytics_at_scale/meeting-notes/2024-10-11.qmd", - "project_path": "meeting-notes/2024-10-11.qmd", - "type": "quarto_markdown", - "hash": "3039937237", - "contents": "", - "dirty": false, - "created": 1729797996405.0, - "source_on_save": true, - "relative_order": 5, - "properties": { - "rmdVisualMode": "false", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "1", - "rmdVisualCollapsedChunks": "", - "cursorPosition": "13,69", - "scrollLine": "0", - "rmdVisualModeLocation": "202:0" - }, - "folds": "", - "lastKnownWriteTime": 1729801134, - "encoding": "UTF-8", - "collab_server": "", - "source_window": "", - "last_content_update": 1729801134707, - "read_only": false, - "read_only_alternatives": [] -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F2F6FBFA b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F2F6FBFA deleted file mode 100644 index f0dfb94..0000000 --- 
a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F2F6FBFA +++ /dev/null @@ -1,33 +0,0 @@ -{ - "id": "F2F6FBFA", - "path": "~/Documents/analytics_at_scale/snippets.qmd", - "project_path": "snippets.qmd", - "type": "quarto_markdown", - "hash": "0", - "contents": "", - "dirty": false, - "created": 1729798351799.0, - "source_on_save": false, - "relative_order": 6, - "properties": { - "tempName": "Untitled1", - "source_window_id": "", - "Source": "Source", - "cursorPosition": "0,0", - "scrollLine": "0", - "rmdVisualMode": "true", - "rmdVisualCollapsedChunks": "", - "rmdVisualModeLocation": "30:0", - "rmdVisualWrapConfigured": "true", - "docOutlineVisible": "1", - "docOutlineSize": "259" - }, - "folds": "", - "lastKnownWriteTime": 1729800341, - "encoding": "UTF-8", - "collab_server": "", - "source_window": "", - "last_content_update": 1729800341542, - "read_only": false, - "read_only_alternatives": [] -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F2F6FBFA-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F2F6FBFA-contents deleted file mode 100644 index 1df2b04..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F2F6FBFA-contents +++ /dev/null @@ -1,162 +0,0 @@ ---- -title: "RStudio Snippets" -author: "Jake Peters" -date: 10/11/24 -format: html -editor: - markdown: - wrap: 72 ---- - -# Introduction - -This tutorial demonstrates how to use both built-in and custom RStudio -snippets within an R script. By leveraging snippets, you can streamline -your coding process, reduce repetitive tasks, and maintain consistency -across your projects. - -## Table of Contents - -1. Introduction to RStudio Snippets -2. Using Built-in Snippets -3. Creating and Using Custom Snippets - -## Introduction to RStudio Snippets - -*Snippets* in RStudio are predefined blocks of code that can be quickly -inserted into your scripts or markdown files. 
They enhance productivity -by minimizing repetitive typing and ensuring consistency across your -coding projects. - -In this tutorial, we'll explore how to utilize RStudio's built-in -snippets and create custom snippets tailored to your specific workflow. - ------------------------------------------------------------------------- - -## Using Built-in Snippets - -RStudio comes equipped with several built-in snippets that can be -effortlessly used in `.qmd` files. Below are some commonly used built-in -snippets along with examples of their usage. - -#### Fun - -Type `fun` and press `Tab`. - -``` r -fun my_function <- function(arg1, arg2) { -# Function body -} -``` - -#### For - -Type `for` and press `Tab`. - -``` r -for (variable in vector) { - -} -``` - -#### While - -Type `while` and press `Tab`. - -``` r -while (condition) { - -} -``` - -#### Apply - -Type `lapply` and press `Tab`. - -``` r -apply(array, margin, ...) -``` - -#### Lapply - -Type `lapply` and press `Tab`. - -``` r -lapply(list, function) -``` - -#### ts - -Type `ts` and press `Tab`. - -``` r -# Fri Oct 11 09:53:23 2024 ------------------------------ -``` - -## Using Custom Snippets - -Go to `Tools` \> `Global Options` \> `Snippets` \> `Edit`. - -Then paste the following at the bottom of the page and hit `Apply` - -``` r -snippet dev - "nih-nci-dceg-connect-dev" -``` - -![Gif showing the workflow of creating/using your own -snippet](media/snippets.gif) - -::: callout-note -Note that all code after the `snippet ` key word must -be indented. 
-::: - -## More custom snippets to try - -``` r -snippet header - ## Description ================================================================= - # Title: ${1:Script Title} - # Author: ${2:Your Name} - # Date: ${3:`r format(Sys.Date(), "%Y-%m-%d")`} - # Objective: ${4:Script Purpose} - # GH-Issue: ${5:Github Issue} - -snippet query - bq_auth() # Authenticate with BigQuery - con <- DBI::dbConnect(bigrquery::bigquery(), - project=project, dataset=dataset, billing=billing)) - - # Specify just the data we need with a query - dataset <- "FlatConnect" - table <- "participants_JP" - tier <- "nih-nci-dceg-connect-prod-6d04" - sql <- glue::glue( - """ - SELECT - Connect_ID, - token - FROM `{project}.{dataset}.{table}` - """) - - # Query the data and store as reference object -data <- dbGetQuery(con, SQL) - -snippet dev - "nih-nci-dceg-connect-dev" - -snippet stg - "nih-nci-dceg-connect-stg-5519" - -snippet prod - "nih-nci-dceg-connect-prod-6d04" - -snippet exclusions - WHERE - p.d_821247024 = '197316935' -- 'Verif Status' IS 'Verified' - AND p.d_747006172 != '353358909' -- 'Withdraw consent' IS NOT "Yes" - AND p.d_100767870 = '104430631' -- 'All Base Surv Complete' = 'No' - AND p.d_685002411_d_994064239 != '353358909' -- 'Ref Base Survs' IS NOT "Yes" - AND p.d_987563196 != '353358909' -- 'Dead' IS NOT 'Yes' -``` diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F466712C b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F466712C deleted file mode 100644 index 32d8aa9..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F466712C +++ /dev/null @@ -1,31 +0,0 @@ -{ - "id": "F466712C", - "path": "~/Documents/analytics_at_scale/index.qmd", - "project_path": "index.qmd", - "type": "quarto_markdown", - "hash": "1743756931", - "contents": "", - "dirty": false, - "created": 1728654014033.0, - "source_on_save": true, - "relative_order": 1, - "properties": { - "rmdVisualMode": "false", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", 
- "docOutlineVisible": "0", - "rmdVisualCollapsedChunks": "", - "rmdVisualModeLocation": "158:0", - "cursorPosition": "25,78", - "scrollLine": "0" - }, - "folds": "", - "lastKnownWriteTime": 1729861922, - "encoding": "UTF-8", - "collab_server": "", - "source_window": "", - "last_content_update": 1729861922941, - "read_only": false, - "read_only_alternatives": [] -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F466712C-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F466712C-contents deleted file mode 100644 index 7bdce6e..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F466712C-contents +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: "Analytics at Scale" -subtitle: Workshopping best practices for big data analytics in epidemiology ---- - -## Objective - - Hold weekly workshops **lead by team members** or invitees. - - **Produce resources** such as videos, tutorials and code resources from workshops on this resource page - - **Standardize** our analytic workflows. - -## Focus - -- Best practices for writing **robust**, **reproducible**, and **readable** R code -- Optimization for big data as our data set grows -- Tips and tricks for productivity -- Integration with cloud computing resources and data sources -- Integrating R and SQL - -## Past topics -DBI and dbplyr, code snippets - -## Future workshop topics: -functional programming, `renv`, GitHub, Docker, pipelines, Style Guides, -developing R packages, publishing data products, RStudio Snippets, `linter`, -`styler`, `rshiny`, `rix`, SQL & R, Quarto, Quarto Websites, Quarto Dashboards, -`plumber` APIs, data structures, JSON in R, mermaid diagrams, SQLlite, DuckDB, -... 
- -## References -- [Advanced R](http://adv-r.had.co.nz/) by Hadley Wickham -- [Building Reproducible Analytical Pipelines with R](https://raps-with-r.dev/) by Bruno Rodriguez -- [R Packages](https://r-pkgs.org/) by Hadley Wickham -- [Modern Data Visualization with R](https://rkabacoff.github.io/datavis/) by Robert Kabacoff - diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/lock_file b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/lock_file deleted file mode 100644 index e69de29..0000000 diff --git a/.Rproj.user/shared/notebooks/2CBBAA59-24.10.04/1/s/chunks.json b/.Rproj.user/shared/notebooks/2CBBAA59-24.10.04/1/s/chunks.json deleted file mode 100644 index a894885..0000000 --- a/.Rproj.user/shared/notebooks/2CBBAA59-24.10.04/1/s/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729796169} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C219570d0b/chunks.json b/.Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C219570d0b/chunks.json deleted file mode 100644 index 2cba458..0000000 --- a/.Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C219570d0b/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729779284} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C26a2f53da/chunks.json b/.Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C26a2f53da/chunks.json deleted file mode 100644 index 85d8f98..0000000 --- a/.Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C26a2f53da/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729795766} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C2cc55e1fa/chunks.json b/.Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C2cc55e1fa/chunks.json deleted file mode 100644 index 40235a6..0000000 --- a/.Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C2cc55e1fa/chunks.json +++ /dev/null @@ -1 +0,0 
@@ -{"chunk_definitions":[],"doc_write_time":1728655103} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/3B243C68-index/1/s/chunks.json b/.Rproj.user/shared/notebooks/3B243C68-index/1/s/chunks.json deleted file mode 100644 index 85d8f98..0000000 --- a/.Rproj.user/shared/notebooks/3B243C68-index/1/s/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729795766} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/4E1298AF-2024-10-04/1/0CDE63C26a2f53da/chunks.json b/.Rproj.user/shared/notebooks/4E1298AF-2024-10-04/1/0CDE63C26a2f53da/chunks.json deleted file mode 100644 index 03af110..0000000 --- a/.Rproj.user/shared/notebooks/4E1298AF-2024-10-04/1/0CDE63C26a2f53da/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729796338} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/4E1298AF-2024-10-04/1/s/chunks.json b/.Rproj.user/shared/notebooks/4E1298AF-2024-10-04/1/s/chunks.json deleted file mode 100644 index 03af110..0000000 --- a/.Rproj.user/shared/notebooks/4E1298AF-2024-10-04/1/s/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729796338} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/6F16FCC1-2024-10-11/1/0CDE63C26a2f53da/chunks.json b/.Rproj.user/shared/notebooks/6F16FCC1-2024-10-11/1/0CDE63C26a2f53da/chunks.json deleted file mode 100644 index 7e93b17..0000000 --- a/.Rproj.user/shared/notebooks/6F16FCC1-2024-10-11/1/0CDE63C26a2f53da/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729798164} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/6F16FCC1-2024-10-11/1/s/chunks.json b/.Rproj.user/shared/notebooks/6F16FCC1-2024-10-11/1/s/chunks.json deleted file mode 100644 index 7e93b17..0000000 --- a/.Rproj.user/shared/notebooks/6F16FCC1-2024-10-11/1/s/chunks.json +++ /dev/null @@ -1 +0,0 @@ 
-{"chunk_definitions":[],"doc_write_time":1729798164} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/9DA072F5-snippets/1/0CDE63C26a2f53da/chunks.json b/.Rproj.user/shared/notebooks/9DA072F5-snippets/1/0CDE63C26a2f53da/chunks.json deleted file mode 100644 index 5802856..0000000 --- a/.Rproj.user/shared/notebooks/9DA072F5-snippets/1/0CDE63C26a2f53da/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729798354} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/9DA072F5-snippets/1/s/chunks.json b/.Rproj.user/shared/notebooks/9DA072F5-snippets/1/s/chunks.json deleted file mode 100644 index 5802856..0000000 --- a/.Rproj.user/shared/notebooks/9DA072F5-snippets/1/s/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729798354} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/D470729E-2024.10.11/1/0CDE63C26a2f53da/chunks.json b/.Rproj.user/shared/notebooks/D470729E-2024.10.11/1/0CDE63C26a2f53da/chunks.json deleted file mode 100644 index b69a406..0000000 --- a/.Rproj.user/shared/notebooks/D470729E-2024.10.11/1/0CDE63C26a2f53da/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729797996} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/D470729E-2024.10.11/1/s/chunks.json b/.Rproj.user/shared/notebooks/D470729E-2024.10.11/1/s/chunks.json deleted file mode 100644 index b69a406..0000000 --- a/.Rproj.user/shared/notebooks/D470729E-2024.10.11/1/s/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729797996} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/F71330AC-meeting-notes/1/0CDE63C26a2f53da/chunks.json b/.Rproj.user/shared/notebooks/F71330AC-meeting-notes/1/0CDE63C26a2f53da/chunks.json deleted file mode 100644 index 3bf8b5b..0000000 --- a/.Rproj.user/shared/notebooks/F71330AC-meeting-notes/1/0CDE63C26a2f53da/chunks.json +++ /dev/null @@ -1 +0,0 
@@ -{"chunk_definitions":[],"doc_write_time":1729796066} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/F71330AC-meeting-notes/1/s/chunks.json b/.Rproj.user/shared/notebooks/F71330AC-meeting-notes/1/s/chunks.json deleted file mode 100644 index 3bf8b5b..0000000 --- a/.Rproj.user/shared/notebooks/F71330AC-meeting-notes/1/s/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729796066} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/patch-chunk-names b/.Rproj.user/shared/notebooks/patch-chunk-names deleted file mode 100644 index e69de29..0000000 diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths deleted file mode 100644 index 1432c4a..0000000 --- a/.Rproj.user/shared/notebooks/paths +++ /dev/null @@ -1,8 +0,0 @@ -/Users/petersjm/Documents/analytics_at_scale/index.qmd="3B243C68" -/Users/petersjm/Documents/analytics_at_scale/meeting-notes.qmd="F71330AC" -/Users/petersjm/Documents/analytics_at_scale/meeting-notes/2024-10-04.qmd="4E1298AF" -/Users/petersjm/Documents/analytics_at_scale/meeting-notes/2024-10-11.qmd="6F16FCC1" -/Users/petersjm/Documents/analytics_at_scale/meeting-notes/2024.10.11.qmd="D470729E" -/Users/petersjm/Documents/analytics_at_scale/meeting-notes/24.10.04.qmd="2CBBAA59" -/Users/petersjm/Documents/analytics_at_scale/snippets.qmd="9DA072F5" -/Users/petersjm/Documents/analytics_at_scale/tutorials/snippets.qmd="9348CF30" diff --git a/.github/workflows/quarto-publish.yml b/.github/workflows/quarto-publish.yml index 338ca78..8786b33 100644 --- a/.github/workflows/quarto-publish.yml +++ b/.github/workflows/quarto-publish.yml @@ -4,10 +4,10 @@ on: name: Render and Publish -# you need these permissions to publish to GitHub pages +# Permissions required to publish to GitHub Pages permissions: - contents: write - pages: write + contents: write + pages: write jobs: build-deploy: @@ -15,52 +15,42 @@ jobs: steps: - name: Check out repository uses: 
actions/checkout@v4 - - - name: Set up Quarto - uses: quarto-dev/quarto-actions/setup@v2 - with: - # To install LaTeX to build PDF book - tinytex: true - # uncomment below and fill to pin a version - # version: SPECIFIC-QUARTO-VERSION-HERE - - # add software dependencies here and any libraries - - # From https://github.com/actions/setup-python - # - name: Setup Python - # uses: actions/setup-python@v3 - # From https://github.com/r-lib/actions/tree/v2-branch/setup-r - # - name: Setup R - # uses: r-lib/actions/setup-r@v2 + - name: Setup R + uses: r-lib/actions/setup-r@v2 + # with: + # Optionally specify an R version + # r-version: '4.4.1' - # From https://github.com/julia-actions/setup-julia - # - name: Setup Julia - # uses: julia-actions/setup-julia@v1 + - name: Cache R Packages + uses: actions/cache@v3 + with: + path: ${{ github.workspace }}/R/library + key: ${{ runner.os }}-R-${{ hashFiles('**/DESCRIPTION') }} + restore-keys: | + ${{ runner.os }}-R- - # See more at https://github.com/quarto-dev/quarto-actions/blob/main/examples/example-03-dependencies.md + - name: Install R Packages + env: + R_LIBS_USER: ${{ github.workspace }}/R/library + run: | + mkdir -p $R_LIBS_USER + Rscript -e 'install.packages(c("knitr", "rmarkdown", "ggplot2", "palmerpenguins", "plotly"), repos = "https://cloud.r-project.org/", lib = Sys.getenv("R_LIBS_USER"))' - # To publish to Netlify, RStudio Connect, or GitHub Pages, uncomment - # the appropriate block below - - # - name: Publish to Netlify (and render) - # uses: quarto-dev/quarto-actions/publish@v2 - # with: - # target: netlify - # NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} - - # - name: Publish to RStudio Connect (and render) - # uses: quarto-dev/quarto-actions/publish@v2 - # with: - # target: connect - # CONNECT_SERVER: enter-the-server-url-here - # CONNECT_API_KEY: ${{ secrets.CONNECT_API_KEY }} + - name: Verify R Packages Installation + run: | + Rscript -e "library(knitr, lib.loc = Sys.getenv('R_LIBS_USER'))" + Rscript
-e "library(rmarkdown, lib.loc = Sys.getenv('R_LIBS_USER'))" + + - name: Set up Quarto + uses: quarto-dev/quarto-actions/setup@v2 + with: + tinytex: true + # version: '1.2.3' # Uncomment to pin Quarto version - # NOTE: If Publishing to GitHub Pages, set the permissions correctly (see top of this yaml) - name: Publish to GitHub Pages (and render) uses: quarto-dev/quarto-actions/publish@v2 with: target: gh-pages env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # this secret is always available for github actions - \ No newline at end of file + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e0cf327 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata +.Rproj.user/* +.DS_Store +.quarto/* +.Rproj* +README_files/* +_site/* +README.html +*.Rproj + +/.quarto/ diff --git a/.quarto/_freeze/about/execute-results/html.json b/.quarto/_freeze/about/execute-results/html.json deleted file mode 100644 index be15350..0000000 --- a/.quarto/_freeze/about/execute-results/html.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "hash": "a754ce02c0472c0b163d8ffb898e69ac", - "result": { - "engine": "knitr", - "markdown": "---\ntitle: \"About\"\n---\n\n\n\n\n\nAbout this site\n\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n1 + 1\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n[1] 2\n```\n\n\n:::\n:::\n", - "supporting": [], - "filters": [ - "rmarkdown/pagebreak.lua" - ], - "includes": {}, - "engineDependencies": {}, - "preserve": {}, - "postProcess": true - } -} \ No newline at end of file diff --git a/.quarto/_freeze/index/execute-results/html.json b/.quarto/_freeze/index/execute-results/html.json deleted file mode 100644 index 5485528..0000000 --- a/.quarto/_freeze/index/execute-results/html.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "hash": "6617ac8635029eb78f09b7ba6924dcc9", - "result": { - "engine": "knitr", - "markdown": "---\ntitle: \"Analytics at Scale\"\n---\n\n\n\nThis is a Quarto 
website.\n\nTo learn more about Quarto websites visit .\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n1 + 1\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n[1] 2\n```\n\n\n:::\n:::\n", - "supporting": [], - "filters": [ - "rmarkdown/pagebreak.lua" - ], - "includes": {}, - "engineDependencies": {}, - "preserve": {}, - "postProcess": true - } -} \ No newline at end of file diff --git a/.quarto/_freeze/site_libs/clipboard/clipboard.min.js b/.quarto/_freeze/site_libs/clipboard/clipboard.min.js deleted file mode 100644 index 1103f81..0000000 --- a/.quarto/_freeze/site_libs/clipboard/clipboard.min.js +++ /dev/null @@ -1,7 +0,0 @@ -/*! - * clipboard.js v2.0.11 - * https://clipboardjs.com/ - * - * Licensed MIT © Zeno Rocha - */ -!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return b}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),r=n.n(e);function c(t){try{return document.execCommand(t)}catch(t){return}}var a=function(t){t=r()(t);return c("cut"),t};function o(t,e){var n,o,t=(n=t,o="rtl"===document.documentElement.getAttribute("dir"),(t=document.createElement("textarea")).style.fontSize="12pt",t.style.border="0",t.style.padding="0",t.style.margin="0",t.style.position="absolute",t.style[o?"right":"left"]="-9999px",o=window.pageYOffset||document.documentElement.scrollTop,t.style.top="".concat(o,"px"),t.setAttribute("readonly",""),t.value=n,t);return e.container.appendChild(t),e=r()(t),c("copy"),t.remove(),e}var f=function(t){var e=10?setTimeout((function(){e(r,n,s)}),1):(t.update(),n(s))}}},"./src/filter.js":function(t){t.exports=function(t){return t.handlers.filterStart=t.handlers.filterStart||[],t.handlers.filterComplete=t.handlers.filterComplete||[],function(e){if(t.trigger("filterStart"),t.i=1,t.reset.filter(),void 
0===e)t.filtered=!1;else{t.filtered=!0;for(var r=t.items,n=0,s=r.length;nv.page,a=new g(t[s],void 0,n),v.items.push(a),r.push(a)}return v.update(),r}m(t.slice(0),e)}},this.show=function(t,e){return this.i=t,this.page=e,v.update(),v},this.remove=function(t,e,r){for(var n=0,s=0,i=v.items.length;s-1&&r.splice(n,1),v},this.trigger=function(t){for(var e=v.handlers[t].length;e--;)v.handlers[t][e](v);return v},this.reset={filter:function(){for(var t=v.items,e=t.length;e--;)t[e].filtered=!1;return v},search:function(){for(var t=v.items,e=t.length;e--;)t[e].found=!1;return v}},this.update=function(){var t=v.items,e=t.length;v.visibleItems=[],v.matchingItems=[],v.templater.clear();for(var r=0;r=v.i&&v.visibleItems.lengthe},innerWindow:function(t,e,r){return t>=e-r&&t<=e+r},dotted:function(t,e,r,n,s,i,a){return this.dottedLeft(t,e,r,n,s,i)||this.dottedRight(t,e,r,n,s,i,a)},dottedLeft:function(t,e,r,n,s,i){return e==r+1&&!this.innerWindow(e,s,i)&&!this.right(e,n)},dottedRight:function(t,e,r,n,s,i,a){return!t.items[a-1].values().dotted&&(e==n&&!this.innerWindow(e,s,i)&&!this.right(e,n))}};return function(e){var n=new i(t.listContainer.id,{listClass:e.paginationClass||"pagination",item:e.item||"
  • ",valueNames:["page","dotted"],searchClass:"pagination-search-that-is-not-supposed-to-exist",sortClass:"pagination-sort-that-is-not-supposed-to-exist"});s.bind(n.listContainer,"click",(function(e){var r=e.target||e.srcElement,n=t.utils.getAttribute(r,"data-page"),s=t.utils.getAttribute(r,"data-i");s&&t.show((s-1)*n+1,n)})),t.on("updated",(function(){r(n,e)})),r(n,e)}}},"./src/parse.js":function(t,e,r){t.exports=function(t){var e=r("./src/item.js")(t),n=function(r,n){for(var s=0,i=r.length;s0?setTimeout((function(){e(r,s)}),1):(t.update(),t.trigger("parseComplete"))};return t.handlers.parseComplete=t.handlers.parseComplete||[],function(){var e=function(t){for(var e=t.childNodes,r=[],n=0,s=e.length;n]/g.exec(t)){var e=document.createElement("tbody");return e.innerHTML=t,e.firstElementChild}if(-1!==t.indexOf("<")){var r=document.createElement("div");return r.innerHTML=t,r.firstElementChild}}},a=function(e,r,n){var s=void 0,i=function(e){for(var r=0,n=t.valueNames.length;r=1;)t.list.removeChild(t.list.firstChild)},function(){var r;if("function"!=typeof t.item){if(!(r="string"==typeof t.item?-1===t.item.indexOf("<")?document.getElementById(t.item):i(t.item):s()))throw new Error("The list needs to have at least one item on init otherwise you'll have to add a template.");r=n(r,t.valueNames),e=function(){return r.cloneNode(!0)}}else e=function(e){var r=t.item(e);return i(r)}}()};t.exports=function(t){return new e(t)}},"./src/utils/classes.js":function(t,e,r){var n=r("./src/utils/index-of.js"),s=/\s+/;Object.prototype.toString;function i(t){if(!t||!t.nodeType)throw new Error("A DOM element reference is required");this.el=t,this.list=t.classList}t.exports=function(t){return new i(t)},i.prototype.add=function(t){if(this.list)return this.list.add(t),this;var e=this.array();return~n(e,t)||e.push(t),this.el.className=e.join(" "),this},i.prototype.remove=function(t){if(this.list)return this.list.remove(t),this;var 
e=this.array(),r=n(e,t);return~r&&e.splice(r,1),this.el.className=e.join(" "),this},i.prototype.toggle=function(t,e){return this.list?(void 0!==e?e!==this.list.toggle(t,e)&&this.list.toggle(t):this.list.toggle(t),this):(void 0!==e?e?this.add(t):this.remove(t):this.has(t)?this.remove(t):this.add(t),this)},i.prototype.array=function(){var t=(this.el.getAttribute("class")||"").replace(/^\s+|\s+$/g,"").split(s);return""===t[0]&&t.shift(),t},i.prototype.has=i.prototype.contains=function(t){return this.list?this.list.contains(t):!!~n(this.array(),t)}},"./src/utils/events.js":function(t,e,r){var n=window.addEventListener?"addEventListener":"attachEvent",s=window.removeEventListener?"removeEventListener":"detachEvent",i="addEventListener"!==n?"on":"",a=r("./src/utils/to-array.js");e.bind=function(t,e,r,s){for(var o=0,l=(t=a(t)).length;o32)return!1;var a=n,o=function(){var t,r={};for(t=0;t=p;b--){var j=o[t.charAt(b-1)];if(C[b]=0===m?(C[b+1]<<1|1)&j:(C[b+1]<<1|1)&j|(v[b+1]|v[b])<<1|1|v[b+1],C[b]&d){var x=l(m,b-1);if(x<=u){if(u=x,!((c=b-1)>a))break;p=Math.max(1,2*a-c)}}}if(l(m+1,a)>u)break;v=C}return!(c<0)}},"./src/utils/get-attribute.js":function(t){t.exports=function(t,e){var r=t.getAttribute&&t.getAttribute(e)||null;if(!r)for(var n=t.attributes,s=n.length,i=0;i=48&&t<=57}function i(t,e){for(var i=(t+="").length,a=(e+="").length,o=0,l=0;o=i&&l=a?-1:l>=a&&o=i?1:i-a}i.caseInsensitive=i.i=function(t,e){return i((""+t).toLowerCase(),(""+e).toLowerCase())},Object.defineProperties(i,{alphabet:{get:function(){return e},set:function(t){r=[];var s=0;if(e=t)for(;s { - if (categoriesLoaded) { - activateCategory(category); - setCategoryHash(category); - } -}; - -window["quarto-listing-loaded"] = () => { - // Process any existing hash - const hash = getHash(); - - if (hash) { - // If there is a category, switch to that - if (hash.category) { - activateCategory(hash.category); - } - // Paginate a specific listing - const listingIds = Object.keys(window["quarto-listings"]); - for (const 
listingId of listingIds) { - const page = hash[getListingPageKey(listingId)]; - if (page) { - showPage(listingId, page); - } - } - } - - const listingIds = Object.keys(window["quarto-listings"]); - for (const listingId of listingIds) { - // The actual list - const list = window["quarto-listings"][listingId]; - - // Update the handlers for pagination events - refreshPaginationHandlers(listingId); - - // Render any visible items that need it - renderVisibleProgressiveImages(list); - - // Whenever the list is updated, we also need to - // attach handlers to the new pagination elements - // and refresh any newly visible items. - list.on("updated", function () { - renderVisibleProgressiveImages(list); - setTimeout(() => refreshPaginationHandlers(listingId)); - - // Show or hide the no matching message - toggleNoMatchingMessage(list); - }); - } -}; - -window.document.addEventListener("DOMContentLoaded", function (_event) { - // Attach click handlers to categories - const categoryEls = window.document.querySelectorAll( - ".quarto-listing-category .category" - ); - - for (const categoryEl of categoryEls) { - const category = categoryEl.getAttribute("data-category"); - categoryEl.onclick = () => { - activateCategory(category); - setCategoryHash(category); - }; - } - - // Attach a click handler to the category title - // (there should be only one, but since it is a class name, handle N) - const categoryTitleEls = window.document.querySelectorAll( - ".quarto-listing-category-title" - ); - for (const categoryTitleEl of categoryTitleEls) { - categoryTitleEl.onclick = () => { - activateCategory(""); - setCategoryHash(""); - }; - } - - categoriesLoaded = true; -}); - -function toggleNoMatchingMessage(list) { - const selector = `#${list.listContainer.id} .listing-no-matching`; - const noMatchingEl = window.document.querySelector(selector); - if (noMatchingEl) { - if (list.visibleItems.length === 0) { - noMatchingEl.classList.remove("d-none"); - } else { - if 
(!noMatchingEl.classList.contains("d-none")) { - noMatchingEl.classList.add("d-none"); - } - } - } -} - -function setCategoryHash(category) { - setHash({ category }); -} - -function setPageHash(listingId, page) { - const currentHash = getHash() || {}; - currentHash[getListingPageKey(listingId)] = page; - setHash(currentHash); -} - -function getListingPageKey(listingId) { - return `${listingId}-page`; -} - -function refreshPaginationHandlers(listingId) { - const listingEl = window.document.getElementById(listingId); - const paginationEls = listingEl.querySelectorAll( - ".pagination li.page-item:not(.disabled) .page.page-link" - ); - for (const paginationEl of paginationEls) { - paginationEl.onclick = (sender) => { - setPageHash(listingId, sender.target.getAttribute("data-i")); - showPage(listingId, sender.target.getAttribute("data-i")); - return false; - }; - } -} - -function renderVisibleProgressiveImages(list) { - // Run through the visible items and render any progressive images - for (const item of list.visibleItems) { - const itemEl = item.elm; - if (itemEl) { - const progressiveImgs = itemEl.querySelectorAll( - `img[${kProgressiveAttr}]` - ); - for (const progressiveImg of progressiveImgs) { - const srcValue = progressiveImg.getAttribute(kProgressiveAttr); - if (srcValue) { - progressiveImg.setAttribute("src", srcValue); - } - progressiveImg.removeAttribute(kProgressiveAttr); - } - } - } -} - -function getHash() { - // Hashes are of the form - // #name:value|name1:value1|name2:value2 - const currentUrl = new URL(window.location); - const hashRaw = currentUrl.hash ? 
currentUrl.hash.slice(1) : undefined; - return parseHash(hashRaw); -} - -const kAnd = "&"; -const kEquals = "="; - -function parseHash(hash) { - if (!hash) { - return undefined; - } - const hasValuesStrs = hash.split(kAnd); - const hashValues = hasValuesStrs - .map((hashValueStr) => { - const vals = hashValueStr.split(kEquals); - if (vals.length === 2) { - return { name: vals[0], value: vals[1] }; - } else { - return undefined; - } - }) - .filter((value) => { - return value !== undefined; - }); - - const hashObj = {}; - hashValues.forEach((hashValue) => { - hashObj[hashValue.name] = decodeURIComponent(hashValue.value); - }); - return hashObj; -} - -function makeHash(obj) { - return Object.keys(obj) - .map((key) => { - return `${key}${kEquals}${obj[key]}`; - }) - .join(kAnd); -} - -function setHash(obj) { - const hash = makeHash(obj); - window.history.pushState(null, null, `#${hash}`); -} - -function showPage(listingId, page) { - const list = window["quarto-listings"][listingId]; - if (list) { - list.show((page - 1) * list.page + 1, list.page); - } -} - -function activateCategory(category) { - // Deactivate existing categories - const activeEls = window.document.querySelectorAll( - ".quarto-listing-category .category.active" - ); - for (const activeEl of activeEls) { - activeEl.classList.remove("active"); - } - - // Activate this category - const categoryEl = window.document.querySelector( - `.quarto-listing-category .category[data-category='${category}'` - ); - if (categoryEl) { - categoryEl.classList.add("active"); - } - - // Filter the listings to this category - filterListingCategory(category); -} - -function filterListingCategory(category) { - const listingIds = Object.keys(window["quarto-listings"]); - for (const listingId of listingIds) { - const list = window["quarto-listings"][listingId]; - if (list) { - if (category === "") { - // resets the filter - list.filter(); - } else { - // filter to this category - list.filter(function (item) { - const itemValues = 
item.values(); - if (itemValues.categories !== null) { - const categories = itemValues.categories.split(","); - return categories.includes(category); - } else { - return false; - } - }); - } - } - } -} diff --git a/.quarto/idx/index.qmd.json b/.quarto/idx/index.qmd.json deleted file mode 100644 index 24f549e..0000000 --- a/.quarto/idx/index.qmd.json +++ /dev/null @@ -1 +0,0 @@ -{"title":"Analytics at Scale","markdown":{"yaml":{"title":"Analytics at Scale","subtitle":"Workshopping best practices for big data analytics in epidemiology"},"headingText":"Objective","containsRefs":false,"markdown":"\n\n - Hold weekly workshops **lead by team members** or invitees.\n - **Produce resources** such as videos, tutorials and code resources from workshops on this resource page\n - **Standardize** our analytic workflows.\n \n## Focus\n\n- Best practices for writing **robust**, **reproducible**, and **readable** R code\n- Optimization for big data as our data set grows\n- Tips and tricks for productivity \n- Integration with cloud computing resources and data sources\n- Integrating R and SQL \n\n## Past topics\nDBI and dbplyr, code snippets\n\n## Future workshop topics:\nfunctional programming, `renv`, GitHub, Docker, pipelines, Style Guides, \ndeveloping R packages, publishing data products, RStudio Snippets, `linter`, \n`styler`, `rshiny`, `rix`, SQL & R, Quarto, Quarto Websites, Quarto Dashboards, \n`plumber` APIs, data structures, JSON in R, mermaid diagrams, SQLlite, DuckDB,\n... 
\n\n## References\n- [Advanced R](http://adv-r.had.co.nz/) by Hadley Wickham\n- [Building Reproducible Analytical Pipelines with R](https://raps-with-r.dev/) by Bruno Rodriguez\n- [R Packages](https://r-pkgs.org/) by Hadley Wickham\n- [Modern Data Visualization with R](https://rkabacoff.github.io/datavis/) by Robert Kabacoff\n\n","srcMarkdownNoYaml":"\n\n## Objective\n - Hold weekly workshops **lead by team members** or invitees.\n - **Produce resources** such as videos, tutorials and code resources from workshops on this resource page\n - **Standardize** our analytic workflows.\n \n## Focus\n\n- Best practices for writing **robust**, **reproducible**, and **readable** R code\n- Optimization for big data as our data set grows\n- Tips and tricks for productivity \n- Integration with cloud computing resources and data sources\n- Integrating R and SQL \n\n## Past topics\nDBI and dbplyr, code snippets\n\n## Future workshop topics:\nfunctional programming, `renv`, GitHub, Docker, pipelines, Style Guides, \ndeveloping R packages, publishing data products, RStudio Snippets, `linter`, \n`styler`, `rshiny`, `rix`, SQL & R, Quarto, Quarto Websites, Quarto Dashboards, \n`plumber` APIs, data structures, JSON in R, mermaid diagrams, SQLlite, DuckDB,\n... 
\n\n## References\n- [Advanced R](http://adv-r.had.co.nz/) by Hadley Wickham\n- [Building Reproducible Analytical Pipelines with R](https://raps-with-r.dev/) by Bruno Rodriguez\n- [R Packages](https://r-pkgs.org/) by Hadley Wickham\n- [Modern Data Visualization with R](https://rkabacoff.github.io/datavis/) by Robert Kabacoff\n\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":false,"echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","css":["styles.css"],"toc":true,"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other 
Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","appendix-view-license":"View License","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this 
document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} 
words","listing-page-filter":"Filter","draft":"Draft"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.5.55","editor":"visual","theme":"cosmo","title":"Analytics at Scale","subtitle":"Workshopping best practices for big data analytics in epidemiology"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/meeting-notes.qmd.json b/.quarto/idx/meeting-notes.qmd.json deleted file mode 100644 index 635b8ba..0000000 --- a/.quarto/idx/meeting-notes.qmd.json +++ /dev/null @@ -1 +0,0 @@ -{"title":"Meeting Notes","markdown":{"yaml":{"title":"Meeting Notes","listing":{"type":"table","fields":["date","title","subtitle"],"sort":"date desc","categories":false,"contents":"meeting-notes/*.qmd"}},"containsRefs":false,"markdown":"\n","srcMarkdownNoYaml":"\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":false,"echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon"
:false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","css":["styles.css"],"toc":true,"output-file":"meeting-notes.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","appendix-view-license":"View License","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source 
Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to 
High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} words","listing-page-filter":"Filter","draft":"Draft"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.5.55","editor":"visual","theme":"cosmo","title":"Meeting Notes","listing":{"type":"table","fields":["date","title","subtitle"],"sort":"date desc","categories":false,"contents":"meeting-notes/*.qmd"}},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/meeting-notes/2024-10-04.qmd.json b/.quarto/idx/meeting-notes/2024-10-04.qmd.json deleted file mode 100644 index d3d42e8..0000000 --- a/.quarto/idx/meeting-notes/2024-10-04.qmd.json +++ /dev/null @@ -1 +0,0 @@ -{"title":"Counts by Period","markdown":{"yaml":{"title":"Counts by Period","subtitle":"Optimizing counts by period using DBI","date":"2024-10-04","author":"Jake Peters"},"headingText":"Attendees:","containsRefs":false,"markdown":"\n\nJP, KD, BC, LO\n\n### Tutorial: Count by Period w/ DBI\nJP walked through the [Count by Period w/ DBI](https://analyticsphere.github.io/c4cp/articles/count_by_period_vignette.html) tutorial to demonstrate some code he wrote to create counts by day, week, month, quarter or year using a DBI connection to avoid downloading the data until after aggreagion. 
This was intended to address some issues that Kelsey was having with the Biospecimen Report which has many tables that are aggregated by date, precluding her from using DBI easily.\n\n### What do we want to use this workshop for?\n\n- Leila:\n\n - Use this to refactor code that is written, but need guidance to improve or use better practices\n\n - Debug problem code together\n\n- Jake\n\n - Tackle big, recurring problems together\n\n - Write functions that can be reused\n\n- Kelsey:\n\n - Refactoring code that has been re-used or passed repeatedly\n\n - Even working with ChatGPT\n\n - Using SQL to do transformations THEN pulling the data down\n\n- Brittany:\n\n - SQL specific discussion.\n\n - We are all a little weaker in SQL.\n\n- SQL:\n\n - Joins\n\n - Unions\n\n - Transformations\n\n - Group By (with counts)\n\n - Ungroup after counts (when is this necessary and why)\n\n - SQL Course on Coursera\n\n### Second Workshop:\n\n- Code Snippets\n\n### Future Worshops:\n\n- Joining Module 1 v1 and v2 Joining Module 2 v1 and v2\n","srcMarkdownNoYaml":"\n\n### Attendees: \nJP, KD, BC, LO\n\n### Tutorial: Count by Period w/ DBI\nJP walked through the [Count by Period w/ DBI](https://analyticsphere.github.io/c4cp/articles/count_by_period_vignette.html) tutorial to demonstrate some code he wrote to create counts by day, week, month, quarter or year using a DBI connection to avoid downloading the data until after aggreagion. 
This was intended to address some issues that Kelsey was having with the Biospecimen Report which has many tables that are aggregated by date, precluding her from using DBI easily.\n\n### What do we want to use this workshop for?\n\n- Leila:\n\n - Use this to refactor code that is written, but need guidance to improve or use better practices\n\n - Debug problem code together\n\n- Jake\n\n - Tackle big, recurring problems together\n\n - Write functions that can be reused\n\n- Kelsey:\n\n - Refactoring code that has been re-used or passed repeatedly\n\n - Even working with ChatGPT\n\n - Using SQL to do transformations THEN pulling the data down\n\n- Brittany:\n\n - SQL specific discussion.\n\n - We are all a little weaker in SQL.\n\n- SQL:\n\n - Joins\n\n - Unions\n\n - Transformations\n\n - Group By (with counts)\n\n - Ungroup after counts (when is this necessary and why)\n\n - SQL Course on Coursera\n\n### Second Workshop:\n\n- Code Snippets\n\n### Future Worshops:\n\n- Joining Module 1 v1 and v2 Joining Module 2 v1 and 
v2\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":false,"echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","css":["../styles.css"],"toc":true,"output-file":"2024-10-04.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to 
Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","appendix-view-license":"View License","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar 
navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} words","listing-page-filter":"Filter","draft":"Draft"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.5.55","editor":"visual","theme":"cosmo","title":"Counts by Period","subtitle":"Optimizing counts by period using DBI","date":"2024-10-04","author":"Jake 
Peters"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/meeting-notes/2024-10-11.qmd.json b/.quarto/idx/meeting-notes/2024-10-11.qmd.json deleted file mode 100644 index 566c390..0000000 --- a/.quarto/idx/meeting-notes/2024-10-11.qmd.json +++ /dev/null @@ -1 +0,0 @@ -{"title":"Snippets","markdown":{"yaml":{"title":"Snippets","subtitle":"Custom code snippets in RStudio","date":"2024-10-11","author":"Jake Peters"},"headingText":"Attendees","containsRefs":false,"markdown":"\n\n\nJP, LO, BC\n\n### Notes\n\n- JP walked through a [RStudio Snippets](../tutorials/snippets.qmd) tutorial.\n\n- LO and BC set up custom RStudio Snippets on their local machines.\n","srcMarkdownNoYaml":"\n\n### Attendees\n\nJP, LO, BC\n\n### Notes\n\n- JP walked through a [RStudio Snippets](../tutorials/snippets.qmd) tutorial.\n\n- LO and BC set up custom RStudio Snippets on their local machines.\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":false,"echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex
","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","css":["../styles.css"],"toc":true,"output-file":"2024-10-11.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","appendix-view-license":"View License","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View 
Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to 
Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} words","listing-page-filter":"Filter","draft":"Draft"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.5.55","editor":"visual","theme":"cosmo","title":"Snippets","subtitle":"Custom code snippets in RStudio","date":"2024-10-11","author":"Jake Peters"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/snippets.qmd.json b/.quarto/idx/snippets.qmd.json deleted file mode 100644 index 16b1312..0000000 --- a/.quarto/idx/snippets.qmd.json +++ /dev/null @@ -1 +0,0 @@ -{"title":"RStudio Snippets","markdown":{"yaml":{"title":"RStudio Snippets","author":"Jake Peters","date":"10/11/24","format":"html","editor":{"markdown":{"wrap":72}}},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n\nThis tutorial demonstrates how to use both built-in and custom RStudio\nsnippets within an R script. By leveraging snippets, you can streamline\nyour coding process, reduce repetitive tasks, and maintain consistency\nacross your projects.\n\n## Table of Contents\n\n1. Introduction to RStudio Snippets\n2. Using Built-in Snippets\n3. Creating and Using Custom Snippets\n\n## Introduction to RStudio Snippets\n\n*Snippets* in RStudio are predefined blocks of code that can be quickly\ninserted into your scripts or markdown files. 
They enhance productivity\nby minimizing repetitive typing and ensuring consistency across your\ncoding projects.\n\nIn this tutorial, we'll explore how to utilize RStudio's built-in\nsnippets and create custom snippets tailored to your specific workflow.\n\n------------------------------------------------------------------------\n\n## Using Built-in Snippets\n\nRStudio comes equipped with several built-in snippets that can be\neffortlessly used in `.qmd` files. Below are some commonly used built-in\nsnippets along with examples of their usage.\n\n#### Fun\n\nType `fun` and press `Tab`.\n\n``` r\nfun my_function <- function(arg1, arg2) {\n# Function body\n}\n```\n\n#### For\n\nType `for` and press `Tab`.\n\n``` r\nfor (variable in vector) {\n \n}\n```\n\n#### While\n\nType `while` and press `Tab`.\n\n``` r\nwhile (condition) {\n \n}\n```\n\n#### Apply\n\nType `lapply` and press `Tab`.\n\n``` r\napply(array, margin, ...)\n```\n\n#### Lapply\n\nType `lapply` and press `Tab`.\n\n``` r\nlapply(list, function)\n```\n\n#### ts\n\nType `ts` and press `Tab`.\n\n``` r\n# Fri Oct 11 09:53:23 2024 ------------------------------\n```\n\n## Using Custom Snippets\n\nGo to `Tools` \\> `Global Options` \\> `Snippets` \\> `Edit`.\n\nThen paste the following at the bottom of the page and hit `Apply`\n\n``` r\nsnippet dev\n \"nih-nci-dceg-connect-dev\"\n```\n\n![Gif showing the workflow of creating/using your own\nsnippet](media/snippets.gif)\n\n::: callout-note\nNote that all code after the `snippet ` key word must\nbe indented.\n:::\n\n## More custom snippets to try\n\n``` r\nsnippet header\n ## Description =================================================================\n # Title: ${1:Script Title}\n # Author: ${2:Your Name}\n # Date: ${3:`r format(Sys.Date(), \"%Y-%m-%d\")`}\n # Objective: ${4:Script Purpose}\n # GH-Issue: ${5:Github Issue}\n\nsnippet query\n bq_auth() # Authenticate with BigQuery\n con <- DBI::dbConnect(bigrquery::bigquery(), \n project=project, 
dataset=dataset, billing=billing))\n \n # Specify just the data we need with a query\n dataset <- \"FlatConnect\"\n table <- \"participants_JP\"\n tier <- \"nih-nci-dceg-connect-prod-6d04\"\n sql <- glue::glue(\n \"\"\"\n SELECT \n Connect_ID,\n token\n FROM `{project}.{dataset}.{table}`\n \"\"\")\n \n # Query the data and store as reference object\ndata <- dbGetQuery(con, SQL)\n\nsnippet dev\n \"nih-nci-dceg-connect-dev\"\n\nsnippet stg\n \"nih-nci-dceg-connect-stg-5519\"\n\nsnippet prod\n \"nih-nci-dceg-connect-prod-6d04\"\n\nsnippet exclusions\n WHERE \n p.d_821247024 = '197316935' -- 'Verif Status' IS 'Verified'\n AND p.d_747006172 != '353358909' -- 'Withdraw consent' IS NOT \"Yes\"\n AND p.d_100767870 = '104430631' -- 'All Base Surv Complete' = 'No'\n AND p.d_685002411_d_994064239 != '353358909' -- 'Ref Base Survs' IS NOT \"Yes\"\n AND p.d_987563196 != '353358909' -- 'Dead' IS NOT 'Yes'\n```\n","srcMarkdownNoYaml":"\n\n# Introduction\n\nThis tutorial demonstrates how to use both built-in and custom RStudio\nsnippets within an R script. By leveraging snippets, you can streamline\nyour coding process, reduce repetitive tasks, and maintain consistency\nacross your projects.\n\n## Table of Contents\n\n1. Introduction to RStudio Snippets\n2. Using Built-in Snippets\n3. Creating and Using Custom Snippets\n\n## Introduction to RStudio Snippets\n\n*Snippets* in RStudio are predefined blocks of code that can be quickly\ninserted into your scripts or markdown files. They enhance productivity\nby minimizing repetitive typing and ensuring consistency across your\ncoding projects.\n\nIn this tutorial, we'll explore how to utilize RStudio's built-in\nsnippets and create custom snippets tailored to your specific workflow.\n\n------------------------------------------------------------------------\n\n## Using Built-in Snippets\n\nRStudio comes equipped with several built-in snippets that can be\neffortlessly used in `.qmd` files. 
Below are some commonly used built-in\nsnippets along with examples of their usage.\n\n#### Fun\n\nType `fun` and press `Tab`.\n\n``` r\nfun my_function <- function(arg1, arg2) {\n# Function body\n}\n```\n\n#### For\n\nType `for` and press `Tab`.\n\n``` r\nfor (variable in vector) {\n \n}\n```\n\n#### While\n\nType `while` and press `Tab`.\n\n``` r\nwhile (condition) {\n \n}\n```\n\n#### Apply\n\nType `lapply` and press `Tab`.\n\n``` r\napply(array, margin, ...)\n```\n\n#### Lapply\n\nType `lapply` and press `Tab`.\n\n``` r\nlapply(list, function)\n```\n\n#### ts\n\nType `ts` and press `Tab`.\n\n``` r\n# Fri Oct 11 09:53:23 2024 ------------------------------\n```\n\n## Using Custom Snippets\n\nGo to `Tools` \\> `Global Options` \\> `Snippets` \\> `Edit`.\n\nThen paste the following at the bottom of the page and hit `Apply`\n\n``` r\nsnippet dev\n \"nih-nci-dceg-connect-dev\"\n```\n\n![Gif showing the workflow of creating/using your own\nsnippet](media/snippets.gif)\n\n::: callout-note\nNote that all code after the `snippet ` key word must\nbe indented.\n:::\n\n## More custom snippets to try\n\n``` r\nsnippet header\n ## Description =================================================================\n # Title: ${1:Script Title}\n # Author: ${2:Your Name}\n # Date: ${3:`r format(Sys.Date(), \"%Y-%m-%d\")`}\n # Objective: ${4:Script Purpose}\n # GH-Issue: ${5:Github Issue}\n\nsnippet query\n bq_auth() # Authenticate with BigQuery\n con <- DBI::dbConnect(bigrquery::bigquery(), \n project=project, dataset=dataset, billing=billing))\n \n # Specify just the data we need with a query\n dataset <- \"FlatConnect\"\n table <- \"participants_JP\"\n tier <- \"nih-nci-dceg-connect-prod-6d04\"\n sql <- glue::glue(\n \"\"\"\n SELECT \n Connect_ID,\n token\n FROM `{project}.{dataset}.{table}`\n \"\"\")\n \n # Query the data and store as reference object\ndata <- dbGetQuery(con, SQL)\n\nsnippet dev\n \"nih-nci-dceg-connect-dev\"\n\nsnippet stg\n 
\"nih-nci-dceg-connect-stg-5519\"\n\nsnippet prod\n \"nih-nci-dceg-connect-prod-6d04\"\n\nsnippet exclusions\n WHERE \n p.d_821247024 = '197316935' -- 'Verif Status' IS 'Verified'\n AND p.d_747006172 != '353358909' -- 'Withdraw consent' IS NOT \"Yes\"\n AND p.d_100767870 = '104430631' -- 'All Base Surv Complete' = 'No'\n AND p.d_685002411_d_994064239 != '353358909' -- 'Ref Base Survs' IS NOT \"Yes\"\n AND p.d_987563196 != '353358909' -- 'Dead' IS NOT 'Yes'\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":false,"echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","css":["styles.css"],"toc":true,"output-file":"snippets.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this 
page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","appendix-view-license":"View License","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching 
documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word 
Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} words","listing-page-filter":"Filter","draft":"Draft"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.5.55","editor":{"markdown":{"wrap":72}},"theme":"cosmo","title":"RStudio Snippets","author":"Jake Peters","date":"10/11/24"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/tutorials/snippets.qmd.json b/.quarto/idx/tutorials/snippets.qmd.json deleted file mode 100644 index 0981ac7..0000000 --- a/.quarto/idx/tutorials/snippets.qmd.json +++ /dev/null @@ -1 +0,0 @@ -{"title":"RStudio Snippets","markdown":{"yaml":{"title":"RStudio Snippets","author":"Jake Peters","date":"10/11/24","format":"html","editor":{"markdown":{"wrap":72}}},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n\nThis tutorial demonstrates how to use both built-in and custom RStudio\nsnippets within an R script. By leveraging snippets, you can streamline\nyour coding process, reduce repetitive tasks, and maintain consistency\nacross your projects.\n\n## Table of Contents\n\n1. Introduction to RStudio Snippets\n2. Using Built-in Snippets\n3. Creating and Using Custom Snippets\n\n## Introduction to RStudio Snippets\n\n*Snippets* in RStudio are predefined blocks of code that can be quickly\ninserted into your scripts or markdown files. They enhance productivity\nby minimizing repetitive typing and ensuring consistency across your\ncoding projects.\n\nIn this tutorial, we'll explore how to utilize RStudio's built-in\nsnippets and create custom snippets tailored to your specific workflow.\n\n------------------------------------------------------------------------\n\n## Using Built-in Snippets\n\nRStudio comes equipped with several built-in snippets that can be\neffortlessly used in `.qmd` files. 
Below are some commonly used built-in\nsnippets along with examples of their usage.\n\n#### Fun\n\nType `fun` and press `Tab`.\n\n``` r\nfun my_function <- function(arg1, arg2) {\n# Function body\n}\n```\n\n#### For\n\nType `for` and press `Tab`.\n\n``` r\nfor (variable in vector) {\n \n}\n```\n\n#### While\n\nType `while` and press `Tab`.\n\n``` r\nwhile (condition) {\n \n}\n```\n\n#### Apply\n\nType `lapply` and press `Tab`.\n\n``` r\napply(array, margin, ...)\n```\n\n#### Lapply\n\nType `lapply` and press `Tab`.\n\n``` r\nlapply(list, function)\n```\n\n#### ts\n\nType `ts` and press `Tab`.\n\n``` r\n# Fri Oct 11 09:53:23 2024 ------------------------------\n```\n\n## Using Custom Snippets\n\nGo to `Tools` \\> `Global Options` \\> `Snippets` \\> `Edit`.\n\nThen paste the following at the bottom of the page and hit `Apply`\n\n``` r\nsnippet dev\n \"nih-nci-dceg-connect-dev\"\n```\n\n![Gif showing the workflow of creating/using your own\nsnippet](media/snippets.gif)\n\n::: callout-note\nNote that all code after the `snippet ` key word must\nbe indented.\n:::\n\n## More custom snippets to try\n\n``` r\nsnippet header\n ## Description =================================================================\n # Title: ${1:Script Title}\n # Author: ${2:Your Name}\n # Date: ${3:`r format(Sys.Date(), \"%Y-%m-%d\")`}\n # Objective: ${4:Script Purpose}\n # GH-Issue: ${5:Github Issue}\n\nsnippet query\n bq_auth() # Authenticate with BigQuery\n con <- DBI::dbConnect(bigrquery::bigquery(), \n project=project, dataset=dataset, billing=billing))\n \n # Specify just the data we need with a query\n dataset <- \"FlatConnect\"\n table <- \"participants_JP\"\n tier <- \"nih-nci-dceg-connect-prod-6d04\"\n sql <- glue::glue(\n \"\"\"\n SELECT \n Connect_ID,\n token\n FROM `{project}.{dataset}.{table}`\n \"\"\")\n \n # Query the data and store as reference object\ndata <- dbGetQuery(con, SQL)\n\nsnippet dev\n \"nih-nci-dceg-connect-dev\"\n\nsnippet stg\n 
\"nih-nci-dceg-connect-stg-5519\"\n\nsnippet prod\n \"nih-nci-dceg-connect-prod-6d04\"\n\nsnippet exclusions\n WHERE \n p.d_821247024 = '197316935' -- 'Verif Status' IS 'Verified'\n AND p.d_747006172 != '353358909' -- 'Withdraw consent' IS NOT \"Yes\"\n AND p.d_100767870 = '104430631' -- 'All Base Surv Complete' = 'No'\n AND p.d_685002411_d_994064239 != '353358909' -- 'Ref Base Survs' IS NOT \"Yes\"\n AND p.d_987563196 != '353358909' -- 'Dead' IS NOT 'Yes'\n```\n","srcMarkdownNoYaml":"\n\n# Introduction\n\nThis tutorial demonstrates how to use both built-in and custom RStudio\nsnippets within an R script. By leveraging snippets, you can streamline\nyour coding process, reduce repetitive tasks, and maintain consistency\nacross your projects.\n\n## Table of Contents\n\n1. Introduction to RStudio Snippets\n2. Using Built-in Snippets\n3. Creating and Using Custom Snippets\n\n## Introduction to RStudio Snippets\n\n*Snippets* in RStudio are predefined blocks of code that can be quickly\ninserted into your scripts or markdown files. They enhance productivity\nby minimizing repetitive typing and ensuring consistency across your\ncoding projects.\n\nIn this tutorial, we'll explore how to utilize RStudio's built-in\nsnippets and create custom snippets tailored to your specific workflow.\n\n------------------------------------------------------------------------\n\n## Using Built-in Snippets\n\nRStudio comes equipped with several built-in snippets that can be\neffortlessly used in `.qmd` files. 
Below are some commonly used built-in\nsnippets along with examples of their usage.\n\n#### Fun\n\nType `fun` and press `Tab`.\n\n``` r\nfun my_function <- function(arg1, arg2) {\n# Function body\n}\n```\n\n#### For\n\nType `for` and press `Tab`.\n\n``` r\nfor (variable in vector) {\n \n}\n```\n\n#### While\n\nType `while` and press `Tab`.\n\n``` r\nwhile (condition) {\n \n}\n```\n\n#### Apply\n\nType `lapply` and press `Tab`.\n\n``` r\napply(array, margin, ...)\n```\n\n#### Lapply\n\nType `lapply` and press `Tab`.\n\n``` r\nlapply(list, function)\n```\n\n#### ts\n\nType `ts` and press `Tab`.\n\n``` r\n# Fri Oct 11 09:53:23 2024 ------------------------------\n```\n\n## Using Custom Snippets\n\nGo to `Tools` \\> `Global Options` \\> `Snippets` \\> `Edit`.\n\nThen paste the following at the bottom of the page and hit `Apply`\n\n``` r\nsnippet dev\n \"nih-nci-dceg-connect-dev\"\n```\n\n![Gif showing the workflow of creating/using your own\nsnippet](media/snippets.gif)\n\n::: callout-note\nNote that all code after the `snippet ` key word must\nbe indented.\n:::\n\n## More custom snippets to try\n\n``` r\nsnippet header\n ## Description =================================================================\n # Title: ${1:Script Title}\n # Author: ${2:Your Name}\n # Date: ${3:`r format(Sys.Date(), \"%Y-%m-%d\")`}\n # Objective: ${4:Script Purpose}\n # GH-Issue: ${5:Github Issue}\n\nsnippet query\n bq_auth() # Authenticate with BigQuery\n con <- DBI::dbConnect(bigrquery::bigquery(), \n project=project, dataset=dataset, billing=billing))\n \n # Specify just the data we need with a query\n dataset <- \"FlatConnect\"\n table <- \"participants_JP\"\n tier <- \"nih-nci-dceg-connect-prod-6d04\"\n sql <- glue::glue(\n \"\"\"\n SELECT \n Connect_ID,\n token\n FROM `{project}.{dataset}.{table}`\n \"\"\")\n \n # Query the data and store as reference object\ndata <- dbGetQuery(con, SQL)\n\nsnippet dev\n \"nih-nci-dceg-connect-dev\"\n\nsnippet stg\n 
\"nih-nci-dceg-connect-stg-5519\"\n\nsnippet prod\n \"nih-nci-dceg-connect-prod-6d04\"\n\nsnippet exclusions\n WHERE \n p.d_821247024 = '197316935' -- 'Verif Status' IS 'Verified'\n AND p.d_747006172 != '353358909' -- 'Withdraw consent' IS NOT \"Yes\"\n AND p.d_100767870 = '104430631' -- 'All Base Surv Complete' = 'No'\n AND p.d_685002411_d_994064239 != '353358909' -- 'Ref Base Survs' IS NOT \"Yes\"\n AND p.d_987563196 != '353358909' -- 'Dead' IS NOT 'Yes'\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":false,"echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","css":["../styles.css"],"toc":true,"output-file":"snippets.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this 
page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","appendix-view-license":"View License","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching 
documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word 
Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} words","listing-page-filter":"Filter","draft":"Draft"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.5.55","editor":{"markdown":{"wrap":72}},"theme":"cosmo","title":"RStudio Snippets","author":"Jake Peters","date":"10/11/24"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/listing/listing-cache.json b/.quarto/listing/listing-cache.json deleted file mode 100644 index ad0312b..0000000 --- a/.quarto/listing/listing-cache.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "listingMap": { - "meeting-notes.qmd": [ - "meeting-notes/*.qmd" - ] - } -} \ No newline at end of file diff --git a/.quarto/preview/lock b/.quarto/preview/lock deleted file mode 100644 index a7ae138..0000000 --- a/.quarto/preview/lock +++ /dev/null @@ -1 +0,0 @@ -50290 \ No newline at end of file diff --git a/.quarto/xref/598b2e90 b/.quarto/xref/598b2e90 deleted file mode 100644 index 7df77d5..0000000 --- a/.quarto/xref/598b2e90 +++ /dev/null @@ -1 +0,0 @@ -{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/6afc8b7a b/.quarto/xref/6afc8b7a deleted file mode 100644 index e9bc5b2..0000000 --- a/.quarto/xref/6afc8b7a +++ /dev/null @@ -1 +0,0 @@ -{"entries":[],"headings":["attendees","tutorial-count-by-period-w-dbi","what-do-we-want-to-use-this-workshop-for","second-workshop","future-worshops"]} \ No newline at end of file diff --git a/.quarto/xref/755cd591 b/.quarto/xref/755cd591 deleted file mode 100644 index 0eaf8d7..0000000 --- a/.quarto/xref/755cd591 +++ /dev/null @@ -1 +0,0 @@ -{"headings":["introduction","table-of-contents","introduction-to-rstudio-snippets","using-built-in-snippets","fun","for","while","apply","lapply","ts","using-custom-snippets","more-custom-snippets-to-try"],"entries":[]} 
\ No newline at end of file diff --git a/.quarto/xref/862fc1db b/.quarto/xref/862fc1db deleted file mode 100644 index 208374b..0000000 --- a/.quarto/xref/862fc1db +++ /dev/null @@ -1 +0,0 @@ -{"entries":[],"headings":[]} \ No newline at end of file diff --git a/.quarto/xref/INDEX b/.quarto/xref/INDEX deleted file mode 100644 index 74a284d..0000000 --- a/.quarto/xref/INDEX +++ /dev/null @@ -1,26 +0,0 @@ -{ - "snippets.qmd": { - "snippets.html": "755cd591" - }, - "about.qmd": { - "about.html": "598b2e90" - }, - "index.qmd": { - "index.html": "fa6fa8d5" - }, - "meeting-notes.qmd": { - "meeting-notes.html": "d7a01680" - }, - "meeting-notes/24.10.04.qmd": { - "24.10.04.html": "862fc1db" - }, - "meeting-notes/2024-10-04.qmd": { - "2024-10-04.html": "6afc8b7a" - }, - "meeting-notes/2024-10-11.qmd": { - "2024-10-11.html": "e199f2dd" - }, - "tutorials/snippets.qmd": { - "snippets.html": "d0caaf14" - } -} \ No newline at end of file diff --git a/.quarto/xref/d0caaf14 b/.quarto/xref/d0caaf14 deleted file mode 100644 index 920cfc8..0000000 --- a/.quarto/xref/d0caaf14 +++ /dev/null @@ -1 +0,0 @@ -{"entries":[],"headings":["introduction","table-of-contents","introduction-to-rstudio-snippets","using-built-in-snippets","fun","for","while","apply","lapply","ts","using-custom-snippets","more-custom-snippets-to-try"]} \ No newline at end of file diff --git a/.quarto/xref/d7a01680 b/.quarto/xref/d7a01680 deleted file mode 100644 index 7df77d5..0000000 --- a/.quarto/xref/d7a01680 +++ /dev/null @@ -1 +0,0 @@ -{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/e199f2dd b/.quarto/xref/e199f2dd deleted file mode 100644 index ee9fd39..0000000 --- a/.quarto/xref/e199f2dd +++ /dev/null @@ -1 +0,0 @@ -{"headings":["attendees","notes"],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/fa6fa8d5 b/.quarto/xref/fa6fa8d5 deleted file mode 100644 index 3c647f9..0000000 --- a/.quarto/xref/fa6fa8d5 +++ /dev/null @@ -1 +0,0 @@ 
-{"entries":[],"headings":["objective","focus","past-topics","future-workshop-topics","references"]} \ No newline at end of file diff --git a/README.md b/README.md index fbae9c8..d126fcf 100644 --- a/README.md +++ b/README.md @@ -1 +1,15 @@ -# analytics-at-scale-workshop \ No newline at end of file +# analytics-at-scale-workshop + +The Quarto Website is live at + +## How to contribute a tutorial + +Simply add a *.qmd*, *.rmd*, or *.md* file to the *tutorials/* directory. + +Once it is committed/pushed to main the website will rebuild/deploy to gh-pages. + +If a tutorial lives outside of this repository we can add it as a link in the `External Tutorials` tab. + +## How to add Meeting Notes + +Simply add a *.md* file to the *meeting-notes/* folder. diff --git a/_quarto.yml b/_quarto.yml index f7b1925..ff5de8d 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -1,6 +1,9 @@ project: type: website +execute: + freeze: auto + website: title: "Analytics at Scale" @@ -8,11 +11,8 @@ website: search: true tools: - icon: github - menu: - - text: Source Code - href: https://github.com/analyticsphere/analytics-at-scale - - text: Analyticshpere - href: https://github.com/analyticsphere + text: Quarto Website Source Code + href: https://github.com/Analyticsphere/analytics-at-scale-workshop sidebar: style: "floating" @@ -20,14 +20,16 @@ website: contents: - href: index.qmd text: Home + - href: scaling_your_r_project.qmd + text: Scaling Your R Project - meeting-notes.qmd - - section: "Tutorials" - contents: - - tutorials/snippets.qmd - - text: "Count by Period w/ DBI " + - section: Tutorials + contents: tutorials/*.*md + - section: External Tutorials + contents: + - text: "Count by Period w/ DBI" url: https://analyticsphere.github.io/c4cp/articles/count_by_period_vignette.html - format: html: theme: cosmo diff --git a/analytics_at_scale.Rproj b/analytics_at_scale.Rproj deleted file mode 100644 index 8e3c2eb..0000000 --- a/analytics_at_scale.Rproj +++ /dev/null @@ -1,13 +0,0 @@ -Version: 1.0
- -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX diff --git a/index.qmd b/index.qmd index 7bdce6e..7383361 100644 --- a/index.qmd +++ b/index.qmd @@ -4,31 +4,30 @@ subtitle: Workshopping best practices for big data analytics in epidemiology --- ## Objective - - Hold weekly workshops **lead by team members** or invitees. - - **Produce resources** such as videos, tutorials and code resources from workshops on this resource page - - **Standardize** our analytic workflows. - + +- Hold weekly workshops **led by team members** or invitees. +- **Produce resources** such as videos, tutorials and code resources from workshops on this resource page +- **Standardize** our analytic workflows. + ## Focus -- Best practices for writing **robust**, **reproducible**, and **readable** R code -- Optimization for big data as our data set grows -- Tips and tricks for productivity -- Integration with cloud computing resources and data sources -- Integrating R and SQL +- Best practices for writing **robust**, **reproducible**, and **readable** R code +- Optimization for big data as our data set grows +- Tips and tricks for productivity +- Integration with cloud computing resources and data sources +- Integrating R and SQL ## Past topics + DBI and dbplyr, code snippets ## Future workshop topics: -functional programming, `renv`, GitHub, Docker, pipelines, Style Guides, -developing R packages, publishing data products, RStudio Snippets, `linter`, -`styler`, `rshiny`, `rix`, SQL & R, Quarto, Quarto Websites, Quarto Dashboards, -`plumber` APIs, data structures, JSON in R, mermaid diagrams, SQLlite, DuckDB, -...
+ +functional programming, `renv`, GitHub, Docker, pipelines, Style Guides, developing R packages, publishing data products, RStudio Snippets, `lintr`, `styler`, `shiny`, `rix`, SQL & R, Quarto, Quarto Websites, Quarto Dashboards, `plumber` APIs, data structures, JSON in R, mermaid diagrams, SQLite, DuckDB, ... ## References + - [Advanced R](http://adv-r.had.co.nz/) by Hadley Wickham - [Building Reproducible Analytical Pipelines with R](https://raps-with-r.dev/) by Bruno Rodriguez - [R Packages](https://r-pkgs.org/) by Hadley Wickham - [Modern Data Visualization with R](https://rkabacoff.github.io/datavis/) by Robert Kabacoff - diff --git a/info_schema_tutorial.qmd b/info_schema_tutorial.qmd new file mode 100644 index 0000000..028c422 --- /dev/null +++ b/info_schema_tutorial.qmd @@ -0,0 +1,113 @@ +--- +title: "Querying BigQuery Information Schema" +author: "Jake Peters" +date: 2025-01-22 +--- + +## Introduction + +In this tutorial, we demonstrate how to use a Quarto document to query BigQuery's information schema and identify where specific Concept IDs are located in your database. By integrating R and SQL, you learn how to authenticate with BigQuery, execute custom queries, and dynamically construct search queries based on user-specified Concept IDs. + +## Prerequisites +- Before following this tutorial, ensure you have R installed on your system. +- You also need the following R packages installed: `bigrquery`, `dplyr`, `DBI`, `dbplyr`, and `glue`. +- You need access to a BigQuery project and dataset with appropriate permissions to query the `INFORMATION_SCHEMA`. + +## Table of Contents: +1. **Setup and Authentication:** Load libraries, configure authentication, and establish a connection to BigQuery. +2. **Executing a Basic SQL Query:** Query the BigQuery `INFORMATION_SCHEMA.COLUMNS` to retrieve metadata that matches a specific pattern. +3. **Building a Dynamic Query Function:** Create an R function to dynamically generate SQL queries to search for Concept IDs. +4.
**Putting It All Together:** Run the function and review the results. + +## Step 1: Setup and Authentication + +The Quarto document begins with a header that includes essential metadata (title, author, and date). The first code chunk loads the required libraries and prepares the environment. + +Next, the following R code chunk loads the necessary libraries and suppresses the output for a clean setup: + +```{r, warning=FALSE, message=FALSE} +library(bigrquery) +library(dplyr) +library(DBI) +library(dbplyr) +library(glue) + +# Authenticate with BigQuery +bigrquery::bq_auth() +``` + +## Step 2: Establishing the Database Connection + +Here, we specify the dataset and project, authenticate with BigQuery, and open a connection. Finally, we list the tables in the dataset to confirm that the connection is working: + +```{r} +# Specify dataset and project details +dataset <- "FlatConnect" +project <- "nih-nci-dceg-connect-dev" + +# Establish connection to BigQuery +con <- DBI::dbConnect(bigrquery::bigquery(), + project = project, + dataset = dataset, + billing = project) + +# List available tables to verify the connection +DBI::dbListTables(con) +``` + +## Step 3: Querying the Information Schema + +The next code block runs a SQL query on the `INFORMATION_SCHEMA.COLUMNS` to locate columns that contain specific Concept IDs. This query filters columns based on a pattern present in their names: + +```{sql, connection=con} +SELECT + table_catalog, + table_schema, + table_name, + column_name +FROM FlatConnect.INFORMATION_SCHEMA.COLUMNS +WHERE column_name + LIKE '%158409298%261863326%'; +``` + +## Step 4: Building a Dynamic Query Function + +To make the querying process more flexible, we define an R function named `get_schema_info`. 
This function accepts a vector of Concept IDs and a tier (development, staging, or production), constructs the appropriate SQL query dynamically, and returns the result: + +```{r} +get_schema_info <- function(con, cids, tier, dataset = 'FlatConnect') { + + # Determine the project based on the tier + project <- switch(tier, + dev = "nih-nci-dceg-connect-dev", + stg = "nih-nci-dceg-connect-stg-5519", + prod = "nih-nci-dceg-connect-prod-6d04") + + # Collapse the Concept IDs into a single string with "%" delimiters (e.g., "%cid1%cid2%cid3%") + cid_str <- paste0("%", paste(cids, collapse = "%"), "%") + print(glue("cid_str: {cid_str}\n\n")) + + # Construct the SQL query dynamically + sql <- glue::glue("SELECT table_catalog, table_schema, table_name, column_name + FROM `{project}.{dataset}`.INFORMATION_SCHEMA.COLUMNS + WHERE column_name LIKE '{cid_str}'") + print(glue("SQL Query: \n{sql}\n\n")) + + # Execute the query and store the result + result <- DBI::dbGetQuery(con, sql) + + return(result) +} + +# Execute the function with a vector of Concept IDs and store the output in df +df <- get_schema_info(con, c('158409298', '261863326'), tier='dev') + +# Display the query results +df +``` + +## Conclusion + +This tutorial has shown you how to query BigQuery's `INFORMATION_SCHEMA` to find the locations of specific Concept IDs within your database. By combining R scripting and SQL queries within a Quarto document, you can automate schema exploration and streamline the process of database management and analysis. + +Feel free to modify the query function or extend this approach for different types of metadata searches. Happy querying! 
diff --git a/meeting-notes.qmd b/meeting-notes.qmd index ab4a89b..5e43a0f 100644 --- a/meeting-notes.qmd +++ b/meeting-notes.qmd @@ -5,5 +5,5 @@ listing: fields: [date, title, subtitle] sort: "date desc" categories: false - contents: "meeting-notes/*.qmd" + contents: "meeting-notes/*.*md" --- diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/0AED968E-contents b/meeting-notes/2024-10-04.md similarity index 100% rename from .Rproj.user/0CDE63C2/sources/session-6a2f53da/0AED968E-contents rename to meeting-notes/2024-10-04.md diff --git a/meeting-notes/2024-10-04.qmd b/meeting-notes/2024-10-04.qmd deleted file mode 100644 index 58168ec..0000000 --- a/meeting-notes/2024-10-04.qmd +++ /dev/null @@ -1,62 +0,0 @@ ---- -title: Counts by Period -subtitle: Optimizing counts by period using DBI -date: 2024-10-04 -author: Jake Peters ---- - -### Attendees: -JP, KD, BC, LO - -### Tutorial: Count by Period w/ DBI -JP walked through the [Count by Period w/ DBI](https://analyticsphere.github.io/c4cp/articles/count_by_period_vignette.html) tutorial to demonstrate some code he wrote to create counts by day, week, month, quarter or year using a DBI connection to avoid downloading the data until after aggreagion. This was intended to address some issues that Kelsey was having with the Biospecimen Report which has many tables that are aggregated by date, precluding her from using DBI easily. - -### What do we want to use this workshop for? - -- Leila: - - - Use this to refactor code that is written, but need guidance to improve or use better practices - - - Debug problem code together - -- Jake - - - Tackle big, recurring problems together - - - Write functions that can be reused - -- Kelsey: - - - Refactoring code that has been re-used or passed repeatedly - - - Even working with ChatGPT - - - Using SQL to do transformations THEN pulling the data down - -- Brittany: - - - SQL specific discussion. - - - We are all a little weaker in SQL. 
- -- SQL: - - - Joins - - - Unions - - - Transformations - - - Group By (with counts) - - - Ungroup after counts (when is this necessary and why) - - - SQL Course on Coursera - -### Second Workshop: - -- Code Snippets - -### Future Worshops: - -- Joining Module 1 v1 and v2 Joining Module 2 v1 and v2 diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F27986C2-contents b/meeting-notes/2024-10-11.md similarity index 100% rename from .Rproj.user/0CDE63C2/sources/session-6a2f53da/F27986C2-contents rename to meeting-notes/2024-10-11.md diff --git a/meeting-notes/2024-10-11.qmd b/meeting-notes/2024-10-11.qmd deleted file mode 100644 index f5058c3..0000000 --- a/meeting-notes/2024-10-11.qmd +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Snippets -subtitle: Custom code snippets in RStudio -date: 2024-10-11 -author: Jake Peters ---- - -### Attendees - -JP, LO, BC - -### Notes - -- JP walked through a [RStudio Snippets](../tutorials/snippets.qmd) tutorial. - -- LO and BC set up custom RStudio Snippets on their local machines. diff --git a/meeting-notes/2024-10-25.md b/meeting-notes/2024-10-25.md new file mode 100644 index 0000000..742ab58 --- /dev/null +++ b/meeting-notes/2024-10-25.md @@ -0,0 +1,16 @@ +--- +title: Wiki +subtitle: Introduce Analytics at Scale wiki +date: 2024-10-25 +--- + +# Attendees: + +JP, BC, LO, KD + +# Agenda + +- Introduce Wiki +- Discuss agenda for next meeting + - Leila volunteered to give tutorial on `ggplot` templating on Nov. 
1 + - Kelsey to do workshop on kable tables next diff --git a/queries/ab_message_variant_lookup.sql b/queries/ab_message_variant_lookup.sql new file mode 100644 index 0000000..24a374c --- /dev/null +++ b/queries/ab_message_variant_lookup.sql @@ -0,0 +1,20 @@ +-- Source sheet: AB Variable +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT +token, +state_d_956485028, +CASE +WHEN state_d_956485028 = '562663942' THEN 'Altruism Personal' +WHEN state_d_956485028 = '686986259' THEN 'Altruism General' +WHEN state_d_956485028 = '477331464' THEN 'Cancer Connection Personal' +WHEN state_d_956485028 = '935486262' THEN 'Cancer Connection General' +WHEN state_d_956485028 = '518814501' THEN 'Research Personal' +WHEN state_d_956485028 = '307763550' THEN 'Research General' +ELSE NULL +END +AS RcrtSI_ABMessage_v1r0 +FROM +`nih-nci-dceg-connect-stg-5519.FlatConnect.participants` +WHERE +token IN ( ) diff --git a/queries/baseline_biospecimen_refusal_and_collection_dates.sql b/queries/baseline_biospecimen_refusal_and_collection_dates.sql new file mode 100644 index 0000000..9ca8322 --- /dev/null +++ b/queries/baseline_biospecimen_refusal_and_collection_dates.sql @@ -0,0 +1,141 @@ +-- Source sheet: Biospecimen 1525 +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT +Connect_ID, +d_685002411_d_217367618 AS `Refused Baseline Specimen Surveys`, +d_657475009 AS `Refused Baseline specimen surveys date`, +d_561681068 AS `Research collection of Baseline blood sample date`, +d_173836415_d_266600170_d_982213346 AS `Clinical collection of Baseline blood sample date`, +d_847159717 AS `Research collection of Baseline urine sample date`, +d_173836415_d_266600170_d_139245758 AS `Clinical collection of Baseline urine sample date`, +d_448660695 AS `Research collection of Baseline MW sample date` + +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` + +WHERE + +d_685002411_d_217367618 = "353358909" + + + + + +WITH refusals AS ( +SELECT 
+Connect_ID, +-- refusal indicator and date +d_685002411_d_217367618 AS refused_biospec_survey, +d_657475009 AS refusal_date, + +-- specimen collection dates +d_561681068 AS research_blood_date, +d_173836415_d_266600170_d_982213346 AS clinical_blood_date, +d_847159717 AS research_urine_date, +d_173836415_d_266600170_d_139245758 AS clinical_urine_date, +d_448660695 AS research_mw_date +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +WHERE d_685002411_d_217367618 = "353358909" -- refused +), + +classified AS ( +SELECT +*, +-- earliest specimen date across blood, urine, and mouthwash +LEAST( +research_blood_date, +clinical_blood_date, +research_urine_date, +clinical_urine_date, +research_mw_date +) AS earliest_specimen_date, + +-- determine whether refusal happened before specimen donation +CASE +WHEN refusal_date IS NOT NULL +AND ( +research_blood_date > refusal_date OR research_blood_date IS NULL +) +AND ( +clinical_blood_date > refusal_date OR clinical_blood_date IS NULL +) +AND ( +research_urine_date > refusal_date OR research_urine_date IS NULL +) +AND ( +clinical_urine_date > refusal_date OR clinical_urine_date IS NULL +) +AND ( +research_mw_date > refusal_date OR research_mw_date IS NULL +) +THEN 1 +ELSE 0 +END AS refused_before_any_specimen +FROM refusals +) + +SELECT +COUNT(*) AS total_refused, -- denominator +SUM(refused_before_any_specimen) AS refused_before_specimen, -- numerator +SAFE_DIVIDE(SUM(refused_before_any_specimen), COUNT(*)) AS proportion_refused_before_specimen +FROM classified; + + + + +AUTUMN'S CODE + +-- CALCULATE DENOMINATOR: +SELECT count(*) as denominator_count +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +WHERE +d_685002411_d_217367618 = "353358909" --refused biospecimen survey + +--- separate run +-- CALCULATE NUMERATOR: +-- Subset participant data as 'Timestamp_Flags' to create flags: +WITH Timestamp_Flags as ( +SELECT +Connect_ID, +d_657475009, +d_685002411_d_217367618, 
+d_173836415_d_266600170_d_561681068 AS research_blood_dt, +d_173836415_d_266600170_d_982213346 AS clinical_blood_dt, +d_173836415_d_266600170_d_847159717 AS research_urine_dt, +d_173836415_d_266600170_d_139245758 AS clinical_urine_dt, +d_173836415_d_266600170_d_448660695 AS research_MW_dt, +-- Create a flag for whenever collection occurred after refusal: +CASE WHEN d_173836415_d_266600170_d_561681068 > d_657475009 THEN "Yes" ELSE "No" END +AS research_blood_after_refusal, +CASE WHEN d_173836415_d_266600170_d_982213346 > d_657475009 THEN "Yes" ELSE "No" END +AS clinical_blood_after_refusal, +CASE WHEN d_173836415_d_266600170_d_847159717 > d_657475009 THEN "Yes" ELSE "No" END +AS research_urine_after_refusal, +CASE WHEN d_173836415_d_266600170_d_139245758 > d_657475009 THEN "Yes" ELSE "No" END +AS clinical_urine_after_refusal, +CASE WHEN d_173836415_d_266600170_d_448660695 > d_657475009 THEN "Yes" ELSE "No" END +AS research_MW_after_refusal +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +) +-- Select from Timestamp_Flags data: +SELECT +Connect_ID, +CASE WHEN d_685002411_d_217367618 = "353358909" THEN "Yes" ELSE "No" +END AS `Refused Baseline Specimen Survey`, +d_657475009 AS `Refused Baseline Specimen Survey Date`, +-- Include timestamp flag variables +research_blood_dt, +clinical_blood_dt, +research_urine_dt, +clinical_urine_dt, +research_MW_dt +FROM Timestamp_Flags +WHERE +d_685002411_d_217367618 = "353358909" --refused biospecimen survey +AND ( +research_blood_after_refusal = "Yes" +OR clinical_blood_after_refusal = "Yes" +OR research_urine_after_refusal = "Yes" +OR clinical_urine_after_refusal = "Yes" +OR research_MW_after_refusal = "Yes" +); diff --git a/queries/baseline_survey_completion_r_logic_reference.sql b/queries/baseline_survey_completion_r_logic_reference.sql new file mode 100644 index 0000000..9b0549f --- /dev/null +++ b/queries/baseline_survey_completion_r_logic_reference.sql @@ -0,0 +1,24 @@ +-- Source sheet: Completions +-- 
Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +/* +This sheet contains reference logic that is not SQL; content preserved as a block comment. + +Baseline Survey Completions + +process = case_when(d_100767870 == 353358909 ~ "All", +d_949302066 != 231311385 ~ "None", +d_949302066 == 231311385 & d_536735468 == 231311385 & d_976570371 == 231311385 ~ "BOH, MRE, and SAS Sections", +d_949302066 == 231311385 & d_976570371 == 231311385 & d_663265240 == 231311385 ~ "BOH, SAS, and LAW Sections", +d_949302066 == 231311385 & d_536735468 == 231311385 & d_663265240 == 231311385 ~ "BOH, MRE, and LAW Sections", +d_949302066 == 231311385 & d_536735468 == 231311385 ~ "BOH and MRE Sections", +d_949302066 == 231311385 & d_976570371 == 231311385 ~ "BOH and SAS Sections", +d_949302066 == 231311385 & d_663265240 == 231311385 ~ "BOH and LAW Sections", +d_949302066 == 231311385 ~ "BOH Section only"), +BL_comp = case_when(d_100767870==353358909 ~ "All", +process=="None" ~ "None", +process=="BOH Section only" ~ "BOH Only", +TRUE ~ "2 or 3 Sections") +BSL_compl = case_when(d_100767870==353358909 ~ "All Baseline Modules Completed", +TRUE ~ "One or More Baseline Modules Not Completed") +*/ diff --git a/queries/biospec_collection_cup_not_returned_flags.sql b/queries/biospec_collection_cup_not_returned_flags.sql new file mode 100644 index 0000000..dd7c45d --- /dev/null +++ b/queries/biospec_collection_cup_not_returned_flags.sql @@ -0,0 +1,37 @@ +-- Source sheet: Biospec 990 +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT +Connect_ID, +d_633640710_d_427719697 AS `Collection Cup Not Returned`, +d_633640710_d_309189173 AS `Collection Cup Leaked - Total Sample Loss` +FROM +`nih-nci-dceg-connect-prod-6d04.FlatConnect.kitAssembly` +WHERE +d_633640710_d_427719697 = "353358909" +OR d_633640710_d_309189173 = "353358909"; + + +-- w/ named variables + +SELECT +Connect_ID, +CASE +WHEN d_633640710_d_427719697 = "353358909" THEN "Yes" +WHEN d_633640710_d_427719697 = 
"104430631" THEN "No" +ELSE NULL +END AS `BioKit_CollCupNotRet_v1r0`, +/* +CASE +WHEN d_633640710_d_309189173 = "353358909" THEN "Yes" +WHEN d_633640710_d_309189173 = "104430631" THEN "No" +ELSE NULL +END AS `BioKit_CollCupLeakTotal_v1r0`, +*/ +d_259846815 AS `BioKit_MWCupID_v1r0`, +d_826941471 AS `BioKit_KitRecdTm_v1r0` +FROM +`nih-nci-dceg-connect-prod-6d04.FlatConnect.kitAssembly` +WHERE +d_633640710_d_427719697 IN ("353358909") +-- OR d_633640710_d_309189173 IN ("353358909"); diff --git a/queries/connect_id_list_ordered_output_template.sql b/queries/connect_id_list_ordered_output_template.sql new file mode 100644 index 0000000..50b3614 --- /dev/null +++ b/queries/connect_id_list_ordered_output_template.sql @@ -0,0 +1,33 @@ +-- Source sheet: ORDER BY CONNECTID +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +WITH id_list AS ( +SELECT id AS Connect_ID, offset +FROM UNNEST([ +*Add list of ConnectIDs here +]) AS id WITH OFFSET +) + +SELECT +t.Connect_ID, + +CASE +WHEN t.d_878865966 = '104430631' THEN 'No' +WHEN t.d_878865966 = '353358909' THEN 'Yes' +END AS BioFin_BaseBloodCol_v1r0, + +CASE +WHEN t.d_684635302 = '104430631' THEN 'No' +WHEN t.d_684635302 = '353358909' THEN 'Yes' +END AS BioFin_BaseMouthCol_v1r0, + +CASE +WHEN t.d_167958071 = '104430631' THEN 'No' +WHEN t.d_167958071 = '353358909' THEN 'Yes' +END AS BioFin_BaseUrineCol_v1r0 + +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` t +JOIN id_list i +ON t.Connect_ID = i.Connect_ID + +ORDER BY i.offset diff --git a/queries/country_of_origin_lookup.sql b/queries/country_of_origin_lookup.sql new file mode 100644 index 0000000..d4b9a3f --- /dev/null +++ b/queries/country_of_origin_lookup.sql @@ -0,0 +1,39 @@ +-- Source sheet: Country of Origin +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT + +Connect_ID, + +CASE +WHEN d_837244890_integer = '360734594' then 'France' +WHEN d_837244890_integer = '602510992' then 'Portugal' +WHEN d_837244890_integer = 
'581659944' then 'Thailand' +END as country_new_integer, + + +d_837244890_integer as country_new_integer, + +d_384576626 as country_old, +d_876546260 as birth_city, +d_337485417 as birth_state, +d_384576626 as birth_country, + +FROM `nih-nci-dceg-connect-dev.FlatConnect.participants` +WHERE Connect_ID in ( + + + + + + +) + +ORDER BY CASE Connect_ID +WHEN '' THEN 1 +WHEN '' THEN 2 +WHEN '' THEN 3 +WHEN '' THEN 4 +WHEN '' THEN 5 +ELSE 6 +END; diff --git a/queries/date_timestamp_filtering_examples.sql b/queries/date_timestamp_filtering_examples.sql new file mode 100644 index 0000000..d6a77df --- /dev/null +++ b/queries/date_timestamp_filtering_examples.sql @@ -0,0 +1,32 @@ +-- Source sheet: DATE vs. DATE(TIMESTAMP) +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +WHERE +DATE(TIMESTAMP(d_914594314)) <= DATE('2025-08-01') + + + +-- EXAMPLE 2: +"Both groups will have the below conditionality: +RcrtV_Verification_v1r0 = 1 +AND HdWd_Activepart_v1r0 = 0 +AND HdRef_Allsrv_v1r0 = 0 +AND HdWd_Deceased_v1r0 =0 +AND HdWd_WdConsent_v1r0 = 0 + +Group 1 addtl conditionality: +AND RcrtV_VerificationTm_V1R0 LE 07/07/2024 + +Group 2 addtl conditionality: +AND RcrtV_VerificationTm_V1R0 GE 07/08/2024 AND LE 08/01/2025 " + + +WHERE +d_821247024 = '197316935' +AND d_906417725 = '104430631' +AND d_685002411_d_867203506 = '104430631' +AND d_987563196 = '104430631' +AND d_747006172 = '104430631' +-- AND DATE(TIMESTAMP(d_914594314)) <= DATE('2024-07-07') +AND DATE(TIMESTAMP(d_914594314)) >= DATE('2024-07-08') +AND DATE(TIMESTAMP(d_914594314)) <= DATE('2025-08-01') diff --git a/queries/deceased_hipaa_withdrawal_flags.sql b/queries/deceased_hipaa_withdrawal_flags.sql new file mode 100644 index 0000000..ed28315 --- /dev/null +++ b/queries/deceased_hipaa_withdrawal_flags.sql @@ -0,0 +1,37 @@ +-- Source sheet: Deceased_HIPPA Flags +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +CREATE TEMP FUNCTION yes_no_flag(x STRING) + +AS ( +CASE +WHEN x = '104430631' 
THEN 'No' +WHEN x = '353358909' THEN 'Yes' +ELSE NULL +END +); +SELECT +Connect_ID, +CASE +WHEN d_912301837 = '208325815' THEN 'No Refusal' +WHEN d_912301837 = '622008261' THEN 'Refused some activities' +WHEN d_912301837 = '458508122' THEN 'Refused all future activities' +WHEN d_912301837 = '872012139' THEN 'Revoked HIPAA only' +WHEN d_912301837 = '854021266' THEN 'Withdrew consent' +WHEN d_912301837 = '241236037' THEN 'Data Destruction Requested' +WHEN d_912301837 = '884452262' THEN 'Data destroyed' +WHEN d_912301837 = '618686157' THEN 'Deceased' +END AS SMMet_PartStatus_v1r0, + +yes_no_flag(d_685002411_d_994064239) AS HdRef_Basesrv_v1r0, +yes_no_flag(d_685002411_d_194410742) AS HdRef_Baseblood_v1r0, +yes_no_flag(d_685002411_d_949501163) AS HdRef_Baseurine_v1r0, +yes_no_flag(d_685002411_d_277479354) AS HdRef_Basesaliva_v1r0, +yes_no_flag(d_685002411_d_867203506) AS HdRef_Allsrv_v1r0, +yes_no_flag(d_685002411_d_352996056) AS HdRef_Allsample_v1r0, +yes_no_flag(d_685002411_d_217367618) AS HdRef_BlSpecSrv_v1r0, +yes_no_flag(d_747006172) AS HdWd_WdConsent_v1r0, +yes_no_flag(d_906417725) AS HdWd_Activepart_v1r0, +yes_no_flag(d_773707518) AS HdWd_HIPAArevoked_v1r0, +yes_no_flag(d_831041022) AS HdWd_Destroydata_v1r0, +yes_no_flag(d_987563196) AS HdWd_Deceased_v1r0 diff --git a/queries/deidentified_site_race_sex_r_logic_reference.sql b/queries/deidentified_site_race_sex_r_logic_reference.sql new file mode 100644 index 0000000..12376bc --- /dev/null +++ b/queries/deidentified_site_race_sex_r_logic_reference.sql @@ -0,0 +1,25 @@ +-- Source sheet: De-identified site data +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +/* +This sheet contains reference logic that is not SQL; content preserved as a block comment. 
+ +Race and Sex Data from Sites + +race = case_when(state_d_684926335 == '635279662' |state_d_849518448 == '768826601' | + state_d_119643471 == '635279662' | state_d_253532712=='723775357' ~ "White, Non-Hispanic" , + state_d_684926335 %in% c('232334767', '401335456') | + state_d_849518448 == '181769837' | + state_d_253532712 %in% c('153444133','572474909','308427446', + '211228524','277568192','611398522','181769837') | + state_d_119643471 %in% c('232334767','211228524','308427446','432722256', + '232663805','785578696','200929978','490725843','965998904') ~ "Other", + state_d_684926335 == '178420302' | + state_d_849518448 == '178420302' | + state_d_253532712 == '178420302' | + state_d_119643471 %in% c( '986445321','746038746','178420302') | + (is.na(state_d_119643471) & d_827220437 == '657167265') ~ "Unknown"), +sex = case_when(state_d_706256705 == '536341288' | state_d_435027713 == '536341288' ~ "Female", + state_d_706256705 == '654207589' | state_d_435027713 == '654207589' ~ "Male", + #state_d_706256705 == '830573274' ~ "Intersex or Other", # too small of a count for now, need to combine with unknown +*/ diff --git a/queries/dhq3_reference_queries.sql b/queries/dhq3_reference_queries.sql new file mode 100644 index 0000000..2523085 --- /dev/null +++ b/queries/dhq3_reference_queries.sql @@ -0,0 +1,71 @@ +-- Source sheet: DHQ3 +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT *, +d_148184166 as DHQ3Username, +d_262294850 as DHQ3ProcessedTm, +d_196723965 as DHQ3StudyID, + +FROM `nih-nci-dceg-connect-dev.Connect.dhqRawAnswers` + + +SELECT DISTINCT *, +FROM `nih-nci-dceg-connect-dev.Connect.dhqRawAnswers` +WHERE d_148184166 IS NOT NULL + + +SELECT DISTINCT d_148184166 AS DHQ3Username +FROM `nih-nci-dceg-connect-dev.Connect.dhqRawAnswers` +WHERE d_148184166 IS NOT NULL +ORDER BY DHQ3Username + +SELECT DISTINCT d_262294850 AS DHQ3ProcessedTm +FROM `nih-nci-dceg-connect-dev.Connect.dhqRawAnswers` +WHERE d_262294850 IS NOT NULL + +SELECT DISTINCT 
d_196723965 AS DHQ3StudyID +FROM `nih-nci-dceg-connect-dev.Connect.dhqRawAnswers` +WHERE d_196723965 IS NOT NULL + + +SELECT *, +d_148184166 as DHQ3Username, +d_262294850 as DHQ3ProcessedTm, +d_196723965 as DHQ3StudyID, + +FROM `nih-nci-dceg-connect-dev.Connect.dhqDetailedAnalysis` + + +SELECT DISTINCT d_148184166 AS DHQ3Username +FROM `nih-nci-dceg-connect-dev.Connect.dhqDetailedAnalysis` +WHERE d_148184166 IS NOT NULL +ORDER BY DHQ3Username + +SELECT DISTINCT d_262294850 AS DHQ3ProcessedTm +FROM `nih-nci-dceg-connect-dev.Connect.dhqDetailedAnalysis` +WHERE d_262294850 IS NOT NULL + +SELECT DISTINCT d_196723965 AS DHQ3StudyID +FROM `nih-nci-dceg-connect-dev.Connect.dhqDetailedAnalysis` +WHERE d_196723965 IS NOT NULL + + +SELECT *, +d_148184166 as DHQ3Username, +d_262294850 as DHQ3ProcessedTm, +d_196723965 as DHQ3StudyID, + +FROM `nih-nci-dceg-connect-dev.Connect.dhqAnalysisResults` + +SELECT DISTINCT d_148184166 AS DHQ3Username +FROM `nih-nci-dceg-connect-dev.Connect.dhqAnalysisResults` +WHERE d_148184166 IS NOT NULL +ORDER BY DHQ3Username + +SELECT DISTINCT d_262294850 AS DHQ3ProcessedTm +FROM `nih-nci-dceg-connect-dev.Connect.dhqAnalysisResults` +WHERE d_262294850 IS NOT NULL + +SELECT DISTINCT d_196723965 AS DHQ3StudyID +FROM `nih-nci-dceg-connect-dev.Connect.dhqAnalysisResults` +WHERE d_196723965 IS NOT NULL diff --git a/queries/duplicate_tokens_by_site.sql b/queries/duplicate_tokens_by_site.sql new file mode 100644 index 0000000..4c71c18 --- /dev/null +++ b/queries/duplicate_tokens_by_site.sql @@ -0,0 +1,51 @@ +-- Source sheet: Duplicate Tokens +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT token, +state_studyId, +CASE d_827220437 +WHEN '125001209' THEN 'Kaiser Permanente Colorado' +WHEN '181769837' THEN 'Other' +WHEN '300267574' THEN 'Kaiser Permanente Hawaii' +WHEN '303349821' THEN 'Marshfield Clinic Health System' +WHEN '327912200' THEN 'Kaiser Permanente Georgia' +WHEN '452412599' THEN 'Kaiser Permanente Northwest' +WHEN 
'517700004' THEN 'National Cancer Institute' +WHEN '531629870' THEN 'HealthPartners' +WHEN '548392715' THEN 'Henry Ford Health System' +WHEN '657167265' THEN 'Sanford Health' +WHEN '809703864' THEN 'University of Chicago Medicine' +ELSE NULL +END AS Site, +COUNT(*) AS dup_count +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +GROUP BY token, state_studyId, Site +HAVING dup_count > 1 + +SELECT +d_827220437, +Connect_ID, +ARRAY_AGG(DISTINCT token) AS tokens, +COUNT(DISTINCT token) AS num_tokens +FROM +`nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +WHERE +Connect_ID IS NOT NULL AND d_827220437 IS NOT NULL +GROUP BY +d_827220437, Connect_ID +HAVING +COUNT(DISTINCT token) > 1 + + +SELECT +Connect_ID, +ARRAY_AGG(DISTINCT token) AS tokens, +COUNT(DISTINCT token) AS num_tokens +FROM +`nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +WHERE +Connect_ID IS NOT NULL +GROUP BY +Connect_ID +HAVING +COUNT(DISTINCT token) > 1 diff --git a/queries/ehr_birthdate_completeness_counts.sql b/queries/ehr_birthdate_completeness_counts.sql new file mode 100644 index 0000000..d52dfc1 --- /dev/null +++ b/queries/ehr_birthdate_completeness_counts.sql @@ -0,0 +1,147 @@ +-- Source sheet: EHR Age Counts +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +WITH combined AS ( +SELECT +'ehr_healthpartners' AS source, +year_of_birth, +month_of_birth, +day_of_birth, +birth_datetime +FROM `nih-nci-dceg-connect-prod-6d04.ehr_healthpartners.person` + +UNION ALL +SELECT +'ehr_henry_ford' AS source, +year_of_birth, +month_of_birth, +day_of_birth, +birth_datetime +FROM `nih-nci-dceg-connect-prod-6d04.ehr_henry_ford.person` + +UNION ALL +SELECT +'ehr_kp_colorado' AS source, +year_of_birth, +month_of_birth, +day_of_birth, +birth_datetime +FROM `nih-nci-dceg-connect-prod-6d04.ehr_kp_colorado.person` + +UNION ALL +SELECT +'ehr_kp_georgia' AS source, +year_of_birth, +month_of_birth, +day_of_birth, +birth_datetime +FROM 
`nih-nci-dceg-connect-prod-6d04.ehr_kp_georgia.person` + +UNION ALL +SELECT +'ehr_kp_hawaii' AS source, +year_of_birth, +month_of_birth, +day_of_birth, +birth_datetime +FROM `nih-nci-dceg-connect-prod-6d04.ehr_kp_hawaii.person` + +UNION ALL +SELECT +'ehr_kp_northwest' AS source, +year_of_birth, +month_of_birth, +day_of_birth, +birth_datetime +FROM `nih-nci-dceg-connect-prod-6d04.ehr_kp_northwest.person` + +UNION ALL +SELECT +'ehr_marshfield' AS source, +year_of_birth, +month_of_birth, +day_of_birth, +birth_datetime +FROM `nih-nci-dceg-connect-prod-6d04.ehr_marshfield.person` + +UNION ALL +SELECT +'ehr_uchicago' AS source, +year_of_birth, +month_of_birth, +day_of_birth, +birth_datetime +FROM `nih-nci-dceg-connect-prod-6d04.ehr_uchicago.person` +), + +with_age AS ( +SELECT +source, +year_of_birth, +month_of_birth, +day_of_birth, +birth_datetime, +SAFE.PARSE_DATE( +'%Y-%m-%d', +FORMAT('%04d-%02d-%02d', year_of_birth, COALESCE(month_of_birth, 1), COALESCE(day_of_birth, 1)) +) AS DOB, +DATE_DIFF( +CURRENT_DATE(), +SAFE.PARSE_DATE( +'%Y-%m-%d', +FORMAT('%04d-%02d-%02d', year_of_birth, COALESCE(month_of_birth, 1), COALESCE(day_of_birth, 1)) +), +YEAR +) AS age, +CASE +WHEN DATE_DIFF(CURRENT_DATE(), SAFE.PARSE_DATE('%Y-%m-%d', FORMAT('%04d-%02d-%02d', year_of_birth, COALESCE(month_of_birth, 1), COALESCE(day_of_birth, 1))), YEAR) < 30 THEN '<30' +WHEN DATE_DIFF(CURRENT_DATE(), SAFE.PARSE_DATE('%Y-%m-%d', FORMAT('%04d-%02d-%02d', year_of_birth, COALESCE(month_of_birth, 1), COALESCE(day_of_birth, 1))), YEAR) BETWEEN 30 AND 39 THEN '30-39' +WHEN DATE_DIFF(CURRENT_DATE(), SAFE.PARSE_DATE('%Y-%m-%d', FORMAT('%04d-%02d-%02d', year_of_birth, COALESCE(month_of_birth, 1), COALESCE(day_of_birth, 1))), YEAR) BETWEEN 40 AND 49 THEN '40-49' +WHEN DATE_DIFF(CURRENT_DATE(), SAFE.PARSE_DATE('%Y-%m-%d', FORMAT('%04d-%02d-%02d', year_of_birth, COALESCE(month_of_birth, 1), COALESCE(day_of_birth, 1))), YEAR) BETWEEN 50 AND 59 THEN '50-59' +WHEN DATE_DIFF(CURRENT_DATE(), 
SAFE.PARSE_DATE('%Y-%m-%d', FORMAT('%04d-%02d-%02d', year_of_birth, COALESCE(month_of_birth, 1), COALESCE(day_of_birth, 1))), YEAR) BETWEEN 60 AND 69 THEN '60-69' +WHEN DATE_DIFF(CURRENT_DATE(), SAFE.PARSE_DATE('%Y-%m-%d', FORMAT('%04d-%02d-%02d', year_of_birth, COALESCE(month_of_birth, 1), COALESCE(day_of_birth, 1))), YEAR) BETWEEN 70 AND 79 THEN '70-79' +WHEN DATE_DIFF(CURRENT_DATE(), SAFE.PARSE_DATE('%Y-%m-%d', FORMAT('%04d-%02d-%02d', year_of_birth, COALESCE(month_of_birth, 1), COALESCE(day_of_birth, 1))), YEAR) >= 80 THEN '80+' +ELSE NULL +END AS age_group +FROM combined +) + +-- 1️⃣ Count by source and age_group +, counts_by_source AS ( +SELECT +source, +age_group, +COUNT(*) AS person_count +FROM with_age +WHERE age_group IS NOT NULL +GROUP BY source, age_group +) + +-- 2️⃣ Count by age_group overall +, counts_overall AS ( +SELECT +age_group, +COUNT(*) AS total_persons +FROM with_age +WHERE age_group IS NOT NULL +GROUP BY age_group +) + +-- Final output: combine both summaries +SELECT +'By Source' AS summary_type, +source, +age_group, +person_count AS count +FROM counts_by_source + +UNION ALL + +SELECT +'Overall' AS summary_type, +NULL AS source, +age_group, +total_persons AS count +FROM counts_overall + +ORDER BY summary_type, source, age_group; diff --git a/queries/ehr_distinct_participant_counts_by_source.sql b/queries/ehr_distinct_participant_counts_by_source.sql new file mode 100644 index 0000000..55f15cc --- /dev/null +++ b/queries/ehr_distinct_participant_counts_by_source.sql @@ -0,0 +1,77 @@ +-- Source sheet: EHR Counts +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +WITH base AS ( +SELECT DISTINCT Connect_ID +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +WHERE d_821247024 = '197316935' +AND d_747006172 = '104430631' +AND d_773707518 = '104430631' +), +counts AS ( +SELECT 'ehr_healthpartners' AS ehr_source, +COUNT(DISTINCT b.Connect_ID) AS distinct_count +FROM base b +JOIN 
`nih-nci-dceg-connect-prod-6d04.ehr_healthpartners.person` p +ON CAST(p.person_id AS NUMERIC) = CAST(b.Connect_ID AS NUMERIC) + +UNION ALL +SELECT 'ehr_henry_ford', +COUNT(DISTINCT b.Connect_ID) +FROM base b +JOIN `nih-nci-dceg-connect-prod-6d04.ehr_henry_ford.person` p +ON CAST(p.person_id AS NUMERIC) = CAST(b.Connect_ID AS NUMERIC) + +UNION ALL +SELECT 'ehr_kp_colorado', +COUNT(DISTINCT b.Connect_ID) +FROM base b +JOIN `nih-nci-dceg-connect-prod-6d04.ehr_kp_colorado.person` p +ON CAST(p.person_id AS NUMERIC) = CAST(b.Connect_ID AS NUMERIC) + +UNION ALL +SELECT 'ehr_kp_georgia', +COUNT(DISTINCT b.Connect_ID) +FROM base b +JOIN `nih-nci-dceg-connect-prod-6d04.ehr_kp_georgia.person` p +ON CAST(p.person_id AS NUMERIC) = CAST(b.Connect_ID AS NUMERIC) + +UNION ALL +SELECT 'ehr_kp_hawaii', +COUNT(DISTINCT b.Connect_ID) +FROM base b +JOIN `nih-nci-dceg-connect-prod-6d04.ehr_kp_hawaii.person` p +ON CAST(p.person_id AS NUMERIC) = CAST(b.Connect_ID AS NUMERIC) + +UNION ALL +SELECT 'ehr_kp_northwest', +COUNT(DISTINCT b.Connect_ID) +FROM base b +JOIN `nih-nci-dceg-connect-prod-6d04.ehr_kp_northwest.person` p +ON CAST(p.person_id AS NUMERIC) = CAST(b.Connect_ID AS NUMERIC) + +UNION ALL +SELECT 'ehr_marshfield', +COUNT(DISTINCT b.Connect_ID) +FROM base b +JOIN `nih-nci-dceg-connect-prod-6d04.ehr_marshfield.person` p +ON CAST(p.person_id AS NUMERIC) = CAST(b.Connect_ID AS NUMERIC) + +UNION ALL +SELECT 'ehr_sanford', +COUNT(DISTINCT b.Connect_ID) +FROM base b +JOIN `nih-nci-dceg-connect-prod-6d04.ehr_sanford.person` p +ON CAST(p.person_id AS NUMERIC) = CAST(b.Connect_ID AS NUMERIC) + +UNION ALL +SELECT 'ehr_uchicago', +COUNT(DISTINCT b.Connect_ID) +FROM base b +JOIN `nih-nci-dceg-connect-prod-6d04.ehr_uchicago.person` p +ON CAST(p.person_id AS NUMERIC) = CAST(b.Connect_ID AS NUMERIC) +) + +SELECT * +FROM counts +ORDER BY ehr_source; diff --git a/queries/information_schema_column_search_queries.sql b/queries/information_schema_column_search_queries.sql new file mode 100644 index 
0000000..9e57626 --- /dev/null +++ b/queries/information_schema_column_search_queries.sql @@ -0,0 +1,5 @@ +-- Source sheet: Search SCHEMAS +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +select * from Connect.INFORMATION_SCHEMA.COLUMNS; +select * from FlatConnect.INFORMATION_SCHEMA.COLUMNS; diff --git a/queries/module1_sex_from_v1_v2.sql b/queries/module1_sex_from_v1_v2.sql new file mode 100644 index 0000000..99544d9 --- /dev/null +++ b/queries/module1_sex_from_v1_v2.sql @@ -0,0 +1,20 @@ +-- Source sheet: M1_M2_Sex +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT +CASE +WHEN m1v1.d_407056417 = '536341288' OR m1v2.d_407056417 = '536341288' THEN 'Female' +WHEN m1v1.d_407056417 = '654207589' OR m1v2.d_407056417 = '654207589' THEN 'Male' +WHEN m1v1.d_407056417 IN ('576796184','830573274') OR m1v2.d_407056417 IN ('576796184','830573274') THEN 'Intersex/Other' +WHEN m1v1.d_407056417 IS NULL OR m1v2.d_407056417 IS NULL THEN 'N/A' +END AS Sex_Reported, +COALESCE(m1v2.Connect_ID, m1v1.Connect_ID) AS Connect_ID, +COALESCE(m1v2.D_407056417, m1v1.D_407056417) AS sr_sex +FROM +`nih-nci-dceg-connect-prod-6d04.FlatConnect.module1_v2_JP` m1v2 +FULL OUTER JOIN +`nih-nci-dceg-connect-prod-6d04.FlatConnect.module1_v1_JP` m1v1 +ON +m1v1.Connect_ID = m1v2.Connect_ID +WHERE +COALESCE(m1v2.Connect_ID, m1v1.Connect_ID) IN ( diff --git a/queries/participant_age_groups_from_verification_and_dob.sql b/queries/participant_age_groups_from_verification_and_dob.sql new file mode 100644 index 0000000..a000e76 --- /dev/null +++ b/queries/participant_age_groups_from_verification_and_dob.sql @@ -0,0 +1,39 @@ +-- Source sheet: Age Groups from Participants +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +WITH typed AS ( +SELECT +-- Verification date: ISO 8601 format (e.g., 2020-07-17T15:21:26.763Z) +DATE(SAFE.PARSE_TIMESTAMP('%Y-%m-%dT%H:%M:%E*S%Ez', CAST(d_914594314 AS STRING))) AS verif_date, +-- DOB: yyyymmdd format (string or int) 
+SAFE.PARSE_DATE('%Y%m%d', CAST(d_371067537 AS STRING)) AS dob +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +WHERE d_821247024 = "197316935" +), +ages AS ( +SELECT +DATE_DIFF(verif_date, dob, YEAR) - IF(FORMAT_DATE('%m%d', verif_date) < FORMAT_DATE('%m%d', dob), 1, 0) AS age_years -- completed years at verification: DATE_DIFF by YEAR only counts calendar-year boundaries, so subtract 1 when the birthday has not yet occurred in the verification year +FROM typed +WHERE verif_date IS NOT NULL AND dob IS NOT NULL +) +SELECT +CASE +WHEN age_years BETWEEN 30 AND 39 THEN '30-39' +WHEN age_years BETWEEN 40 AND 49 THEN '40-49' +WHEN age_years BETWEEN 50 AND 59 THEN '50-59' +WHEN age_years BETWEEN 60 AND 69 THEN '60-69' +WHEN age_years BETWEEN 70 AND 79 THEN '70-79' +ELSE 'Other' +END AS age_group, +COUNT(*) AS count +FROM ages +GROUP BY age_group +ORDER BY +CASE age_group +WHEN '30-39' THEN 1 +WHEN '40-49' THEN 2 +WHEN '50-59' THEN 3 +WHEN '60-69' THEN 4 +WHEN '70-79' THEN 5 +ELSE 6 +END; diff --git a/queries/participant_physical_and_alternate_addresses.sql b/queries/participant_physical_and_alternate_addresses.sql new file mode 100644 index 0000000..07ec7e7 --- /dev/null +++ b/queries/participant_physical_and_alternate_addresses.sql @@ -0,0 +1,24 @@ +-- Source sheet: Physical Address +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT + +Connect_ID, + +d_284580415 as Alternative_address_line1, +d_728926441 as Alternative_address_line2, +d_907038282 as Alternative_address_city, +d_970839481 as Alternative_address_state, +d_379899229 as Alternative_address_zip, +d_810747471 as Alternate_address_PObox, + +d_207908218 as Phys_Address_line1, +d_224392018 as Phys_address_line2, +d_451993790 as Physical_address_city, +d_187799450 as Physical_address_state, +d_449168732 as Physical_address_zipcode, + + +FROM `nih-nci-dceg-connect-stg-5519.FlatConnect.participants` + +WHERE Connect_ID = diff --git a/queries/participant_twin_status_lookup.sql b/queries/participant_twin_status_lookup.sql new file mode 100644 index 0000000..5d47057 --- /dev/null +++ b/queries/participant_twin_status_lookup.sql @@ -0,0 +1,22 @@ +-- Source sheet: Twins +-- Generated from workbook: BigQuery_Data
Queries_20260513.xlsx + +SELECT +p.Connect_ID, +m.D_992987417, +CASE +WHEN m.D_992987417 = "104430631" THEN 'No' +WHEN m.D_992987417 = "353358909" THEN 'Yes' +WHEN m.D_992987417 = "288105839" THEN 'Yes, fraternal twins (not identical)' +WHEN m.D_992987417 = "626558982" THEN 'Yes, triplets or higher multiple birth' +ELSE 'Unknown' +END AS D_992987417_label +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` p +LEFT JOIN `nih-nci-dceg-connect-prod-6d04.FlatConnect.module1_v2` m +ON p.Connect_ID = m.Connect_ID +WHERE p.Connect_ID IN ( +"", +"", +"", +"" +); diff --git a/queries/quadrennial_review_report.sql b/queries/quadrennial_review_report.sql new file mode 100644 index 0000000..21eedcf --- /dev/null +++ b/queries/quadrennial_review_report.sql @@ -0,0 +1,56 @@ +-- Quadrennial Review Report +-- Request (via email from Amelia Sager to: Autumn Hullings, Nicole Gerlanc, Michelle Brotzman) +-- Re: Quadrennial Review Data Request +-- Connect is required to complete a Quadrennial Review every 4 years for the project and we are preparing for the first submission. This is separate from the annual CIER data submission that Kelsey helps us provide the data for. For the Quadrennial Review, we are being asked to provide just a couple of data points. +-- Date of first verified participant in Connect – we opened enrollment on 07/21/2021, so should be somewhere around that date but need the exact date. +-- Date we reached n=50,000 verified participants exactly – based on the weekly logs, this should be sometime near the week of 10/21/2024 but need the exact date. +-- For this data pull, I do not think it is necessary to exclude anyone who has withdrawn or requested data destruction. Let me know if you have any questions. Thank you! 
+ + + +-- START QUERY: Date of first verified participant and date verified participant count reached 50,000 +-- Data filters: WHERE Verification Status = Verified, Verification D/T (d_914594314) is not null and Connect ID is not null + +WITH verified_participants AS ( + SELECT + Connect_ID, + SAFE_CAST(d_914594314 AS TIMESTAMP) AS verification_ts, + DATE(SAFE_CAST(d_914594314 AS TIMESTAMP)) AS verification_date + FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` + WHERE Connect_ID IS NOT NULL + AND d_821247024 = '197316935' + AND d_914594314 IS NOT NULL +), + +ranked_verified AS ( + SELECT + Connect_ID, + verification_ts, + verification_date, + ROW_NUMBER() OVER ( + ORDER BY verification_ts, Connect_ID + ) AS verified_participant_number + FROM verified_participants +) + +SELECT + 'First verified participant' AS metric, + Connect_ID, + verification_ts, + verification_date, + verified_participant_number +FROM ranked_verified +WHERE verified_participant_number = 1 + +UNION ALL + +SELECT + '50,000th verified participant' AS metric, + Connect_ID, + verification_ts, + verification_date, + verified_participant_number +FROM ranked_verified +WHERE verified_participant_number = 50000 + +ORDER BY verified_participant_number; diff --git a/queries/race_group_r_logic_reference.sql b/queries/race_group_r_logic_reference.sql new file mode 100644 index 0000000..b86f7e6 --- /dev/null +++ b/queries/race_group_r_logic_reference.sql @@ -0,0 +1,76 @@ +-- Source sheet: Race groups +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +/* +This sheet contains reference logic that is not SQL; content preserved as a block comment. 
+ +RACE CATEGORIES (updated Multi-Racial: March 2025) + +race_columns <- c( + "D_384191091_D_384191091_D_583826374", + "D_384191091_D_384191091_D_636411467", + "D_384191091_D_384191091_D_458435048", + "D_384191091_D_384191091_D_706998638", + "D_384191091_D_384191091_D_973565052", + "D_384191091_D_384191091_D_586825330", + "D_384191091_D_384191091_D_412790539", + "D_384191091_D_384191091_D_807835037" +) + +# All data is currently string values; convert "1" and "0" to numeric +# values before summarizing. +module1[race_columns] <- lapply(module1[race_columns], as.numeric) +module1$multi_racial <- ifelse(rowSums(module1[race_columns], na.rm = TRUE) > 1, 1, 0) + +which_race <- module1 %>% + mutate( + race = case_when( + multi_racial == 1 ~ "Multi-Racial", + D_384191091_D_384191091_D_583826374 == 1 ~ "American Indian or Native American", + D_384191091_D_384191091_D_636411467 == 1 ~ "Asian/Asian American", + D_384191091_D_384191091_D_458435048 == 1 ~ "Black, African American, or African", + D_384191091_D_384191091_D_706998638 == 1 ~ "Hispanic, Latino, or Spanish", + D_384191091_D_384191091_D_973565052 == 1 ~ "Middle Eastern or North African", + D_384191091_D_384191091_D_586825330 == 1 ~ "Hawaiian or Pacific Islander", + D_384191091_D_384191091_D_412790539 == 1 ~ "White", + D_384191091_D_384191091_D_807835037 == 1 | + !is.na(D_384191091_D_747350323) ~ "Other", + D_384191091_D_384191091_D_746038746 == 1 ~ "Prefer Not to Answer", + D_384191091_D_384191091_D_178420302 == 1 ~ "Unknown", + TRUE ~ "Skipped this question " + ) + ) + +dt_all_races_summary <- which_race %>% + dplyr::group_by(race) %>% + dplyr::summarize(n = n(), percentage = 100 * n / nrow(.)) %>% + dplyr::ungroup() %>% + dplyr::select(race, n, percentage) + +dt_all_races_summary %>% + gt::gt(rowname_col = "row_lab") %>% + fmt_number(columns = "percentage", decimals = 2) %>% + tab_header( + title = md("Race/Ethnicity of Participants Who Completed BOH Section of First Survey") + ) %>% + cols_label( + n = 
md("**N**"), + race = md("**Answer**"), + percentage = md("**%**") + ) %>% + grand_summary_rows( + columns = c(n, percentage), + fns = ~sum(., na.rm = TRUE) + ) |> + tab_options( + stub.font.weight = "bold" + ) %>% + tab_style( + style = list( + cell_text(weight = "bold") + ), + locations = cells_body( + columns = race + ) + ) +*/ diff --git a/queries/reinvitation_campaign_type_lookup.sql b/queries/reinvitation_campaign_type_lookup.sql new file mode 100644 index 0000000..d6c34fa --- /dev/null +++ b/queries/reinvitation_campaign_type_lookup.sql @@ -0,0 +1,21 @@ +-- Source sheet: ReInvitation Campaign Type +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT token, d_280021666,d_439351436,d_471593703, +CASE +WHEN d_280021666 = '926338735' THEN "Random" +WHEN d_280021666 = '348281054' THEN "Screening appointment" +WHEN d_280021666 = '324692899' THEN "Non-screening appointment" +WHEN d_280021666 = '351257378' THEN "Demographic Group" +WHEN d_280021666 = '647148178' THEN "Aging out of study" +WHEN d_280021666 = '834544960' THEN "Geographic group" +WHEN d_280021666 = '682916147' THEN "Post-Screening Selection" +WHEN d_280021666 = '153365143' THEN "Technology adapters" +WHEN d_280021666 = '663706936' THEN "Low-income/health professional shortage areas" +WHEN d_280021666 = '208952854' THEN "Research Registry" +WHEN d_280021666 = '296312382' THEN "Pop up" +WHEN d_280021666 = '181769837' THEN "Other" +WHEN d_280021666 = '398561594' THEN "None of these apply" +END AS RcrtSI_RInvCampaignType_v1r0 +FROM `nih-nci-dceg-connect-dev.FlatConnect.participants` +where token IN () diff --git a/queries/site_code_to_label_case_snippet.sql b/queries/site_code_to_label_case_snippet.sql new file mode 100644 index 0000000..31cc86b --- /dev/null +++ b/queries/site_code_to_label_case_snippet.sql @@ -0,0 +1,17 @@ +-- Source sheet: Site +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +CASE +WHEN d_827220437 = '531629870' THEN 'HealthPartners' +WHEN 
d_827220437 = '548392715' THEN 'Henry Ford Health System' +WHEN d_827220437 = '125001209' THEN 'Kaiser Permanente Colorado' +WHEN d_827220437 = '327912200' THEN 'Kaiser Permanente Georgia' +WHEN d_827220437 = '300267574' THEN 'Kaiser Permanente Hawaii' +WHEN d_827220437 = '452412599' THEN 'Kaiser Permanente Northwest' +WHEN d_827220437 = '303349821' THEN 'Marshfield Clinic Health System' +WHEN d_827220437 = '657167265' THEN 'Sanford Health' +WHEN d_827220437 = '809703864' THEN 'University of Chicago Medicine' +WHEN d_827220437 = '517700004' THEN 'National Cancer Institute' +WHEN d_827220437 = '472940358' THEN 'Baylor Scott & White Health' +WHEN d_827220437 = '181769837' THEN 'Other' +END AS RcrtES_Site_v1r0, diff --git a/queries/survey_completion_counts_by_age_group.sql b/queries/survey_completion_counts_by_age_group.sql new file mode 100644 index 0000000..666fc39 --- /dev/null +++ b/queries/survey_completion_counts_by_age_group.sql @@ -0,0 +1,59 @@ +-- Source sheet: Survey Completion by Age +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT +IFNULL(age_group, 'Total') AS age_group, +COUNTIF(completion = 'Completed Survey and Sample(s)') AS completed_survey_and_samples, +COUNTIF(completion = 'Completed Survey, No Sample(s)') AS completed_survey_no_samples, +COUNTIF(completion = 'Completed Sample(s), No Survey') AS completed_samples_no_survey, +COUNTIF(completion = 'Completed Neither') AS completed_neither, +COUNT(Connect_ID) AS total +FROM ( +SELECT +Connect_ID, +CASE +WHEN EXTRACT(YEAR FROM TIMESTAMP(d_914594314)) = 2022 THEN '2022' +WHEN EXTRACT(YEAR FROM TIMESTAMP(d_914594314)) = 2023 THEN '2023' +WHEN EXTRACT(YEAR FROM TIMESTAMP(d_914594314)) = 2024 THEN '2024' +WHEN EXTRACT(YEAR FROM TIMESTAMP(d_914594314)) = 2025 AND EXTRACT(MONTH FROM TIMESTAMP(d_914594314)) BETWEEN 1 AND 3 THEN 'January - March 2025' +WHEN EXTRACT(YEAR FROM TIMESTAMP(d_914594314)) = 2025 AND EXTRACT(MONTH FROM TIMESTAMP(d_914594314)) BETWEEN 4 AND 7 THEN 'April - 
July 2025' +ELSE 'Other' +END AS time_category, +CASE +WHEN d_100767870 = "353358909" AND +(d_878865966 = "353358909" OR d_684635302 = "353358909" OR d_167958071 = "353358909") +THEN "Completed Survey and Sample(s)" +WHEN d_100767870 = "353358909" +THEN "Completed Survey, No Sample(s)" +WHEN d_878865966 = "353358909" OR d_684635302 = "353358909" OR d_167958071 = "353358909" +THEN "Completed Sample(s), No Survey" +ELSE "Completed Neither" +END AS completion, + +CASE +WHEN state_d_934298480 = '713781738' THEN '30-34' +WHEN state_d_934298480 = '631272782' THEN '35-39' +WHEN state_d_934298480 = '124276120' THEN '40-45' +WHEN state_d_934298480 = '450985724' THEN '46-50' +WHEN state_d_934298480 = '363147933' THEN '51-55' +WHEN state_d_934298480 = '636706443' THEN '56-60' +WHEN state_d_934298480 = '771230670' THEN '61-65' +WHEN state_d_934298480 = '722846087' THEN '66-70' +ELSE 'Unknown' +END AS age_group + +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants_JP` +WHERE +d_827220437 = '809703864' -- UChicago +AND d_821247024 = '197316935' -- Verified +AND EXTRACT(YEAR FROM TIMESTAMP(d_914594314)) = 2025 +AND EXTRACT(MONTH FROM TIMESTAMP(d_914594314)) BETWEEN 4 AND 7 +) + +GROUP BY ROLLUP(age_group) +ORDER BY +CASE +WHEN age_group IS NULL THEN 999 -- Total row goes last +WHEN age_group = 'Unknown' THEN 998 +ELSE CAST(SUBSTR(age_group, 1, 2) AS INT64) +END; diff --git a/queries/survey_status_case_mapping_snippets.sql b/queries/survey_status_case_mapping_snippets.sql new file mode 100644 index 0000000..077bfa6 --- /dev/null +++ b/queries/survey_status_case_mapping_snippets.sql @@ -0,0 +1,171 @@ +-- Source sheet: Survey Statuses +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +CASE +WHEN d_949302066 = '972455046' THEN 'Not Started' +WHEN d_949302066 = '615768760' THEN 'Started' +WHEN d_949302066 = '231311385' THEN 'Submitted' +END AS SrvBOH_BaseStatus_v1r0, + +D_205553981 AS SrvBOH_TmStart_v1r0, +D_517311251 AS SrvBOH_TmComplete_v1r0, + +-- MRE: 
Medications, Reproductive Health, Exercise, Sleep +CASE +WHEN d_536735468 = '972455046' THEN 'Not Started' -- same three status codes as the other survey flags in this file +WHEN d_536735468 = '615768760' THEN 'Started' +WHEN d_536735468 = '231311385' THEN 'Submitted' +END AS SrvMRE_BaseStatus_v1r0, + +D_541836531 AS SrvMRE_TmStart_v1r0, +D_832139544 AS SrvMRE_TmComplete_v1r0, + +-- SAS: Smoking, Alcohol, Sun Exposure +CASE +WHEN d_976570371 = '972455046' THEN 'Not Started' +WHEN d_976570371 = '615768760' THEN 'Started' +WHEN d_976570371 = '231311385' THEN 'Submitted' +END AS SrvSAS_BaseStatus_v1r0, + +D_386488297 AS SrvSAS_TmStart_v1r0, +D_770257102 AS SrvSAS_TmComplete_v1r0, + +-- LAW: Where You Live and Work +CASE +WHEN d_663265240 = '972455046' THEN 'Not Started' +WHEN d_663265240 = '615768760' THEN 'Started' +WHEN d_663265240 = '231311385' THEN 'Submitted' +END AS SrvLAW_BaseStatus_v1r0, + +d_452942800 AS SrvLAW_TmStart_v1r0, +d_264644252 AS SrvLAW_TmComplete_v1r0, + +-- + +-- BIOSPECIMEN DATA: + +-- Blood/Urine/Mouthwash (BLM): +CASE +WHEN d_265193023 = '972455046' THEN 'Not Started' +WHEN d_265193023 = '615768760' THEN 'Started' +WHEN d_265193023 = '231311385' THEN 'Submitted' +END AS SrvBLM_ResSrvCompl_v1r0, + +d_822499427 as SrvBLM_TmStart_v1r0, +d_222161762 as SrvBLM_TmComplete_v1r0, + +-- Blood/Urine (BLU): +CASE +WHEN d_253883960 = '972455046' THEN 'Not Started' +WHEN d_253883960 = '615768760' THEN 'Started' +WHEN d_253883960 = '231311385' THEN 'Submitted' +END AS SrvBlU_BaseComplete_v1r0, + +d_534669573 as SrvBlU_TmStart_v1r0, +d_764863765 as SrvBlU_TmComplete_v1r0, + +-- Mouthwash (MW): +CASE +WHEN d_547363263 = '972455046' THEN 'Not Started' +WHEN d_547363263 = '615768760' THEN 'Started' +WHEN d_547363263 = '231311385' THEN 'Submitted' +END AS SrvMtW_BaseComplete_v1r0, + +d_286191859 as SrvMtW_TmStart_v1r0, +d_195145666 as SrvMtW_TmComplete_v1r0, + +-- + +-- MENSTRUAL CYCLE: +CASE +WHEN d_459098666 = '972455046' THEN 'Not Started' +WHEN d_459098666 = '615768760' THEN 'Started' +WHEN d_459098666 = '231311385' THEN 'Submitted' +END AS SrvMC_BaseComplete_v1r0, +
+d_844088537 AS SrvMC_TmStart_v1r0, +d_217640691 AS SrvMC_TmComplete_v1r0, + +-- + +-- COVID: +CASE +WHEN d_220186468 = '972455046' THEN 'Not Started' +WHEN d_220186468 = '615768760' THEN 'Started' +WHEN d_220186468 = '231311385' THEN 'Submitted' +END AS SrvCOV_BaseComplete_v1r0, + +d_268176409 AS SrvCOV_TmStart_v1r0, +d_784810139 AS SrvCOV_TmComplete_v1r0, + +-- + +-- QOL/PROMIS: +CASE +WHEN d_320303124 = '972455046' THEN 'Not Started' +WHEN d_320303124 = '615768760' THEN 'Started' +WHEN d_320303124 = '231311385' THEN 'Submitted' +END AS SrvQOL_3moStatus_v1r0, + +d_870643066 AS SrvQOL_3moTmStart_v1r0, +d_843688458 AS SrvQOL_3moTmComplete_v1r0, + +-- + +-- CES: +CASE +WHEN d_956490759 = '972455046' THEN 'Not Started' +WHEN d_956490759 = '615768760' THEN 'Started' +WHEN d_956490759 = '231311385' THEN 'Submitted' +END AS SrvCoE_ConExpStatus_v1r0, + +d_263355177 as SrvCoE_ConExpTmStart_v1r0, +d_199471989 as SrvCoE_ConExpTmCompl_v1r0, + +-- + +-- CSH: Cancer Screening History +CASE +WHEN d_176068627 = '972455046' THEN 'Not Started' +WHEN d_176068627 = '615768760' THEN 'Started' +WHEN d_176068627 = '231311385' THEN 'Submitted' +END AS SrvScr_CancScrnStatus_v1r0, + +d_609630315 as SrvScr_CancScrnTmStart_v1r0, +d_389890053 as SrvScr_CancScrnTmCompl_v1r0, + +-- + +-- SSN: +CASE +WHEN d_126331570 = '972455046' THEN 'Not Started' +WHEN d_126331570 = '615768760' THEN 'Started' +WHEN d_126331570 = '231311385' THEN 'Submitted' +END AS SrvSS_SSNSurvey_v1r0, + +d_943232079 AS SrvSS_TmStart_v1r0, +d_315032037 AS SrvSS_TmComplete_v1r0, + +-- + +-- DHQ3 (internal): +CASE +WHEN d_692560814 = '972455046' THEN 'Not Started' +WHEN d_692560814 = '615768760' THEN 'Started' +WHEN d_692560814 = '231311385' THEN 'Submitted' +END AS SrvDHQ3_6moStatus_v1r0, + +d_109610692 as SrvDHQ3_6moTmStart_v1r0, +d_610227793 as SrvDHQ3_6moTmComplete_v1r0, + +-- + +-- 2026 ROI: +CASE +WHEN d_278023676 = '972455046' THEN 'Not Started' +WHEN d_278023676 = '615768760' THEN 'Started' +WHEN d_278023676 = 
'231311385' THEN 'Submitted' +END AS SrvROI_PrefStatus_v1r0, + +d_993557295 AS SrvROI_PrefTmStart_v1r0, +d_543379310 AS SrvROI_PrefTmComplete_v1r0, diff --git a/queries/system_time_as_of_timestamp_snippet.sql b/queries/system_time_as_of_timestamp_snippet.sql new file mode 100644 index 0000000..34d459d --- /dev/null +++ b/queries/system_time_as_of_timestamp_snippet.sql @@ -0,0 +1,6 @@ +-- Source sheet: TIMESTAMP +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +FOR SYSTEM_TIME AS OF TIMESTAMP('2025-05-30 18:34:40.654 UTC') +WHERE diff --git a/queries/verification_and_duplicate_type_case_snippets.sql b/queries/verification_and_duplicate_type_case_snippets.sql new file mode 100644 index 0000000..ab608ad --- /dev/null +++ b/queries/verification_and_duplicate_type_case_snippets.sql @@ -0,0 +1,40 @@ +-- Source sheet: Verification Duplication +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +CASE +WHEN d_821247024 = '875007964' THEN 'Not yet verified' +WHEN d_821247024 = '197316935' THEN 'Verified' +WHEN d_821247024 = '219863910' THEN 'Cannot be verified' +WHEN d_821247024 = '922622075' THEN 'Duplicate' +WHEN d_821247024 = '160161595' THEN 'Outreach timed out' +WHEN d_821247024 = '290379732' THEN 'No Longer Enrolling' +END AS RcrtV_Verification_v1r0, + + + +CASE +WHEN state_d_148197146 = '638335430' THEN 'Active recruit signed in as Passive recruit' +WHEN state_d_148197146 = '283434980' THEN 'Not Active recruit signed in as Passive recruit' +WHEN state_d_148197146 = '866029623' THEN 'Not Active recruit signed in as an Active recruit' +WHEN state_d_148197146 = '654558118' THEN 'Participant already enrolled' +WHEN state_d_148197146 = '979256174' THEN 'Passive recruit signed in as Active recruit' +WHEN state_d_148197146 = '696650324' THEN 'Change in Eligibility Status' +END AS Duplicate_Type, + + +CASE +WHEN d_512820379 = '180583933' THEN 'Not Active' +WHEN d_512820379 = 
'486306141' THEN 'Active' +WHEN d_512820379 = '854703046' THEN 'Passive' +END AS Recruit_Type, + + +CASE +WHEN state_d_793822265 = '132080040' THEN 'No Change Needed' +WHEN state_d_793822265 = '604663208' THEN 'Not Active to Passive' +WHEN state_d_793822265 = '854903954' THEN 'Passive to Active' +WHEN state_d_793822265 = '965707001' THEN 'Active to Passive' +END AS Update_Recruit_Type, -- comma needed: another select item follows + + +d_471593703 as Recruitment_date diff --git a/queries/verification_by_outreach_status.sql b/queries/verification_by_outreach_status.sql new file mode 100644 index 0000000..eb672b7 --- /dev/null +++ b/queries/verification_by_outreach_status.sql @@ -0,0 +1,224 @@ +-- Source sheet: Verification-by-Outreach status +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +WITH base AS ( +SELECT +CASE +WHEN state_d_444699761 = '426360242' THEN 'Method Used' +WHEN state_d_444699761 = '734437214' THEN 'Method Not Used' +END AS Auto_Verif, + +CASE +WHEN state_d_953614051 = '426360242' THEN 'Method Used' +WHEN state_d_953614051 = '734437214' THEN 'Method Not Used' +END AS Manual_Verif, + +CASE +WHEN state_d_188797763 = '104430631' THEN 'No' +WHEN state_d_188797763 = '353358909' THEN 'Yes' +END AS Outreach_Required, + +CASE +WHEN d_827220437 = '531629870' THEN 'HealthPartners' +WHEN d_827220437 = '548392715' THEN 'Henry Ford Health System' +WHEN d_827220437 = '125001209' THEN 'Kaiser Permanente Colorado' +WHEN d_827220437 = '327912200' THEN 'Kaiser Permanente Georgia' +WHEN d_827220437 = '300267574' THEN 'Kaiser Permanente Hawaii' +WHEN d_827220437 = '452412599' THEN 'Kaiser Permanente Northwest' +WHEN d_827220437 = '303349821' THEN 'Marshfield Clinic Health System' +WHEN d_827220437 = '657167265' THEN 'Sanford Health' +WHEN d_827220437 = '809703864' THEN 'University of Chicago Medicine' +WHEN d_827220437 = '517700004' THEN 'National Cancer Institute' +WHEN d_827220437 = '472940358' THEN 'Baylor Scott & White Health' +END AS Site + +FROM
`nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +WHERE d_821247024 = '219863910' -- Verification status = cannot be verified +AND d_831041022 = '104430631' -- Data destroy = no +AND state_d_444699761 IS NOT NULL +AND state_d_953614051 IS NOT NULL +AND state_d_188797763 IS NOT NULL +), + +counts AS ( +SELECT +Site, +Auto_Verif, +Manual_Verif, +Outreach_Required, +COUNT(*) AS sample_counts +FROM base +GROUP BY Site, Auto_Verif, Manual_Verif, Outreach_Required +), + +sites AS ( +SELECT * FROM UNNEST([ +'HealthPartners', +'Henry Ford Health System', +'Kaiser Permanente Colorado', +'Kaiser Permanente Georgia', +'Kaiser Permanente Hawaii', +'Kaiser Permanente Northwest', +'Marshfield Clinic Health System', +'Sanford Health', +'University of Chicago Medicine', +'National Cancer Institute', +'Baylor Scott & White Health' +]) AS Site +), + +grid AS ( +SELECT +s.Site, +Auto_Verif, +Manual_Verif, +Outreach_Required +FROM sites s +CROSS JOIN UNNEST(['Method Used','Method Not Used']) AS Auto_Verif +CROSS JOIN UNNEST(['Method Used','Method Not Used']) AS Manual_Verif +CROSS JOIN UNNEST(['Yes','No']) AS Outreach_Required +) + +SELECT +g.Site, +g.Auto_Verif, +g.Manual_Verif, +g.Outreach_Required, +COALESCE(c.sample_counts, 0) AS sample_counts +FROM grid g +LEFT JOIN counts c +ON g.Site = c.Site +AND g.Auto_Verif = c.Auto_Verif +AND g.Manual_Verif = c.Manual_Verif +AND g.Outreach_Required = c.Outreach_Required +ORDER BY +Site, +Auto_Verif, +Manual_Verif, +Outreach_Required; + + + + + + + + + + +WITH base AS ( +SELECT +CASE +WHEN state_d_444699761 = '426360242' THEN 'Method Used' +WHEN state_d_444699761 = '734437214' THEN 'Method Not Used' +WHEN state_d_444699761 IS NULL then 'Auto Verif Missing' +END AS Auto_Verif, + +CASE +WHEN state_d_953614051 = '426360242' THEN 'Method Used' +WHEN state_d_953614051 = '734437214' THEN 'Method Not Used' +WHEN state_d_953614051 IS NULL then 'Manual Verif Missing' +END AS Manual_Verif, + +CASE +WHEN state_d_188797763 = '104430631' THEN 
'No' +WHEN state_d_188797763 = '353358909' THEN 'Yes' +WHEN state_d_188797763 IS NULL THEN 'Outreach Required Missing' +END AS Outreach_Required, + +CASE +WHEN d_827220437 = '531629870' THEN 'HealthPartners' +WHEN d_827220437 = '548392715' THEN 'Henry Ford Health System' +WHEN d_827220437 = '125001209' THEN 'Kaiser Permanente Colorado' +WHEN d_827220437 = '327912200' THEN 'Kaiser Permanente Georgia' +WHEN d_827220437 = '300267574' THEN 'Kaiser Permanente Hawaii' +WHEN d_827220437 = '452412599' THEN 'Kaiser Permanente Northwest' +WHEN d_827220437 = '303349821' THEN 'Marshfield Clinic Health System' +WHEN d_827220437 = '657167265' THEN 'Sanford Health' +WHEN d_827220437 = '809703864' THEN 'University of Chicago Medicine' +WHEN d_827220437 = '517700004' THEN 'National Cancer Institute' +WHEN d_827220437 = '472940358' THEN 'Baylor Scott & White Health' +END AS Site + +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +WHERE d_821247024 = '197316935' -- Verification status = verified +AND d_831041022 = '104430631' -- Data destroy = no +-- AND state_d_444699761 IS NOT NULL +-- AND state_d_953614051 IS NOT NULL +-- AND state_d_188797763 IS NOT NULL +), + +counts AS ( +SELECT +Site, +Auto_Verif, +Manual_Verif, +Outreach_Required, +COUNT(*) AS sample_counts +FROM base +GROUP BY Site, Auto_Verif, Manual_Verif, Outreach_Required +), + +sites AS ( +SELECT * FROM UNNEST([ +'HealthPartners', +'Henry Ford Health System', +'Kaiser Permanente Colorado', +'Kaiser Permanente Georgia', +'Kaiser Permanente Hawaii', +'Kaiser Permanente Northwest', +'Marshfield Clinic Health System', +'Sanford Health', +'University of Chicago Medicine', +'National Cancer Institute', +'Baylor Scott & White Health' +]) AS Site +), + +grid AS ( +SELECT +s.Site, +Auto_Verif, +Manual_Verif, +Outreach_Required +FROM sites s +CROSS JOIN UNNEST(['Method Used','Method Not Used', 'Auto Verif Missing']) AS Auto_Verif +CROSS JOIN UNNEST(['Method Used','Method Not Used', 'Manual Verif Missing']) AS 
Manual_Verif +CROSS JOIN UNNEST(['Yes','No', 'Outreach Required Missing']) AS Outreach_Required +) + +SELECT +g.Site, +g.Auto_Verif, +g.Manual_Verif, +g.Outreach_Required, +COALESCE(c.sample_counts, 0) AS sample_counts +FROM grid g +LEFT JOIN counts c +ON g.Site = c.Site +AND g.Auto_Verif = c.Auto_Verif +AND g.Manual_Verif = c.Manual_Verif +AND g.Outreach_Required = c.Outreach_Required +ORDER BY +Site, + +-- Auto_Verif order +CASE g.Auto_Verif +WHEN 'Method Not Used' THEN 1 +WHEN 'Method Used' THEN 2 +WHEN 'Auto Verif Missing' THEN 3 +END, + +-- Manual_Verif order +CASE g.Manual_Verif +WHEN 'Method Not Used' THEN 1 +WHEN 'Method Used' THEN 2 +WHEN 'Manual Verif Missing' THEN 3 +END, + +-- Outreach_Required order +CASE g.Outreach_Required +WHEN 'No' THEN 1 +WHEN 'Yes' THEN 2 +WHEN 'Outreach Required Missing' THEN 3 +END; diff --git a/queries/yes_no_flag_temp_function_template.sql b/queries/yes_no_flag_temp_function_template.sql new file mode 100644 index 0000000..b34bf7a --- /dev/null +++ b/queries/yes_no_flag_temp_function_template.sql @@ -0,0 +1,28 @@ +-- Source sheet: Yes_No_SQL_function +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +CREATE TEMP FUNCTION yes_no_flag(x STRING) +AS ( +CASE +WHEN x = '104430631' THEN 'No' +WHEN x = '353358909' THEN 'Yes' +ELSE NULL +END +); + +SELECT +Connect_ID, + + +yes_no_flag(d_685002411_d_994064239) AS HdRef_Basesrv_v1r0, +yes_no_flag(d_685002411_d_194410742) AS HdRef_Baseblood_v1r0, +yes_no_flag(d_685002411_d_949501163) AS HdRef_Baseurine_v1r0, +yes_no_flag(d_685002411_d_277479354) AS HdRef_Basesaliva_v1r0, +yes_no_flag(d_685002411_d_867203506) AS HdRef_Allsrv_v1r0, +yes_no_flag(d_685002411_d_352996056) AS HdRef_Allsample_v1r0, +yes_no_flag(d_685002411_d_217367618) AS HdRef_BlSpecSrv_v1r0, +yes_no_flag(d_747006172) AS HdWd_WdConsent_v1r0, +yes_no_flag(d_906417725) AS HdWd_Activepart_v1r0, +yes_no_flag(d_773707518) AS HdWd_HIPAArevoked_v1r0, +yes_no_flag(d_831041022) AS HdWd_Destroydata_v1r0, 
+yes_no_flag(d_987563196) AS HdWd_Deceased_v1r0 diff --git a/scaling_your_r_project.qmd b/scaling_your_r_project.qmd new file mode 100644 index 0000000..600fa83 --- /dev/null +++ b/scaling_your_r_project.qmd @@ -0,0 +1,279 @@ +--- +title: "Scaling Your R Project: From Small Data to Big Data" +author: "Jake Peters" +date: "2024-04-27" +format: + html: + toc: true + toc-depth: 3 + theme: cerulean +--- + +## Introduction + +Transitioning an R project from handling small datasets to Big Data involves navigating a series of evolving challenges related to data size, complexity, processing speed, and infrastructure. This guide outlines the progression of these challenges and provides recommended R packages and solutions to address each phase effectively, focusing exclusively on **data manipulation and handling**. + + +| **Scale of Data** | **Challenges** | +|--------------|---------------------------------------------------------------------------------| +| **Small** | - Data Manipulation & Analysis
    - Reproducibility | +| **Medium** | - Performance Optimization
    - Memory Usage
    - Parallel Processing | +| **Large** | - Out-of-Memory Processing
    - Efficient I/O
    - Scalability | +| **Big** | - Distributed Computing
    - Integration with Big Data Ecosystems
    - Real-Time Processing
    - Scalability & Fault Tolerance | + + + +## 1. Small Data + +### Challenges + +- **Data Manipulation & Analysis:** Efficiently performing standard data operations and analyses. +- **Reproducibility:** Ensuring analyses are reproducible and well-documented. + +### Solutions & R Packages + +#### Data Manipulation + +- [**`dplyr`**](https://CRAN.R-project.org/package=dplyr)**:** Provides a grammar for data manipulation, enabling easy filtering, selecting, mutating, and summarizing. +- [**`tidyr`**](https://CRAN.R-project.org/package=tidyr)**:** Facilitates data tidying, ensuring datasets are in the right format for analysis. + +#### Data Import & Export + +- [**`readr`**](https://CRAN.R-project.org/package=readr)**:** Efficiently reads rectangular data (e.g., CSV, TSV). +- [**`readxl`**](https://CRAN.R-project.org/package=readxl)**:** Reads Excel files. + +#### Reproducibility + +- [**`RMarkdown`**](https://CRAN.R-project.org/package=rmarkdown)**:** Combines code, output, and narrative in a single document. + +------------------------------------------------------------------------ + +## 2. Medium Data + +As datasets grow larger but still generally fit into memory, the focus shifts to improving performance and efficiency. + +### Challenges + +- **Performance Optimization:** Reducing computation time for data manipulation and analysis. +- **Efficient Memory Usage:** Managing memory consumption to handle larger datasets without crashes. +- **Parallel Processing:** Utilizing multiple CPU cores to speed up computations. + +### Solutions & R Packages + +#### Efficient Data Handling + +- [**`data.table`**](https://CRAN.R-project.org/package=data.table)**:** Offers high-performance data manipulation with syntax similar to `dplyr` but optimized for speed and memory efficiency. + +#### Parallel Computing + +- [**`parallel`**](https://stat.ethz.ch/R-manual/R-devel/library/parallel/doc/parallel.pdf)**:** Base R package for parallel execution. 
+- [**`foreach`**](https://CRAN.R-project.org/package=foreach) and [**`doParallel`**](https://CRAN.R-project.org/package=doParallel)**:** Simplify parallel looping constructs. + +#### Memory Management + +- [**`pryr`**](https://CRAN.R-project.org/package=pryr)**:** Tools for tracking memory usage and optimizing memory consumption. + +#### Enhanced Data Import + +- [**`vroom`**](https://CRAN.R-project.org/package=vroom)**:** Fast reading of rectangular data by leveraging multithreading. + +------------------------------------------------------------------------ + +## 3. Large Data + +When datasets exceed available memory, strategies shift to out-of-memory data management and scalable storage solutions. + +### Challenges + +- **Out-of-Memory Data Processing:** Handling datasets that cannot be loaded entirely into RAM. +- **Efficient I/O Operations:** Minimizing read/write times for large datasets. +- **Scalability:** Ensuring solutions can handle increasing data sizes without significant performance degradation. + +### Solutions & R Packages + +#### Out-of-Memory Data Structures + +- [**`ff`**](https://CRAN.R-project.org/package=ff)**:** Stores data on disk while accessing it as if it were in memory. +- [**`bigmemory`**](https://CRAN.R-project.org/package=bigmemory)**:** Manages massive matrices with shared memory support. +- [**`disk.frame`**](https://CRAN.R-project.org/package=disk.frame)**:** Provides `data.frame`-like objects that are stored on disk, supporting parallel processing. + +#### Database Integration + +- [**`DBI`**](https://CRAN.R-project.org/package=DBI) and [**`dbplyr`**](https://CRAN.R-project.org/package=dbplyr)**:** Interface with various databases (e.g., SQL, PostgreSQL) allowing data manipulation using `dplyr` syntax without loading all data into R. 
+- [**`duckplyr`**](https://CRAN.R-project.org/package=duckplyr)**:** Integrates `dplyr` with **DuckDB**, an in-process SQL OLAP database management system, enabling efficient querying and manipulation of large datasets. +- [**`RSQLite`**](https://CRAN.R-project.org/package=RSQLite)**:** Lightweight, disk-based database. + +#### Efficient Data Import & Export + +- [**`arrow`**](https://CRAN.R-project.org/package=arrow)**:** Facilitates high-performance data interchange using the Apache Arrow format. + +#### Parallel and Distributed Computing + +- [**`future`**](https://CRAN.R-project.org/package=future) and [**`furrr`**](https://CRAN.R-project.org/package=furrr)**:** Simplify asynchronous and parallel processing workflows. + +------------------------------------------------------------------------ + +## 4. Big Data + +At the Big Data scale, data is often distributed across multiple machines or requires integration with distributed computing frameworks. Focusing on **Google BigQuery** as the primary DBMS, the challenges and solutions are tailored accordingly. + +### Challenges + +- **Distributed Computing:** Managing and processing data across multiple nodes or clusters. +- **Integration with Big Data Ecosystems:** Seamlessly working with technologies like Hadoop and Spark. +- **Real-Time Data Processing:** Handling streaming data with low latency. +- **Advanced Scalability and Fault Tolerance:** Ensuring systems can scale dynamically and recover from failures. + +### Solutions & R Packages + +#### Google BigQuery Integration + +- [**`bigrquery`**](https://CRAN.R-project.org/package=bigrquery)**:** Provides an interface to Google's BigQuery, allowing R users to perform SQL queries, manage datasets, and analyze large-scale data without needing to manage the underlying infrastructure. +- [**`dbplyr`**](https://CRAN.R-project.org/package=dbplyr)**:** Works with `bigrquery` to enable `dplyr`-style data manipulation directly on BigQuery tables. 
+- [**`duckplyr`**](https://CRAN.R-project.org/package=duckplyr)**:** While primarily for DuckDB, it can complement BigQuery workflows by handling intermediate large data manipulations efficiently. + +#### Apache Spark Integration + +- [**`sparklyr`**](https://CRAN.R-project.org/package=sparklyr)**:** Connects R to Apache Spark, enabling scalable data processing, machine learning, and integration with Spark’s ecosystem. + +#### Stream Processing + +- [**`sparklyr`**](https://CRAN.R-project.org/package=sparklyr)**:** Also supports structured streaming in Spark for handling real-time data. + +#### Parallel and High-Performance Computing + +- [**`future.batchtools`**](https://CRAN.R-project.org/package=future.batchtools)**:** Integrates the `future` package with batch job schedulers for distributed computing. + +#### Big Data Storage Formats + +- [**`arrow`**](https://CRAN.R-project.org/package=arrow)**:** Facilitates working with columnar storage formats optimized for Big Data. + +#### Cloud Integration + +- [**`bigrquery`**](https://CRAN.R-project.org/package=bigrquery)**:** Specifically integrates with Google BigQuery. +- [**`cloudml`**](https://CRAN.R-project.org/package=cloudml)**:** Interfaces with cloud-based machine learning services. + +------------------------------------------------------------------------ + +## Additional Considerations + +### Workflow Management + +- [**`drake`**](https://CRAN.R-project.org/package=drake) and [**`targets`**](https://CRAN.R-project.org/package=targets)**:** Manage complex workflows, ensuring reproducibility and efficiency as data scales. + +### Machine Learning & Modeling + +- [**`caret`**](https://CRAN.R-project.org/package=caret), [**`mlr3`**](https://CRAN.R-project.org/package=mlr3)**:** Scalable machine learning workflows that can integrate with parallel and distributed systems. 
+- [**`tidymodels`**](https://CRAN.R-project.org/package=tidymodels)**:** A collection of packages for modeling and machine learning that can scale with appropriate backend support. + +### Performance Monitoring & Optimization + +- [**`profvis`**](https://CRAN.R-project.org/package=profvis)**:** Profiling tool for R code to identify performance bottlenecks. +- [**`bench`**](https://CRAN.R-project.org/package=bench) and [**`microbenchmark`**](https://CRAN.R-project.org/package=microbenchmark)**:** Measure and compare the performance of different approaches. + +------------------------------------------------------------------------ + +## Categorized Package Overview + +For a clearer overview, the tables below categorize the packages by **Purpose** and **Scale of Data**: + +### Data Manipulation & Tidying + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-----------------|----------------------|-----------------| +| `dplyr` | Small, Medium | Grammar for data manipulation. Enables filtering, selecting, mutating, etc. | [CRAN](https://CRAN.R-project.org/package=dplyr) | +| `tidyr` | Small | Tools for tidying data, ensuring datasets are in the right format for analysis. | [CRAN](https://CRAN.R-project.org/package=tidyr) | +| `data.table` | Medium | High-performance data manipulation optimized for speed and memory efficiency. | [CRAN](https://CRAN.R-project.org/package=data.table) | +| `disk.frame` | Large | `data.frame`-like objects stored on disk with support for parallel processing. | [CRAN](https://CRAN.R-project.org/package=disk.frame) | +| `duckplyr` | Large, Big | Integrates `dplyr` with DuckDB for efficient large dataset manipulation. | [CRAN](https://CRAN.R-project.org/package=duckplyr) | + +### Data Import & Export + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-----------------|----------------------|-----------------| +| `readr` | Small | Efficiently reads rectangular data (e.g., CSV, TSV). 
| [CRAN](https://CRAN.R-project.org/package=readr) | +| `readxl` | Small | Reads Excel files. | [CRAN](https://CRAN.R-project.org/package=readxl) | +| `vroom` | Medium | Fast reading of rectangular data using multithreading. | [CRAN](https://CRAN.R-project.org/package=vroom) | +| `arrow` | Large, Big | High-performance data interchange using the Apache Arrow format. | [CRAN](https://CRAN.R-project.org/package=arrow) | + +### Reproducibility + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-----------------|----------------------|-----------------| +| `RMarkdown` | Small, Medium, Big | Combines code, output, and narrative in a single document for reproducibility. | [CRAN](https://CRAN.R-project.org/package=rmarkdown) | + +### Parallel Computing + +| **Package** | **Scale** | **Description** | **Link** | +|--------------|--------------|-------------------------|--------------------| +| `parallel` | Medium, Large, Big | Base R package for parallel execution. | [Documentation](https://stat.ethz.ch/R-manual/R-devel/library/parallel/doc/parallel.pdf) | +| `foreach` | Medium, Large, Big | Simplifies parallel looping constructs. | [CRAN](https://CRAN.R-project.org/package=foreach) | +| `doParallel` | Medium, Large, Big | Backend for the `foreach` package to enable parallel processing. | [CRAN](https://CRAN.R-project.org/package=doParallel) | +| `future` | Medium, Large, Big | Simplifies asynchronous and parallel processing workflows. | [CRAN](https://CRAN.R-project.org/package=future) | +| `furrr` | Medium, Large, Big | Combines `future` with `purrr` for parallel mapping functions. | [CRAN](https://CRAN.R-project.org/package=furrr) | +| `future.batchtools` | Big | Integrates the `future` package with batch job schedulers for distributed computing. 
| [CRAN](https://CRAN.R-project.org/package=future.batchtools) | + +### Memory Management + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-----------------|----------------------|-----------------| +| `pryr` | Medium | Tools for tracking and optimizing memory usage. | [CRAN](https://CRAN.R-project.org/package=pryr) | + +### Out-of-Memory Data Structures + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-----------------|----------------------|-----------------| +| `ff` | Large | Stores data on disk while accessing it as if it were in memory. | [CRAN](https://CRAN.R-project.org/package=ff) | +| `bigmemory` | Large | Manages massive matrices with shared memory support. | [CRAN](https://CRAN.R-project.org/package=bigmemory) | +| `disk.frame` | Large | `data.frame`-like objects stored on disk with support for parallel processing. | [CRAN](https://CRAN.R-project.org/package=disk.frame) | + +### Database Integration + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-----------------|----------------------|-----------------| +| `DBI` | Large, Big | Database interface definition for communication between R and DBMS. | [CRAN](https://CRAN.R-project.org/package=DBI) | +| `dbplyr` | Large, Big | `dplyr` backend for databases, allowing SQL-like data manipulation. | [CRAN](https://CRAN.R-project.org/package=dbplyr) | +| `duckplyr` | Large, Big | Integrates `dplyr` with DuckDB for efficient large dataset manipulation. | [CRAN](https://CRAN.R-project.org/package=duckplyr) | +| `RSQLite` | Large | Lightweight, disk-based database. | [CRAN](https://CRAN.R-project.org/package=RSQLite) | +| `bigrquery` | Big | Interface to Google's BigQuery for managing and querying large datasets. 
| [CRAN](https://CRAN.R-project.org/package=bigrquery) | + +### Machine Learning & Modeling + +| **Package** | **Scale** | **Description** | **Link** | +|------------------|------------------|--------------------|-----------------| +| `caret` | Medium to Big | Streamlined model training and tuning for machine learning. | [CRAN](https://CRAN.R-project.org/package=caret) | +| `mlr3` | Medium to Big | Modern, object-oriented machine learning framework. | [CRAN](https://CRAN.R-project.org/package=mlr3) | +| `tidymodels` | Medium to Big | Collection of packages for modeling and machine learning using tidy principles. | [CRAN](https://CRAN.R-project.org/package=tidymodels) | + +### Big Data Integration + +| **Package** | **Purpose** | **Description** | **Link** | +|----------------|--------------------------|----------------|----------------| +| `bigrquery` | Big Data Integration (BigQuery) | Interface to Google's BigQuery for managing and querying large datasets. | [CRAN](https://CRAN.R-project.org/package=bigrquery) | +| `sparklyr` | Big Data Integration (Spark) | Connects R to Apache Spark for scalable data processing and machine learning. | [CRAN](https://CRAN.R-project.org/package=sparklyr) | +| `cloudml` | Cloud Integration (Machine Learning) | Interfaces with cloud-based machine learning services. | [CRAN](https://CRAN.R-project.org/package=cloudml) | + +### Workflow Management + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-------------------|--------------------|-----------------| +| `drake` | Small to Big | Manages complex workflows with a focus on reproducibility and efficiency. | [CRAN](https://CRAN.R-project.org/package=drake) | +| `targets` | Small to Big | Successor to `drake` for pipeline management and workflow automation. 
| [CRAN](https://CRAN.R-project.org/package=targets) | + +### Performance Monitoring & Optimization + +| **Package** | **Scale** | **Description** | **Link** | +|---------------------|-----------------|------------------|-----------------| +| `profvis` | Small to Big | Profiling tool for R code to identify performance bottlenecks. | [CRAN](https://CRAN.R-project.org/package=profvis) | +| `bench` | Small to Big | Tools for measuring and comparing code performance. | [CRAN](https://CRAN.R-project.org/package=bench) | +| `microbenchmark` | Small to Big | Accurate timing of small code snippets. | [CRAN](https://CRAN.R-project.org/package=microbenchmark) | + +------------------------------------------------------------------------ + +## Final Summary + +Transitioning from small to Big Data in an R project requires a strategic approach to handle increasing data volumes and complexities. By focusing on data manipulation and handling, and leveraging the appropriate R packages categorized by purpose and scale, you can effectively manage and analyze large datasets. This overview deliberately leaves out data visualization tools so that it can concentrate on the core aspects of data management and processing that keep your project efficient and scalable as it grows. + +Remember to continually assess your project's needs and adapt your toolset accordingly, ensuring that each phase of data scaling is supported by the best available resources. + +------------------------------------------------------------------------ diff --git a/snippets.qmd b/snippets.qmd deleted file mode 100644 index 1df2b04..0000000 --- a/snippets.qmd +++ /dev/null @@ -1,162 +0,0 @@ ---- -title: "RStudio Snippets" -author: "Jake Peters" -date: 10/11/24 -format: html -editor: - markdown: - wrap: 72 ---- - -# Introduction - -This tutorial demonstrates how to use both built-in and custom RStudio -snippets within an R script. 
By leveraging snippets, you can streamline -your coding process, reduce repetitive tasks, and maintain consistency -across your projects. - -## Table of Contents - -1. Introduction to RStudio Snippets -2. Using Built-in Snippets -3. Creating and Using Custom Snippets - -## Introduction to RStudio Snippets - -*Snippets* in RStudio are predefined blocks of code that can be quickly -inserted into your scripts or markdown files. They enhance productivity -by minimizing repetitive typing and ensuring consistency across your -coding projects. - -In this tutorial, we'll explore how to utilize RStudio's built-in -snippets and create custom snippets tailored to your specific workflow. - ------------------------------------------------------------------------- - -## Using Built-in Snippets - -RStudio comes equipped with several built-in snippets that can be -effortlessly used in `.qmd` files. Below are some commonly used built-in -snippets along with examples of their usage. - -#### Fun - -Type `fun` and press `Tab`. - -``` r -fun my_function <- function(arg1, arg2) { -# Function body -} -``` - -#### For - -Type `for` and press `Tab`. - -``` r -for (variable in vector) { - -} -``` - -#### While - -Type `while` and press `Tab`. - -``` r -while (condition) { - -} -``` - -#### Apply - -Type `lapply` and press `Tab`. - -``` r -apply(array, margin, ...) -``` - -#### Lapply - -Type `lapply` and press `Tab`. - -``` r -lapply(list, function) -``` - -#### ts - -Type `ts` and press `Tab`. - -``` r -# Fri Oct 11 09:53:23 2024 ------------------------------ -``` - -## Using Custom Snippets - -Go to `Tools` \> `Global Options` \> `Snippets` \> `Edit`. - -Then paste the following at the bottom of the page and hit `Apply` - -``` r -snippet dev - "nih-nci-dceg-connect-dev" -``` - -![Gif showing the workflow of creating/using your own -snippet](media/snippets.gif) - -::: callout-note -Note that all code after the `snippet ` key word must -be indented. 
-::: - -## More custom snippets to try - -``` r -snippet header - ## Description ================================================================= - # Title: ${1:Script Title} - # Author: ${2:Your Name} - # Date: ${3:`r format(Sys.Date(), "%Y-%m-%d")`} - # Objective: ${4:Script Purpose} - # GH-Issue: ${5:Github Issue} - -snippet query - bq_auth() # Authenticate with BigQuery - con <- DBI::dbConnect(bigrquery::bigquery(), - project=project, dataset=dataset, billing=billing)) - - # Specify just the data we need with a query - dataset <- "FlatConnect" - table <- "participants_JP" - tier <- "nih-nci-dceg-connect-prod-6d04" - sql <- glue::glue( - """ - SELECT - Connect_ID, - token - FROM `{project}.{dataset}.{table}` - """) - - # Query the data and store as reference object -data <- dbGetQuery(con, SQL) - -snippet dev - "nih-nci-dceg-connect-dev" - -snippet stg - "nih-nci-dceg-connect-stg-5519" - -snippet prod - "nih-nci-dceg-connect-prod-6d04" - -snippet exclusions - WHERE - p.d_821247024 = '197316935' -- 'Verif Status' IS 'Verified' - AND p.d_747006172 != '353358909' -- 'Withdraw consent' IS NOT "Yes" - AND p.d_100767870 = '104430631' -- 'All Base Surv Complete' = 'No' - AND p.d_685002411_d_994064239 != '353358909' -- 'Ref Base Survs' IS NOT "Yes" - AND p.d_987563196 != '353358909' -- 'Dead' IS NOT 'Yes' -``` diff --git a/tutorials/example.R b/tutorials/example.R new file mode 100644 index 0000000..f591875 --- /dev/null +++ b/tutorials/example.R @@ -0,0 +1,46 @@ +# load libraries +library(testthat) +library(usethis) + +# this creates the correct testing file +## replace "example" with your file name +usethis::use_test("example") + +# simple addition function +func1=function(num1, num2, num3){ + x = num1 + num2 + num3 + return(x) +} + +# division function, slightly more complicated +func2 = function(num1, num2){ + x = num1/num2 + return(x) +} + +# text concatenation +func3 = function(str1, str2){ + x = paste(str1, str2, sep = "") # putting no separation, which isn't 
what I want so that the test will fail + return(x) +} + +# creating a vector +func4 = function(num){ + x = rep(4, num) + return(x) +} + + +# creating a df +func5 = function(num1, num2){ + x = c(num1, num1+1, num1+2) + y = c(num2, num2+1, num2+2) + df = data.frame(x = x, y = y) + return(df) +} + +# Resources +## https://r-pkgs.org/testing-basics.html See Section 13.5.4 for specific expectations you can test +## https://rstudio.github.io/cheatsheets/html/package-development.html +## https://testthat.r-lib.org/reference/index.html +## https://matthieu-bruneaux.gitlab.io/guide-r-rstudio-git-gitlab/060-testing.html, more on expectations \ No newline at end of file diff --git a/tutorials/ggplot_templates.qmd b/tutorials/ggplot_templates.qmd new file mode 100644 index 0000000..5e78ac8 --- /dev/null +++ b/tutorials/ggplot_templates.qmd @@ -0,0 +1,232 @@ +--- +title: "ggplot Templates" +author: Leila Orszag +date: 11/01/2024 +keywords: + - productivity + - modularization +format: html +editor: + markdown: + wrap: 72 +--- + +## Purpose + +In this session, we will review how to use ggplot themes and review a +few templates that could be useful for future data visualization. + +## Colors + +We have a set of pre-selected Connect colors, listed below. 
+ +```{r} +color_palette <- list( + blue = c("#2973A5", "#648EB4", "#8BAAC7", "#B1C7D9", "#D8E3EC"), + darkblue = c("#164C71", "#51708A", "#7C94A8", "#A8B7C5", "#D3DBE2"), + yellow = c("#FDBE19", "#F6CC6C", "#F8D991", "#FBE5B5", "#FDF2DA"), + skyblue = c("#309EBD", "#74B0C7", "#97C4D5", "#B9D7E3", "#DDECF1"), + turq = c("#3C989E", "#77ACB0", "#99C0C4", "#BBD5D7", "#DDEAEC"), + grey = c("#565C65", "#797D83", "#9A9DA3", "#BBBEC1", "#DDDEE0"), + brown = c("#CC7D15", "#CD995B", "#DAB384", "#E7CCAD", "#F3E6D6") + ) +``` + +Using Rebecca's code, we can pull the number of distinct colors we need: + +```{r} +select_colors <- function(number) { + # Initialize a vector to store selected colors + selected_colors <- character(number) # Assuming colors are character strings + + # Get the number of color groups and the maximum number of shades + num_groups <- length(color_palette) + max_shades <- max(lengths(color_palette)) + + # Loop through each shade level and then each color group to fill the selected_colors + counter <- 1 + for (shade in seq_len(max_shades)) { + for (group in seq_len(num_groups)) { + current_palette <- color_palette[[group]] + if (length(current_palette) >= shade && counter <= number) { + selected_colors[counter] <- current_palette[shade] + counter <- counter + 1 + } + if (counter > number) break # Stop if we've reached the desired number + } + if (counter > number) break + } + + return(selected_colors) +} +``` + +# ggplot + +## Simple ggplot Example + +Pull in relevant library + +```{r} +library(ggplot2) +``` + +Create the data frame + +```{r} +x = c(10, 10, 20, 20, 30, 30, 40, 40, 50, 50, 60, 60) +y = c(5, 2, 3, 4, 1, 6, 4, 7, 2, 3, 5, 5) +z = c("girl", "boy", "girl", "boy", "girl", "boy", "girl", "boy", "girl", "boy", "girl", "boy") +data = data.frame(x, y, z) # keep z in the data so aes(color = z) does not depend on a global +``` + +Create our first line graph + +```{r} +ggplot(data, aes(x = x, y = y)) + + geom_point(aes(color = z))+ + geom_line(aes(color = z)) +``` + +This graph is great but doesn't align with our aesthetic. 
+ +```{r} +ggplot(data, aes(x = x, y = y)) + + geom_point(aes(color = z)) + + geom_line(aes(color = z))+ + labs(title = "Title", + x = "x", + y = "y", + color = "Z") + + scale_colour_manual(values = select_colors(2))+ + theme(plot.title = element_text(hjust = 0.5), + panel.background = element_rect(fill = "white"), + panel.grid.major = element_line(color = "grey"), + panel.grid.minor = element_line(color = "grey")) +``` + +Looks much better, but we want to wrap it into a function. + +## Creating & Using Function + +Below is a function that you can add to a plot after specifying titles & +the number of categories (colors) you need for your graph. + +```{r} +theme_function = function(title, xlab, ylab, legend_lab, n) { + # Add any theme specifications in here + theme <- theme( + plot.title = element_text(hjust = 0.5), + panel.background = element_rect(fill = "white"), + panel.grid.major = element_line(color = "lightgrey"), + panel.grid.minor = element_line(color = "lightgrey") + ) + + # Defining the colors we will return + color_scale <- scale_color_manual(values = select_colors(n)) + color_scale2 <- scale_fill_manual(values = select_colors(n)) + + # Return a list containing the elements we want standardized + list( + # Change labels, or add new ones (color and fill share one title so their legends merge) + labs( + title = title, + x = xlab, + y = ylab, + color = legend_lab, fill = legend_lab), + theme, + color_scale, + color_scale2 + ) +} +``` + +```{r} +ggplot(data, aes(x = x, y = y)) + + geom_point(aes(color = z)) + + geom_line(aes(color = z)) + + theme_function("Title", "X", "Y", "Gender", 2) +``` + +## Histogram + +```{r} +library(palmerpenguins) +colnames(penguins) +``` + +```{r} +ggplot(penguins)+ + geom_histogram(aes(x= bill_length_mm, color = sex, fill = sex), binwidth = 1)+ + theme_function("Title", "X", "Y", "Gender", 3) +``` + +## Bar Graph + +```{r} +ggplot(penguins)+ + geom_bar(aes(x= island, color = sex, fill = sex))+ + theme_function("Title", "X", "Y", "Gender", 3) +``` + +# plotly + +```{r} +library(plotly) +``` + +## 
Simple plotly Example + +Create our first line graph + +```{r} +plot_ly(data, + x = ~x, + y = ~y, + split = ~z, + type = "scatter", + mode = "lines+markers") +``` + +```{r} +plot_ly(data, + x = ~x, + y = ~y, + color = ~z, + type = "scatter", + mode = "lines+markers", + colors = select_colors(2)) %>% + layout( + title = 'Title', + xaxis = list(title = "x"), + yaxis = list(title = "y"), + legend = list(title = list(text = "Gender")) + ) +``` + +## Creating and using function + +```{r} +theme_plotly = function(plot, title, xlab, ylab, legend_lab) { + plot <- plot %>% + layout( + title = title, + xaxis = list(title = xlab), # Use the xlab parameter + yaxis = list(title = ylab), # Use the ylab parameter + legend = list(title = list(text = legend_lab)) # Use the legend_lab parameter + ) + +plot # Return the modified plot +} +``` + +```{r} +plot = plot_ly(data, + x = ~x, + y = ~y, + color = ~z, + type = "scatter", + mode = "lines+markers", + colors = select_colors(2)) +theme_plotly(plot, "Title", "X", "Y", "Legend") +``` diff --git a/tutorials/info_schema_tutorial.qmd b/tutorials/info_schema_tutorial.qmd new file mode 100644 index 0000000..4b77ac2 --- /dev/null +++ b/tutorials/info_schema_tutorial.qmd @@ -0,0 +1,113 @@ +--- +title: "Tutorial: Querying BigQuery Information Schema" +author: "Jake Peters" +date: 2025-01-22 +--- + +## Introduction + +In this tutorial, we demonstrate how to use a Quarto document to query BigQuery's information schema and identify where specific Concept IDs are located in your database. By integrating R and SQL, you learn how to authenticate with BigQuery, execute custom queries, and dynamically construct search queries based on user-specified Concept IDs. + +## Prerequisites +- Before following this tutorial, ensure you have: R installed on your system. +- The following R packages installed: `bigrquery`, `dplyr`, `DBI`, `dbplyr`, and `glue`. 
+- Access to a BigQuery project and dataset with appropriate permissions to query the `INFORMATION_SCHEMA`. + +## Table of Contents: +1. **Setup and Authentication:** Load libraries and authenticate with BigQuery. +2. **Establishing the Database Connection:** Specify the project and dataset, open a connection, and list the tables to confirm it works. +3. **Querying the Information Schema:** Query the BigQuery `INFORMATION_SCHEMA.COLUMNS` view to retrieve metadata that matches a specific pattern. +4. **Building a Dynamic Query Function:** Create an R function that dynamically generates SQL queries to search for Concept IDs, then run it and review the results. + +## Step 1: Setup and Authentication + +The Quarto document begins with a header that includes essential metadata (title, author, and date). The first code chunk then loads the required libraries and authenticates with BigQuery. + +The chunk options `warning=FALSE` and `message=FALSE` suppress package start-up output for a clean setup: + +```{r, warning=FALSE, message=FALSE} +library(bigrquery) +library(dplyr) +library(DBI) +library(dbplyr) +library(glue) + +# Authenticate with BigQuery +bigrquery::bq_auth() +``` + +## Step 2: Establishing the Database Connection + +Here, we specify the dataset and project and open a connection. Finally, we list the tables in the dataset to confirm that the connection is working: + +```{r} +# Specify dataset and project details +dataset <- "FlatConnect" +project <- "nih-nci-dceg-connect-dev" + +# Establish connection to BigQuery +con <- DBI::dbConnect(bigrquery::bigquery(), + project = project, + dataset = dataset, + billing = project) + +# List available tables to verify the connection +DBI::dbListTables(con) +``` + +## Step 3: Querying the Information Schema + +The next code block runs a SQL query against the `INFORMATION_SCHEMA.COLUMNS` view to locate columns whose names contain specific Concept IDs. 
This query filters columns based on a pattern present in their names: + +```{sql, connection=con} +SELECT + table_catalog, + table_schema, + table_name, + column_name +FROM FlatConnect.INFORMATION_SCHEMA.COLUMNS +WHERE column_name + LIKE '%158409298%261863326%'; +``` + +## Step 4: Building a Dynamic Query Function + +To make the querying process more flexible, we define an R function named `get_schema_info`. This function accepts a vector of Concept IDs and a tier (`dev`, `stg`, or `prod`, for development, staging, or production), constructs the appropriate SQL query dynamically, and returns the result: + +```{r} +get_schema_info <- function(con, cids, tier, dataset = 'FlatConnect') { + stopifnot(length(cids) > 0) # an empty vector would build the pattern "%%", which matches every column + # Determine the project based on the tier; the unnamed last arm makes an unknown tier fail fast + project <- switch(tier, + dev = "nih-nci-dceg-connect-dev", + stg = "nih-nci-dceg-connect-stg-5519", + prod = "nih-nci-dceg-connect-prod-6d04", + stop("tier must be 'dev', 'stg', or 'prod'", call. = FALSE)) + # Collapse the Concept IDs into a single string with "%" delimiters (e.g., "%cid1%cid2%cid3%") + cid_str <- paste0("%", paste(cids, collapse = "%"), "%") + print(glue::glue("cid_str: {cid_str}\n\n")) + + # Construct the SQL query dynamically + sql <- glue::glue("SELECT table_catalog, table_schema, table_name, column_name + FROM `{project}.{dataset}`.INFORMATION_SCHEMA.COLUMNS + WHERE column_name LIKE '{cid_str}'") + print(glue::glue("SQL Query: \n{sql}\n\n")) + + # Execute the query and store the result + result <- DBI::dbGetQuery(con, sql) + + return(result) +} + +# Execute the function with a vector of Concept IDs and store the output in df +df <- get_schema_info(con, c('158409298', '261863326'), tier='dev') + +# Display the query results +df +``` + +## Conclusion + +This tutorial has shown you how to query BigQuery's `INFORMATION_SCHEMA` to find the locations of specific Concept IDs within your database. By combining R scripting and SQL queries within a Quarto document, you can automate schema exploration and streamline the process of database management and analysis. 
+ +Feel free to modify the query function or extend this approach for different types of metadata searches. Happy querying! diff --git a/tutorials/project_specific_pseudo_ids.md b/tutorials/project_specific_pseudo_ids.md new file mode 100644 index 0000000..8498cf1 --- /dev/null +++ b/tutorials/project_specific_pseudo_ids.md @@ -0,0 +1,106 @@ +# Using Authorized Views for Project-Specific Pseudo IDs in BigQuery + +This tutorial shows you how to maintain a single "master" table with real Connect_ID values while exposing project-specific pseudo_IDs through authorized views. This approach avoids duplicating data across projects and ensures that each project sees only its own pseudo_IDs. The tutorial covers: + +1. Creating the master table. +2. Creating a UDF for project-specific pseudo_ID generation. +3. Creating a project-specific authorized view. +4. Key maintenance and governance points. + +------------------------------------------------------------------------ + +## Step 1: Create the Master Table + +Store your original data with real Connect_ID values in a master table. Limit access to this table so that only trusted service accounts or administrators can query it directly. + +``` sql +CREATE OR REPLACE TABLE your_project.master_dataset.master_table AS +SELECT + Connect_ID, + field_a, + field_b +FROM + source_of_truth; +``` + +*Note:* Be sure to set proper access controls on `master_table` to prevent unauthorized access to real IDs. + +------------------------------------------------------------------------ + +## Step 2: Create a UDF for Project-Specific Hashing + +Create a user-defined function that converts the real Connect_ID into a pseudo_ID. This function incorporates a project-specific salt to generate unique pseudo_IDs per project. 
+ +``` sql +CREATE OR REPLACE FUNCTION your_project.your_dataset.func_project_pseudo_id( + real_id STRING, + project_name STRING +) RETURNS STRING AS ( + TO_HEX( /* hex-encode the digest: raw SHA256 bytes are generally not valid UTF-8 text */ + SHA256(CONCAT(real_id, '_SECRET_SALT_FOR_', project_name)) + ) +); +``` + +*Key Points:* + +- Adjust the salt (`'_SECRET_SALT_FOR_'`) per project as needed. + +- Ensure that the salt values are stored and managed securely. + +- This function ensures the same real Connect_ID will yield different pseudo_IDs for different projects. + +------------------------------------------------------------------------ + +## Step 3: Create a Project-Specific Authorized View + +Create a view for each project that selects data from the master table and applies the UDF to transform the Connect_ID into a pseudo_ID. Only grant users access to this view—not to the master table. + +For example, for "ProjectA": + +``` sql +CREATE OR REPLACE VIEW your_project.projectA_dataset.view_projectA AS +SELECT + your_project.your_dataset.func_project_pseudo_id(master_table.Connect_ID, 'ProjectA') AS pseudo_ID, + master_table.field_a, + master_table.field_b +FROM + your_project.master_dataset.master_table; +``` + +*Key Considerations:* Grant the appropriate permissions so that ProjectA users only have access to `view_projectA`. Each project can have a similar view, but with a project-specific salt or project name. + +------------------------------------------------------------------------ + +## Maintenance and Governance + +When implementing this approach, consider the following maintenance and governance aspects: + +- **Single Source of Truth:**\ + Maintain only one master table, minimizing data duplication and the risk of inconsistencies. + +- **Access Control:**\ + Ensure that only authorized users have access to the master table. Each project should only access its authorized view. 
+ +- **Data Updates:**\ + As the master table is updated, the authorized views remain valid and reflect the latest data without additional modifications. + +- **Security Reviews:**\ + Regularly review the security policies surrounding the mapping function and view access. Rotate salts or update security practices as necessary. + +- **Compliance:**\ + Ensure that your setup complies with your organization's data governance policies and any relevant privacy regulations. + +------------------------------------------------------------------------ + +By following these steps, you can keep a secure master dataset while exposing project-specific pseudo_IDs via authorized views. This approach limits the risk of cross-project re-identification and adheres to best practices in data security and governance. + +## Internal Example: + +NCCR - state registry + +- Johanna + +- Use a CBIIT-blessed algorithm + +- diff --git a/tutorials/snippets.qmd b/tutorials/snippets.qmd index 1df2b04..ef887b9 100644 --- a/tutorials/snippets.qmd +++ b/tutorials/snippets.qmd @@ -2,6 +2,8 @@ title: "RStudio Snippets" author: "Jake Peters" date: 10/11/24 +keywords: + - productivity format: html editor: markdown: @@ -105,7 +107,7 @@ snippet dev ``` ![Gif showing the workflow of creating/using your own -snippet](media/snippets.gif) +snippet](../media/snippets.gif) ::: callout-note Note that all code after the `snippet ` key word must @@ -141,7 +143,7 @@ snippet query """) # Query the data and store as reference object -data <- dbGetQuery(con, SQL) + data <- dbGetQuery(con, SQL) snippet dev "nih-nci-dceg-connect-dev" diff --git a/tutorials/test-example.R b/tutorials/test-example.R new file mode 100644 index 0000000..c48c89b --- /dev/null +++ b/tutorials/test-example.R @@ -0,0 +1,28 @@ +library(testthat) + +# Source the function file +source("/Users/orszagl2/Documents/AdHoc/unit_testing/example.R") + +# function 1 tests +test_that("Function adds numbers properly", {expect_equal(func1(3, 5, 7), 15)}) 
+test_that("func1 errors on non-numeric input", {expect_error(func1(3, 5, "a"))}) + +# function 2 tests +test_that("func2 divides properly", {expect_equal(func2(3, 2), 1.5)}) +test_that("func2 errors on non-numeric input", {expect_error(func2(3, "a"))}) +test_that("func2 returns Inf when dividing by zero", {expect_equal(func2(3, 0), Inf)}) +test_that("func2 returns a double", {expect_type(func2(3, 2), "double")}) + +# function 3 test, purposefully one that fails! +test_that("func3 concatenation works", {expect_equal(func3("Hi", "Jake"), "Hi Jake")}) +test_that("func3 returns a character", {expect_type(func3("Hi", "Jake"), "character")}) + +# function 4 test +test_that("func4 returns the requested length", {expect_length(func4(2), 2)}) +test_that("func4 returns the requested length (expect_true form)", {expect_true(length(func4(2)) == 2)}) # same check as above, rewritten with expect_true() +test_that("func4 returns a double", {expect_type(func4(2), "double")}) + +## s3 class test (function 5) +### common s3 classes include data.frame, lm, glm, ggplot, factor +test_that("func5 returns a data.frame", {expect_s3_class(func5(3, 7), "data.frame")}) +## note exact = FALSE is the default, so it could be a tibble and this test would still pass!