From afbc3e6ef7810dd01df8d4a85704231bd025ee90 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 09:33:11 -0400 Subject: [PATCH 01/53] update README --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fbae9c8..ebb8d29 100644 --- a/README.md +++ b/README.md @@ -1 +1,3 @@ -# analytics-at-scale-workshop \ No newline at end of file +# analytics-at-scale-workshop + +**live at ** From 9af64e9c34f3d36dd7e4171d3604baf559d89440 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 09:34:46 -0400 Subject: [PATCH 02/53] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ebb8d29..80ecd7f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,3 @@ # analytics-at-scale-workshop -**live at ** +live at [https://analyticsphere.github.io/analytics-at-scale-workshop/](https://analyticsphere.github.io/analytics-at-scale-workshop/) From 6e93e8f9635827fd98b48309a48cb29050bf8d3c Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 09:43:37 -0400 Subject: [PATCH 03/53] add 10/25 meeting notes --- meeting-notes/2024-10-25.qmd | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 meeting-notes/2024-10-25.qmd diff --git a/meeting-notes/2024-10-25.qmd b/meeting-notes/2024-10-25.qmd new file mode 100644 index 0000000..ee3451e --- /dev/null +++ b/meeting-notes/2024-10-25.qmd @@ -0,0 +1,12 @@ +--- +title: Wiki +subtitle: Introduce Analytics at Scale wiki +date: 2024-10-25 +--- + +# Attendees: +JP, .. 
+ +# Agenda + - Introduce Wiki + - Discuss agenda for next meeting \ No newline at end of file From b0c54d5ff40fe5ec00264b21a484c018b3a95aa5 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 09:46:59 -0400 Subject: [PATCH 04/53] Rename 2024-10-25.qmd to 2024-10-25.md --- meeting-notes/{2024-10-25.qmd => 2024-10-25.md} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename meeting-notes/{2024-10-25.qmd => 2024-10-25.md} (77%) diff --git a/meeting-notes/2024-10-25.qmd b/meeting-notes/2024-10-25.md similarity index 77% rename from meeting-notes/2024-10-25.qmd rename to meeting-notes/2024-10-25.md index ee3451e..71c9fab 100644 --- a/meeting-notes/2024-10-25.qmd +++ b/meeting-notes/2024-10-25.md @@ -9,4 +9,4 @@ JP, .. # Agenda - Introduce Wiki - - Discuss agenda for next meeting \ No newline at end of file + - Discuss agenda for next meeting From ee720489645920f747eb731c091269396b2e7a3b Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 09:47:39 -0400 Subject: [PATCH 05/53] Update 2024-10-25.md --- meeting-notes/2024-10-25.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meeting-notes/2024-10-25.md b/meeting-notes/2024-10-25.md index 71c9fab..23c4504 100644 --- a/meeting-notes/2024-10-25.md +++ b/meeting-notes/2024-10-25.md @@ -8,5 +8,5 @@ date: 2024-10-25 JP, .. 
# Agenda - - Introduce Wiki - - Discuss agenda for next meeting +- Introduce Wiki +- Discuss agenda for next meeting From 805c708f8e9af619476367f517b1f16df230f74f Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 09:47:54 -0400 Subject: [PATCH 06/53] Rename 2024-10-11.qmd to 2024-10-11.md --- meeting-notes/{2024-10-11.qmd => 2024-10-11.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename meeting-notes/{2024-10-11.qmd => 2024-10-11.md} (100%) diff --git a/meeting-notes/2024-10-11.qmd b/meeting-notes/2024-10-11.md similarity index 100% rename from meeting-notes/2024-10-11.qmd rename to meeting-notes/2024-10-11.md From fdb0a23dd61d8197f8af6c528a513ad8149167a3 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 09:48:18 -0400 Subject: [PATCH 07/53] Rename 2024-10-04.qmd to 2024-10-04.md --- meeting-notes/{2024-10-04.qmd => 2024-10-04.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename meeting-notes/{2024-10-04.qmd => 2024-10-04.md} (100%) diff --git a/meeting-notes/2024-10-04.qmd b/meeting-notes/2024-10-04.md similarity index 100% rename from meeting-notes/2024-10-04.qmd rename to meeting-notes/2024-10-04.md From c1720e97797deada438258f5416dc7105d941cf7 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 09:52:41 -0400 Subject: [PATCH 08/53] remove junk files --- .DS_Store | Bin 6148 -> 0 bytes .Rhistory | 1 - snippets.qmd | 162 --------------------------------------------------- 3 files changed, 163 deletions(-) delete mode 100644 .DS_Store delete mode 100644 .Rhistory delete mode 100644 snippets.qmd diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 359dfac9f5cd4b9dd0ad90b0e87006979cec0cce..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%}T>S5Z>*N-B5%c6g@6@E!Y-|h?iLF3mDOZN=-~?gK4%jtv!@NuKGg0fsf$L zM-iO;skGLgn=LW}v)}CO%x?Be*v$|^Xk+MC2`Ldm7^uXI1l7RRmB(v}D$?HX7GN@EuXeOJ>uN7E<71_1Z5hG{d4u(m? 
z?O)2?nGlgTwO#K#3`VWe#<7S8ZWsrBl@R&?gj`*Ou`fmqF^v60>i%B?xJoi9yE^>J~$=o2B+`{#>|GIcGbBR-QIL~ zezYr^P4l_3;hX>J2G2*IBm7??dYzwExOe(-B;q#AI(_1qM?n)AKn9S3 `Global Options` \> `Snippets` \> `Edit`. - -Then paste the following at the bottom of the page and hit `Apply` - -``` r -snippet dev - "nih-nci-dceg-connect-dev" -``` - -![Gif showing the workflow of creating/using your own -snippet](media/snippets.gif) - -::: callout-note -Note that all code after the `snippet ` key word must -be indented. -::: - -## More custom snippets to try - -``` r -snippet header - ## Description ================================================================= - # Title: ${1:Script Title} - # Author: ${2:Your Name} - # Date: ${3:`r format(Sys.Date(), "%Y-%m-%d")`} - # Objective: ${4:Script Purpose} - # GH-Issue: ${5:Github Issue} - -snippet query - bq_auth() # Authenticate with BigQuery - con <- DBI::dbConnect(bigrquery::bigquery(), - project=project, dataset=dataset, billing=billing)) - - # Specify just the data we need with a query - dataset <- "FlatConnect" - table <- "participants_JP" - tier <- "nih-nci-dceg-connect-prod-6d04" - sql <- glue::glue( - """ - SELECT - Connect_ID, - token - FROM `{project}.{dataset}.{table}` - """) - - # Query the data and store as reference object -data <- dbGetQuery(con, SQL) - -snippet dev - "nih-nci-dceg-connect-dev" - -snippet stg - "nih-nci-dceg-connect-stg-5519" - -snippet prod - "nih-nci-dceg-connect-prod-6d04" - -snippet exclusions - WHERE - p.d_821247024 = '197316935' -- 'Verif Status' IS 'Verified' - AND p.d_747006172 != '353358909' -- 'Withdraw consent' IS NOT "Yes" - AND p.d_100767870 = '104430631' -- 'All Base Surv Complete' = 'No' - AND p.d_685002411_d_994064239 != '353358909' -- 'Ref Base Survs' IS NOT "Yes" - AND p.d_987563196 != '353358909' -- 'Dead' IS NOT 'Yes' -``` From 06d0e86873ab5215851d6fb47ac88dc0493ae204 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 09:53:51 -0400 Subject: [PATCH 09/53] remove junk folder --- 
.../0CDE63C2/jobs/50965170-output.json | 98 ----------- .Rproj.user/0CDE63C2/pcs/files-pane.pper | 9 - .Rproj.user/0CDE63C2/pcs/source-pane.pper | 3 - .../0CDE63C2/pcs/windowlayoutstate.pper | 14 -- .Rproj.user/0CDE63C2/pcs/workbench-pane.pper | 5 - .Rproj.user/0CDE63C2/rmd-outputs | 6 - .Rproj.user/0CDE63C2/saved_source_markers | 1 - .Rproj.user/0CDE63C2/sources/prop/19AA840D | 13 -- .Rproj.user/0CDE63C2/sources/prop/503D05BB | 6 - .Rproj.user/0CDE63C2/sources/prop/56B42243 | 13 -- .Rproj.user/0CDE63C2/sources/prop/6BFBD54A | 7 - .Rproj.user/0CDE63C2/sources/prop/7377711F | 11 -- .Rproj.user/0CDE63C2/sources/prop/A0008A94 | 11 -- .Rproj.user/0CDE63C2/sources/prop/B5CF2C5C | 11 -- .Rproj.user/0CDE63C2/sources/prop/BF877C30 | 11 -- .Rproj.user/0CDE63C2/sources/prop/C25DB8A1 | 6 - .Rproj.user/0CDE63C2/sources/prop/C752720B | 9 - .Rproj.user/0CDE63C2/sources/prop/C9422BD5 | 11 -- .Rproj.user/0CDE63C2/sources/prop/DB772C11 | 9 - .Rproj.user/0CDE63C2/sources/prop/INDEX | 12 -- .../sources/session-6a2f53da/0AED968E | 31 ---- .../session-6a2f53da/0AED968E-contents | 62 ------- .../session-6a2f53da/162D8A48-contents | 163 ------------------ .../sources/session-6a2f53da/4F13DDE1 | 26 --- .../session-6a2f53da/4F13DDE1-contents | 38 ---- .../session-6a2f53da/6057111B-contents | 4 - .../sources/session-6a2f53da/6D5FAD6F | 26 --- .../session-6a2f53da/6D5FAD6F-contents | 66 ------- .../session-6a2f53da/76052A04-contents | 1 - .../sources/session-6a2f53da/C45B3481 | 33 ---- .../session-6a2f53da/C45B3481-contents | 9 - .../sources/session-6a2f53da/D4827836 | 29 ---- .../session-6a2f53da/D4827836-contents | 162 ----------------- .../session-6a2f53da/DACF14F8-contents | 26 --- .../sources/session-6a2f53da/F27986C2 | 31 ---- .../session-6a2f53da/F27986C2-contents | 16 -- .../sources/session-6a2f53da/F2F6FBFA | 33 ---- .../session-6a2f53da/F2F6FBFA-contents | 162 ----------------- .../sources/session-6a2f53da/F466712C | 31 ---- .../session-6a2f53da/F466712C-contents | 34 ---- 
.../sources/session-6a2f53da/lock_file | 0 .../2CBBAA59-24.10.04/1/s/chunks.json | 1 - .../1/0CDE63C219570d0b/chunks.json | 1 - .../1/0CDE63C26a2f53da/chunks.json | 1 - .../1/0CDE63C2cc55e1fa/chunks.json | 1 - .../notebooks/3B243C68-index/1/s/chunks.json | 1 - .../1/0CDE63C26a2f53da/chunks.json | 1 - .../4E1298AF-2024-10-04/1/s/chunks.json | 1 - .../1/0CDE63C26a2f53da/chunks.json | 1 - .../6F16FCC1-2024-10-11/1/s/chunks.json | 1 - .../1/0CDE63C26a2f53da/chunks.json | 1 - .../9DA072F5-snippets/1/s/chunks.json | 1 - .../1/0CDE63C26a2f53da/chunks.json | 1 - .../D470729E-2024.10.11/1/s/chunks.json | 1 - .../1/0CDE63C26a2f53da/chunks.json | 1 - .../F71330AC-meeting-notes/1/s/chunks.json | 1 - .../shared/notebooks/patch-chunk-names | 0 .Rproj.user/shared/notebooks/paths | 8 - 58 files changed, 1272 deletions(-) delete mode 100644 .Rproj.user/0CDE63C2/jobs/50965170-output.json delete mode 100644 .Rproj.user/0CDE63C2/pcs/files-pane.pper delete mode 100644 .Rproj.user/0CDE63C2/pcs/source-pane.pper delete mode 100644 .Rproj.user/0CDE63C2/pcs/windowlayoutstate.pper delete mode 100644 .Rproj.user/0CDE63C2/pcs/workbench-pane.pper delete mode 100644 .Rproj.user/0CDE63C2/rmd-outputs delete mode 100644 .Rproj.user/0CDE63C2/saved_source_markers delete mode 100644 .Rproj.user/0CDE63C2/sources/prop/19AA840D delete mode 100644 .Rproj.user/0CDE63C2/sources/prop/503D05BB delete mode 100644 .Rproj.user/0CDE63C2/sources/prop/56B42243 delete mode 100644 .Rproj.user/0CDE63C2/sources/prop/6BFBD54A delete mode 100644 .Rproj.user/0CDE63C2/sources/prop/7377711F delete mode 100644 .Rproj.user/0CDE63C2/sources/prop/A0008A94 delete mode 100644 .Rproj.user/0CDE63C2/sources/prop/B5CF2C5C delete mode 100644 .Rproj.user/0CDE63C2/sources/prop/BF877C30 delete mode 100644 .Rproj.user/0CDE63C2/sources/prop/C25DB8A1 delete mode 100644 .Rproj.user/0CDE63C2/sources/prop/C752720B delete mode 100644 .Rproj.user/0CDE63C2/sources/prop/C9422BD5 delete mode 100644 .Rproj.user/0CDE63C2/sources/prop/DB772C11 delete 
mode 100644 .Rproj.user/0CDE63C2/sources/prop/INDEX delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/0AED968E delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/0AED968E-contents delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/162D8A48-contents delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/4F13DDE1 delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/4F13DDE1-contents delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/6057111B-contents delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/6D5FAD6F delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/6D5FAD6F-contents delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/76052A04-contents delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/C45B3481 delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/C45B3481-contents delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/D4827836 delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/D4827836-contents delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/DACF14F8-contents delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/F27986C2 delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/F27986C2-contents delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/F2F6FBFA delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/F2F6FBFA-contents delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/F466712C delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/F466712C-contents delete mode 100644 .Rproj.user/0CDE63C2/sources/session-6a2f53da/lock_file delete mode 100644 .Rproj.user/shared/notebooks/2CBBAA59-24.10.04/1/s/chunks.json delete mode 100644 .Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C219570d0b/chunks.json delete mode 100644 .Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C26a2f53da/chunks.json delete mode 
100644 .Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C2cc55e1fa/chunks.json delete mode 100644 .Rproj.user/shared/notebooks/3B243C68-index/1/s/chunks.json delete mode 100644 .Rproj.user/shared/notebooks/4E1298AF-2024-10-04/1/0CDE63C26a2f53da/chunks.json delete mode 100644 .Rproj.user/shared/notebooks/4E1298AF-2024-10-04/1/s/chunks.json delete mode 100644 .Rproj.user/shared/notebooks/6F16FCC1-2024-10-11/1/0CDE63C26a2f53da/chunks.json delete mode 100644 .Rproj.user/shared/notebooks/6F16FCC1-2024-10-11/1/s/chunks.json delete mode 100644 .Rproj.user/shared/notebooks/9DA072F5-snippets/1/0CDE63C26a2f53da/chunks.json delete mode 100644 .Rproj.user/shared/notebooks/9DA072F5-snippets/1/s/chunks.json delete mode 100644 .Rproj.user/shared/notebooks/D470729E-2024.10.11/1/0CDE63C26a2f53da/chunks.json delete mode 100644 .Rproj.user/shared/notebooks/D470729E-2024.10.11/1/s/chunks.json delete mode 100644 .Rproj.user/shared/notebooks/F71330AC-meeting-notes/1/0CDE63C26a2f53da/chunks.json delete mode 100644 .Rproj.user/shared/notebooks/F71330AC-meeting-notes/1/s/chunks.json delete mode 100644 .Rproj.user/shared/notebooks/patch-chunk-names delete mode 100644 .Rproj.user/shared/notebooks/paths diff --git a/.Rproj.user/0CDE63C2/jobs/50965170-output.json b/.Rproj.user/0CDE63C2/jobs/50965170-output.json deleted file mode 100644 index ba1b4c2..0000000 --- a/.Rproj.user/0CDE63C2/jobs/50965170-output.json +++ /dev/null @@ -1,98 +0,0 @@ -[1,"==> quarto preview index.qmd --to html --no-watch-inputs --no-browse\n\n"] -[1,"\u001B[1mpandoc \u001B[22m\n to: html\n output-file: index.html\n standalone: true\n title-prefix: Analytics at Scale\n section-divs: true\n html-math-method: mathjax\n wrap: none\n default-image-extension: png\n css:\n - styles.css\n toc: true\n \n\u001B[1mmetadata\u001B[22m\n document-css: false\n link-citations: true\n date-format: long\n lang: en\n editor: visual\n theme: cosmo\n title: Analytics at Scale\n subtitle: Workshopping best practices for big data analytics 
in epidemiology\n \n"] -[1,"Output created: _site/index.html\n\n"] -[1,"\u001B[1m\u001B[34mTerminating existing preview server....\u001B[39m\u001B[22m\u001B[1m\u001B[34mDONE\n\u001B[39m\u001B[22m\n"] -[1,"\u001B[1m\u001B[34mPreparing to preview\u001B[39m\u001B[22m\n"] -[1,"\u001B[1m\u001B[34m\r[1/1] snippets.qmd\u001B[39m\u001B[22m\n"] -[1,"\n"] -[1,"\u001B[32mWatching files for changes\u001B[39m\n\u001B[32mBrowse at \u001B[39m\u001B[4m\u001B[32mhttp://localhost:3122/\u001B[39m\u001B[24m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n\u001B[31m /tutorials/media/snippets.gif (404: Not Found)\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n\u001B[31m /tutorials/media/snippets.gif (404: Not Found)\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n\u001B[31m /tutorials/media/snippets.gif (404: Not Found)\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n"] -[1,"\u001B[31m /tutorials/media/snippets.gif (404: Not Found)\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[1m\u001B[34m\r[1/2] meeting-notes/2024-10-11.qmd\u001B[39m\u001B[22m\n"] -[1,"\u001B[1m\u001B[34m\r[2/2] meeting-notes.qmd\u001B[39m\u001B[22m\n"] -[1,"\n"] -[1,"Output created: ../_site/meeting-notes/2024-10-11.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-11.html\u001B[39m\n\u001B[32mGET: /meeting-notes/2024-10-11.html\u001B[39m\n"] -[1,"\u001B[1m\u001B[34m\r[1/2] meeting-notes/2024-10-11.qmd\u001B[39m\u001B[22m\n"] -[1,"\u001B[1m\u001B[34m\r[2/2] meeting-notes.qmd\u001B[39m\u001B[22m\n"] -[1,"\n"] -[1,"Output created: 
../_site/meeting-notes/2024-10-11.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-11.html\u001B[39m\n\u001B[32mGET: /meeting-notes/2024-10-11.html\u001B[39m\n"] -[1,"\u001B[1m\u001B[34m\r[1/2] meeting-notes/2024-10-11.qmd\u001B[39m\u001B[22m\n"] -[1,"\u001B[1m\u001B[34m\r[2/2] meeting-notes.qmd\u001B[39m\u001B[22m\n"] -[1,"\n"] -[1,"Output created: ../_site/meeting-notes/2024-10-11.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-11.html\u001B[39m\n\u001B[32mGET: /meeting-notes/2024-10-11.html\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n"] -[1,"\u001B[31m /tutorials/media/snippets.gif (404: Not Found)\u001B[39m\n"] -[1,"\u001B[1m\u001B[34m\r[1/2] meeting-notes/2024-10-04.qmd\u001B[39m\u001B[22m\n"] -[1,"\u001B[1m\u001B[34m\r[2/2] meeting-notes.qmd\u001B[39m\u001B[22m\n"] -[1,"\n"] -[1,"Output created: ../_site/meeting-notes/2024-10-04.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-04.html\u001B[39m\n\u001B[32mGET: /meeting-notes/2024-10-04.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-04.html\u001B[39m\n"] -[1,"\u001B[1m\u001B[34m\r[1/2] meeting-notes/2024-10-04.qmd\u001B[39m\u001B[22m\n"] -[1,"\u001B[1m\u001B[34m\r[2/2] meeting-notes.qmd\u001B[39m\u001B[22m\n"] -[1,"\n"] -[1,"Output created: ../_site/meeting-notes/2024-10-04.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-04.html\u001B[39m\n\u001B[32mGET: /meeting-notes/2024-10-04.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[1m\u001B[34m\r[1/2] meeting-notes/2024-10-04.qmd\u001B[39m\u001B[22m\n"] -[1,"\u001B[1m\u001B[34m\r[2/2] meeting-notes.qmd\u001B[39m\u001B[22m\n"] -[1,"\n"] -[1,"Output created: ../_site/meeting-notes/2024-10-04.html\n\n\u001B[32mWatching files for 
changes\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-04.html\u001B[39m\n\u001B[32mGET: /meeting-notes/2024-10-04.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n"] -[1,"\u001B[31m /tutorials/media/snippets.gif (404: Not Found)\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-11.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-11.html\u001B[39m\n\u001B[32mGET: /meeting-notes/2024-10-04.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes/2024-10-04.html\u001B[39m\n"] -[1,"\u001B[32mGET: /meeting-notes.html\u001B[39m\n"] -[1,"\u001B[32mGET: /tutorials/snippets.html\u001B[39m\n"] -[1,"\u001B[31m /tutorials/media/snippets.gif (404: Not Found)\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[1mpandoc \u001B[22m\n to: html\n output-file: index.html\n standalone: true\n title-prefix: Analytics at Scale\n section-divs: true\n html-math-method: mathjax\n wrap: none\n default-image-extension: png\n css:\n - styles.css\n toc: true\n \n\u001B[1mmetadata\u001B[22m\n document-css: false\n link-citations: true\n date-format: long\n lang: en\n editor: visual\n theme: cosmo\n title: Analytics at Scale\n subtitle: Workshopping best practices for big data analytics in epidemiology\n \n"] -[1,"Output created: _site/index.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[1mpandoc \u001B[22m\n to: html\n output-file: index.html\n standalone: true\n 
title-prefix: Analytics at Scale\n section-divs: true\n html-math-method: mathjax\n wrap: none\n default-image-extension: png\n css:\n - styles.css\n toc: true\n \n\u001B[1mmetadata\u001B[22m\n document-css: false\n link-citations: true\n date-format: long\n lang: en\n editor: visual\n theme: cosmo\n title: Analytics at Scale\n subtitle: Workshopping best practices for big data analytics in epidemiology\n \n"] -[1,"Output created: _site/index.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[1mpandoc \u001B[22m\n to: html\n output-file: index.html\n standalone: true\n title-prefix: Analytics at Scale\n section-divs: true\n html-math-method: mathjax\n wrap: none\n default-image-extension: png\n css:\n - styles.css\n toc: true\n \n\u001B[1mmetadata\u001B[22m\n document-css: false\n link-citations: true\n date-format: long\n lang: en\n editor: visual\n theme: cosmo\n title: Analytics at Scale\n subtitle: Workshopping best practices for big data analytics in epidemiology\n \n"] -[1,"Output created: _site/index.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[1mpandoc \u001B[22m\n to: html\n output-file: index.html\n standalone: true\n title-prefix: Analytics at Scale\n section-divs: true\n html-math-method: mathjax\n wrap: none\n default-image-extension: png\n css:\n - styles.css\n toc: true\n \n\u001B[1mmetadata\u001B[22m\n document-css: false\n link-citations: true\n date-format: long\n lang: en\n editor: visual\n theme: cosmo\n title: Analytics at Scale\n subtitle: Workshopping best practices for big data analytics in epidemiology\n \n"] -[1,"Output created: _site/index.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[1mpandoc \u001B[22m\n to: html\n output-file: index.html\n standalone: true\n title-prefix: 
Analytics at Scale\n section-divs: true\n html-math-method: mathjax\n wrap: none\n default-image-extension: png\n css:\n - styles.css\n toc: true\n \n\u001B[1mmetadata\u001B[22m\n document-css: false\n link-citations: true\n date-format: long\n lang: en\n editor: visual\n theme: cosmo\n title: Analytics at Scale\n subtitle: Workshopping best practices for big data analytics in epidemiology\n \n"] -[1,"Output created: _site/index.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[1mpandoc \u001B[22m\n to: html\n output-file: index.html\n standalone: true\n title-prefix: Analytics at Scale\n section-divs: true\n html-math-method: mathjax\n wrap: none\n default-image-extension: png\n css:\n - styles.css\n toc: true\n \n\u001B[1mmetadata\u001B[22m\n document-css: false\n link-citations: true\n date-format: long\n lang: en\n editor: visual\n theme: cosmo\n title: Analytics at Scale\n subtitle: Workshopping best practices for big data analytics in epidemiology\n \n"] -[1,"Output created: _site/index.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[1mpandoc \u001B[22m\n to: html\n output-file: index.html\n standalone: true\n title-prefix: Analytics at Scale\n section-divs: true\n html-math-method: mathjax\n wrap: none\n default-image-extension: png\n css:\n - styles.css\n toc: true\n \n\u001B[1mmetadata\u001B[22m\n document-css: false\n link-citations: true\n date-format: long\n lang: en\n editor: visual\n theme: cosmo\n title: Analytics at Scale\n subtitle: Workshopping best practices for big data analytics in epidemiology\n \n"] -[1,"Output created: _site/index.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n\u001B[32mGET: /\u001B[39m\n"] -[1,"\u001B[1mpandoc \u001B[22m\n to: html\n output-file: index.html\n standalone: true\n title-prefix: Analytics at 
Scale\n section-divs: true\n html-math-method: mathjax\n wrap: none\n default-image-extension: png\n css:\n - styles.css\n toc: true\n \n\u001B[1mmetadata\u001B[22m\n document-css: false\n link-citations: true\n date-format: long\n lang: en\n editor: visual\n theme: cosmo\n title: Analytics at Scale\n subtitle: Workshopping best practices for big data analytics in epidemiology\n \n"] -[1,"Output created: _site/index.html\n\n\u001B[32mWatching files for changes\u001B[39m\n"] -[1,"\u001B[32mGET: /\u001B[39m\n\u001B[32mGET: /\u001B[39m\n"] diff --git a/.Rproj.user/0CDE63C2/pcs/files-pane.pper b/.Rproj.user/0CDE63C2/pcs/files-pane.pper deleted file mode 100644 index d1a259e..0000000 --- a/.Rproj.user/0CDE63C2/pcs/files-pane.pper +++ /dev/null @@ -1,9 +0,0 @@ -{ - "sortOrder": [ - { - "columnIndex": 2, - "ascending": true - } - ], - "path": "~/Documents/analytics_at_scale/.github/workflows" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/pcs/source-pane.pper b/.Rproj.user/0CDE63C2/pcs/source-pane.pper deleted file mode 100644 index b074a4f..0000000 --- a/.Rproj.user/0CDE63C2/pcs/source-pane.pper +++ /dev/null @@ -1,3 +0,0 @@ -{ - "activeTab": 1 -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/pcs/windowlayoutstate.pper b/.Rproj.user/0CDE63C2/pcs/windowlayoutstate.pper deleted file mode 100644 index f5a0adc..0000000 --- a/.Rproj.user/0CDE63C2/pcs/windowlayoutstate.pper +++ /dev/null @@ -1,14 +0,0 @@ -{ - "left": { - "splitterpos": 218, - "topwindowstate": "NORMAL", - "panelheight": 879, - "windowheight": 917 - }, - "right": { - "splitterpos": 237, - "topwindowstate": "NORMAL", - "panelheight": 879, - "windowheight": 917 - } -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/pcs/workbench-pane.pper b/.Rproj.user/0CDE63C2/pcs/workbench-pane.pper deleted file mode 100644 index 5699060..0000000 --- a/.Rproj.user/0CDE63C2/pcs/workbench-pane.pper +++ /dev/null @@ -1,5 +0,0 @@ -{ - "TabSet1": 1, - "TabSet2": 4, - "TabZoom": {} -} \ 
No newline at end of file diff --git a/.Rproj.user/0CDE63C2/rmd-outputs b/.Rproj.user/0CDE63C2/rmd-outputs deleted file mode 100644 index c4e7f90..0000000 --- a/.Rproj.user/0CDE63C2/rmd-outputs +++ /dev/null @@ -1,6 +0,0 @@ -~/Documents/analytics_at_scale/snippets.html - - - - - diff --git a/.Rproj.user/0CDE63C2/saved_source_markers b/.Rproj.user/0CDE63C2/saved_source_markers deleted file mode 100644 index 2b1bef1..0000000 --- a/.Rproj.user/0CDE63C2/saved_source_markers +++ /dev/null @@ -1 +0,0 @@ -{"active_set":"","sets":[]} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/19AA840D b/.Rproj.user/0CDE63C2/sources/prop/19AA840D deleted file mode 100644 index 1737d9e..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/19AA840D +++ /dev/null @@ -1,13 +0,0 @@ -{ - "rmdVisualMode": "false", - "rmdVisualWrapConfigured": "true", - "tempName": "Untitled1", - "source_window_id": "", - "Source": "Source", - "cursorPosition": "9,0", - "scrollLine": "0", - "docOutlineVisible": "1", - "rmdVisualCollapsedChunks": "", - "rmdVisualModeLocation": "295:0", - "docOutlineSize": "141" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/503D05BB b/.Rproj.user/0CDE63C2/sources/prop/503D05BB deleted file mode 100644 index eb1ff25..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/503D05BB +++ /dev/null @@ -1,6 +0,0 @@ -{ - "source_window_id": "", - "Source": "Source", - "cursorPosition": "42,45", - "scrollLine": "0" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/56B42243 b/.Rproj.user/0CDE63C2/sources/prop/56B42243 deleted file mode 100644 index 72cc636..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/56B42243 +++ /dev/null @@ -1,13 +0,0 @@ -{ - "tempName": "Untitled1", - "source_window_id": "", - "Source": "Source", - "cursorPosition": "0,0", - "scrollLine": "0", - "rmdVisualMode": "true", - "rmdVisualCollapsedChunks": "", - "rmdVisualModeLocation": "30:0", - "rmdVisualWrapConfigured": "true", - 
"docOutlineVisible": "1", - "docOutlineSize": "259" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/6BFBD54A b/.Rproj.user/0CDE63C2/sources/prop/6BFBD54A deleted file mode 100644 index e540033..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/6BFBD54A +++ /dev/null @@ -1,7 +0,0 @@ -{ - "tempName": "Untitled1", - "source_window_id": "", - "Source": "Source", - "cursorPosition": "33,3", - "scrollLine": "50" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/7377711F b/.Rproj.user/0CDE63C2/sources/prop/7377711F deleted file mode 100644 index bdf4b18..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/7377711F +++ /dev/null @@ -1,11 +0,0 @@ -{ - "rmdVisualMode": "false", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "1", - "rmdVisualCollapsedChunks": "", - "cursorPosition": "13,69", - "scrollLine": "0", - "rmdVisualModeLocation": "202:0" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/A0008A94 b/.Rproj.user/0CDE63C2/sources/prop/A0008A94 deleted file mode 100644 index 40ec751..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/A0008A94 +++ /dev/null @@ -1,11 +0,0 @@ -{ - "rmdVisualMode": "true", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "1", - "rmdVisualCollapsedChunks": "", - "cursorPosition": "0,0", - "scrollLine": "0", - "rmdVisualModeLocation": "2:0" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/B5CF2C5C b/.Rproj.user/0CDE63C2/sources/prop/B5CF2C5C deleted file mode 100644 index 9215d1f..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/B5CF2C5C +++ /dev/null @@ -1,11 +0,0 @@ -{ - "rmdVisualMode": "false", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "0", - "rmdVisualCollapsedChunks": "", - "cursorPosition": "2,47", - "scrollLine": "0", - "rmdVisualModeLocation": 
"121:6" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/BF877C30 b/.Rproj.user/0CDE63C2/sources/prop/BF877C30 deleted file mode 100644 index 40ec751..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/BF877C30 +++ /dev/null @@ -1,11 +0,0 @@ -{ - "rmdVisualMode": "true", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "1", - "rmdVisualCollapsedChunks": "", - "cursorPosition": "0,0", - "scrollLine": "0", - "rmdVisualModeLocation": "2:0" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/C25DB8A1 b/.Rproj.user/0CDE63C2/sources/prop/C25DB8A1 deleted file mode 100644 index d8354c2..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/C25DB8A1 +++ /dev/null @@ -1,6 +0,0 @@ -{ - "source_window_id": "", - "Source": "Source", - "cursorPosition": "25,30", - "scrollLine": "2" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/C752720B b/.Rproj.user/0CDE63C2/sources/prop/C752720B deleted file mode 100644 index cc3d37d..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/C752720B +++ /dev/null @@ -1,9 +0,0 @@ -{ - "rmdVisualMode": "true", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "1", - "rmdVisualCollapsedChunks": "", - "rmdVisualModeLocation": "2:0" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/C9422BD5 b/.Rproj.user/0CDE63C2/sources/prop/C9422BD5 deleted file mode 100644 index 6e20cec..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/C9422BD5 +++ /dev/null @@ -1,11 +0,0 @@ -{ - "rmdVisualMode": "false", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "0", - "rmdVisualCollapsedChunks": "", - "rmdVisualModeLocation": "158:0", - "cursorPosition": "25,78", - "scrollLine": "0" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/DB772C11 
b/.Rproj.user/0CDE63C2/sources/prop/DB772C11 deleted file mode 100644 index cac365c..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/DB772C11 +++ /dev/null @@ -1,9 +0,0 @@ -{ - "rmdVisualMode": "true", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "1", - "rmdVisualCollapsedChunks": "", - "rmdVisualModeLocation": "120:0" -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/prop/INDEX b/.Rproj.user/0CDE63C2/sources/prop/INDEX deleted file mode 100644 index b7b0f5d..0000000 --- a/.Rproj.user/0CDE63C2/sources/prop/INDEX +++ /dev/null @@ -1,12 +0,0 @@ -~%2FDocuments%2Fanalytics_at_scale%2F.github%2Fworkflows%2Fquarto-publish.yml="503D05BB" -~%2FDocuments%2Fanalytics_at_scale%2F_quarto.yml="C25DB8A1" -~%2FDocuments%2Fanalytics_at_scale%2Fabout.qmd="C752720B" -~%2FDocuments%2Fanalytics_at_scale%2Findex.qmd="C9422BD5" -~%2FDocuments%2Fanalytics_at_scale%2Fmeeting-notes%2F2024-10-04.qmd="B5CF2C5C" -~%2FDocuments%2Fanalytics_at_scale%2Fmeeting-notes%2F2024-10-11.qmd="7377711F" -~%2FDocuments%2Fanalytics_at_scale%2Fmeeting-notes%2F2024.10.11.qmd="A0008A94" -~%2FDocuments%2Fanalytics_at_scale%2Fmeeting-notes%2F24.10.04.qmd="BF877C30" -~%2FDocuments%2Fanalytics_at_scale%2Fmeeting-notes.qmd="19AA840D" -~%2FDocuments%2Fanalytics_at_scale%2Fsnippets.qmd="56B42243" -~%2FDocuments%2Fanalytics_at_scale%2Fsnippets.qmd.R="6BFBD54A" -~%2FDocuments%2Fanalytics_at_scale%2Ftutorials%2Fsnippets.qmd="DB772C11" diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/0AED968E b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/0AED968E deleted file mode 100644 index 9f5ffc4..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/0AED968E +++ /dev/null @@ -1,31 +0,0 @@ -{ - "id": "0AED968E", - "path": "~/Documents/analytics_at_scale/meeting-notes/2024-10-04.qmd", - "project_path": "meeting-notes/2024-10-04.qmd", - "type": "quarto_markdown", - "hash": "2331898432", - "contents": "", - "dirty": false, - 
"created": 1729796157579.0, - "source_on_save": true, - "relative_order": 4, - "properties": { - "rmdVisualMode": "false", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "0", - "rmdVisualCollapsedChunks": "", - "cursorPosition": "2,47", - "scrollLine": "0", - "rmdVisualModeLocation": "121:6" - }, - "folds": "", - "lastKnownWriteTime": 1729801518, - "encoding": "UTF-8", - "collab_server": "", - "source_window": "", - "last_content_update": 1729801518515, - "read_only": false, - "read_only_alternatives": [] -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/0AED968E-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/0AED968E-contents deleted file mode 100644 index 58168ec..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/0AED968E-contents +++ /dev/null @@ -1,62 +0,0 @@ ---- -title: Counts by Period -subtitle: Optimizing counts by period using DBI -date: 2024-10-04 -author: Jake Peters ---- - -### Attendees: -JP, KD, BC, LO - -### Tutorial: Count by Period w/ DBI -JP walked through the [Count by Period w/ DBI](https://analyticsphere.github.io/c4cp/articles/count_by_period_vignette.html) tutorial to demonstrate some code he wrote to create counts by day, week, month, quarter or year using a DBI connection to avoid downloading the data until after aggreagion. This was intended to address some issues that Kelsey was having with the Biospecimen Report which has many tables that are aggregated by date, precluding her from using DBI easily. - -### What do we want to use this workshop for? 
- -- Leila: - - - Use this to refactor code that is written, but need guidance to improve or use better practices - - - Debug problem code together - -- Jake - - - Tackle big, recurring problems together - - - Write functions that can be reused - -- Kelsey: - - - Refactoring code that has been re-used or passed repeatedly - - - Even working with ChatGPT - - - Using SQL to do transformations THEN pulling the data down - -- Brittany: - - - SQL specific discussion. - - - We are all a little weaker in SQL. - -- SQL: - - - Joins - - - Unions - - - Transformations - - - Group By (with counts) - - - Ungroup after counts (when is this necessary and why) - - - SQL Course on Coursera - -### Second Workshop: - -- Code Snippets - -### Future Worshops: - -- Joining Module 1 v1 and v2 Joining Module 2 v1 and v2 diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/162D8A48-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/162D8A48-contents deleted file mode 100644 index e125d31..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/162D8A48-contents +++ /dev/null @@ -1,163 +0,0 @@ ---- -title: "RStudio Snippets Tutorial" -author: "Jake Peters" -date: 10/11/24 -format: html -editor: - markdown: - wrap: 72 ---- - -# Introduction - -This tutorial demonstrates how to use both built-in and custom RStudio -snippets within an R script. By leveraging snippets, -you can streamline your coding process, reduce repetitive tasks, and -maintain consistency across your projects. - -## Table of Contents - -1. Introduction to RStudio Snippets -2. Using Built-in Snippets -3. Creating and Using Custom Snippets - - -## Introduction to RStudio Snippets - -*Snippets* in RStudio are predefined blocks of code that can be -quickly inserted into your scripts or markdown files. They enhance -productivity by minimizing repetitive typing and ensuring consistency -across your coding projects. 
- -In this tutorial, we'll explore how to utilize RStudio's built-in -snippets and create custom snippets tailored to your specific workflow. - ------------------------------------------------------------------------- - -## Using Built-in Snippets - -RStudio comes equipped with several built-in snippets that can be -effortlessly used in `.qmd` files. Below are some commonly used built-in -snippets along with examples of their usage. - -#### Fun - -Type `fun` and press `Tab`. - -``` r -fun my_function <- function(arg1, arg2) { -# Function body -} -``` - -#### For - -Type `for` and press `Tab`. - -``` r -for (variable in vector) { - -} -``` - -#### While - -Type `while` and press `Tab`. - -``` r -while (condition) { - -} -``` - -#### Apply - -Type `lapply` and press `Tab`. - -``` r -apply(array, margin, ...) -``` - -#### Lapply - -Type `lapply` and press `Tab`. - -``` r -lapply(list, function) -``` - -#### ts - -Type `ts` and press `Tab`. - -``` r -# Fri Oct 11 09:53:23 2024 ------------------------------ -``` - -## Using Custom Snippets - -Go to `Tools` \> `Global Options` \> `Snippets` \> `Edit`. - -Then paste the following at the bottom of the page and hit `Apply` - -``` r -snippet dev - "nih-nci-dceg-connect-dev" -``` - -![Gif showing the workflow of creating/using your own snippet](media/snippets.gif) - -::: {.callout-note} -Note that all code after the `snippet ` key word must be indented. 
-::: - -## More custom snippets to try - -```r -snippet header - ## Description ================================================================= - # Title: ${1:Script Title} - # Author: ${2:Your Name} - # Date: ${3:`r format(Sys.Date(), "%Y-%m-%d")`} - # Objective: ${4:Script Purpose} - # GH-Issue: ${5:Github Issue} - -snippet query - bq_auth() # Authenticate with BigQuery - con <- DBI::dbConnect(bigrquery::bigquery(), - project=project, dataset=dataset, billing=billing)) - - # Specify just the data we need with a query - dataset <- "FlatConnect" - table <- "participants_JP" - tier <- "nih-nci-dceg-connect-prod-6d04" - sql <- glue::glue( - """ - SELECT - Connect_ID, - token - FROM `{project}.{dataset}.{table}` - """) - - # Query the data and store as reference object -data <- dbGetQuery(con, SQL) - -snippet dev - "nih-nci-dceg-connect-dev" - -snippet stg - "nih-nci-dceg-connect-stg-5519" - -snippet prod - "nih-nci-dceg-connect-prod-6d04" - -snippet exclusions - WHERE - p.d_821247024 = '197316935' -- 'Verif Status' IS 'Verified' - AND p.d_747006172 != '353358909' -- 'Withdraw consent' IS NOT "Yes" - AND p.d_100767870 = '104430631' -- 'All Base Surv Complete' = 'No' - AND p.d_685002411_d_994064239 != '353358909' -- 'Ref Base Survs' IS NOT "Yes" - AND p.d_987563196 != '353358909' -- 'Dead' IS NOT 'Yes' -```` - - diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/4F13DDE1 b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/4F13DDE1 deleted file mode 100644 index e12b200..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/4F13DDE1 +++ /dev/null @@ -1,26 +0,0 @@ -{ - "id": "4F13DDE1", - "path": "~/Documents/analytics_at_scale/_quarto.yml", - "project_path": "_quarto.yml", - "type": "yaml", - "hash": "2293911025", - "contents": "", - "dirty": false, - "created": 1728654014367.0, - "source_on_save": false, - "relative_order": 2, - "properties": { - "source_window_id": "", - "Source": "Source", - "cursorPosition": "25,30", - "scrollLine": "2" - }, - "folds": 
"", - "lastKnownWriteTime": 1729800548, - "encoding": "UTF-8", - "collab_server": "", - "source_window": "", - "last_content_update": 1729800548269, - "read_only": false, - "read_only_alternatives": [] -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/4F13DDE1-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/4F13DDE1-contents deleted file mode 100644 index f7b1925..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/4F13DDE1-contents +++ /dev/null @@ -1,38 +0,0 @@ -project: - type: website - -website: - title: "Analytics at Scale" - - navbar: - search: true - tools: - - icon: github - menu: - - text: Source Code - href: https://github.com/analyticsphere/analytics-at-scale - - text: Analyticshpere - href: https://github.com/analyticsphere - - sidebar: - style: "floating" - search: true - contents: - - href: index.qmd - text: Home - - meeting-notes.qmd - - section: "Tutorials" - contents: - - tutorials/snippets.qmd - - text: "Count by Period w/ DBI " - url: https://analyticsphere.github.io/c4cp/articles/count_by_period_vignette.html - - -format: - html: - theme: cosmo - css: styles.css - toc: true - -editor: visual - diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/6057111B-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/6057111B-contents deleted file mode 100644 index 27546a0..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/6057111B-contents +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: Kickoff -date: "2024-10-24" ---- diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/6D5FAD6F b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/6D5FAD6F deleted file mode 100644 index 4bce691..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/6D5FAD6F +++ /dev/null @@ -1,26 +0,0 @@ -{ - "id": "6D5FAD6F", - "path": "~/Documents/analytics_at_scale/.github/workflows/quarto-publish.yml", - "project_path": ".github/workflows/quarto-publish.yml", - "type": "yaml", - "hash": "0", - "contents": 
"", - "dirty": false, - "created": 1729862007230.0, - "source_on_save": false, - "relative_order": 8, - "properties": { - "source_window_id": "", - "Source": "Source", - "cursorPosition": "42,45", - "scrollLine": "0" - }, - "folds": "", - "lastKnownWriteTime": 1729862022, - "encoding": "UTF-8", - "collab_server": "", - "source_window": "", - "last_content_update": 1729862022079, - "read_only": false, - "read_only_alternatives": [] -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/6D5FAD6F-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/6D5FAD6F-contents deleted file mode 100644 index 338ca78..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/6D5FAD6F-contents +++ /dev/null @@ -1,66 +0,0 @@ -on: - push: - branches: main - -name: Render and Publish - -# you need these permissions to publish to GitHub pages -permissions: - contents: write - pages: write - -jobs: - build-deploy: - runs-on: ubuntu-latest - steps: - - name: Check out repository - uses: actions/checkout@v4 - - - name: Set up Quarto - uses: quarto-dev/quarto-actions/setup@v2 - with: - # To install LaTeX to build PDF book - tinytex: true - # uncomment below and fill to pin a version - # version: SPECIFIC-QUARTO-VERSION-HERE - - # add software dependencies here and any libraries - - # From https://github.com/actions/setup-python - # - name: Setup Python - # uses: actions/setup-python@v3 - - # From https://github.com/r-lib/actions/tree/v2-branch/setup-r - # - name: Setup R - # uses: r-lib/actions/setup-r@v2 - - # From https://github.com/julia-actions/setup-julia - # - name: Setup Julia - # uses: julia-actions/setup-julia@v1 - - # See more at https://github.com/quarto-dev/quarto-actions/blob/main/examples/example-03-dependencies.md - - # To publish to Netlify, RStudio Connect, or GitHub Pages, uncomment - # the appropriate block below - - # - name: Publish to Netlify (and render) - # uses: quarto-dev/quarto-actions/publish@v2 - # with: - # target: 
netlify - # NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} - - # - name: Publish to RStudio Connect (and render) - # uses: quarto-dev/quarto-actions/publish@v2 - # with: - # target: connect - # CONNECT_SERVER: enter-the-server-url-here - # CONNECT_API_KEY: ${{ secrets.CONNECT_API_KEY }} - - # NOTE: If Publishing to GitHub Pages, set the permissions correctly (see top of this yaml) - - name: Publish to GitHub Pages (and render) - uses: quarto-dev/quarto-actions/publish@v2 - with: - target: gh-pages - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # this secret is always available for github actions - \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/76052A04-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/76052A04-contents deleted file mode 100644 index 3d95ecc..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/76052A04-contents +++ /dev/null @@ -1 +0,0 @@ -"nih-nci-dceg-connect-dev" \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/C45B3481 b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/C45B3481 deleted file mode 100644 index fdabc52..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/C45B3481 +++ /dev/null @@ -1,33 +0,0 @@ -{ - "id": "C45B3481", - "path": "~/Documents/analytics_at_scale/meeting-notes.qmd", - "project_path": "meeting-notes.qmd", - "type": "quarto_markdown", - "hash": "4104937591", - "contents": "", - "dirty": false, - "created": 1729796064645.0, - "source_on_save": true, - "relative_order": 3, - "properties": { - "rmdVisualMode": "false", - "rmdVisualWrapConfigured": "true", - "tempName": "Untitled1", - "source_window_id": "", - "Source": "Source", - "cursorPosition": "9,0", - "scrollLine": "0", - "docOutlineVisible": "1", - "rmdVisualCollapsedChunks": "", - "rmdVisualModeLocation": "295:0", - "docOutlineSize": "141" - }, - "folds": "", - "lastKnownWriteTime": 1729799650, - "encoding": "UTF-8", - "collab_server": "", - "source_window": 
"", - "last_content_update": 1729799650238, - "read_only": false, - "read_only_alternatives": [] -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/C45B3481-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/C45B3481-contents deleted file mode 100644 index ab4a89b..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/C45B3481-contents +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: "Meeting Notes" -listing: - type: table - fields: [date, title, subtitle] - sort: "date desc" - categories: false - contents: "meeting-notes/*.qmd" ---- diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/D4827836 b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/D4827836 deleted file mode 100644 index 8f1009b..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/D4827836 +++ /dev/null @@ -1,29 +0,0 @@ -{ - "id": "D4827836", - "path": "~/Documents/analytics_at_scale/tutorials/snippets.qmd", - "project_path": "tutorials/snippets.qmd", - "type": "quarto_markdown", - "hash": "0", - "contents": "", - "dirty": false, - "created": 1729799654509.0, - "source_on_save": false, - "relative_order": 7, - "properties": { - "rmdVisualMode": "true", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "1", - "rmdVisualCollapsedChunks": "", - "rmdVisualModeLocation": "120:0" - }, - "folds": "", - "lastKnownWriteTime": 1729798425, - "encoding": "UTF-8", - "collab_server": "", - "source_window": "", - "last_content_update": 1729798425, - "read_only": false, - "read_only_alternatives": [] -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/D4827836-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/D4827836-contents deleted file mode 100644 index 1df2b04..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/D4827836-contents +++ /dev/null @@ -1,162 +0,0 @@ ---- -title: "RStudio Snippets" -author: "Jake Peters" -date: 10/11/24 -format: html 
-editor: - markdown: - wrap: 72 ---- - -# Introduction - -This tutorial demonstrates how to use both built-in and custom RStudio -snippets within an R script. By leveraging snippets, you can streamline -your coding process, reduce repetitive tasks, and maintain consistency -across your projects. - -## Table of Contents - -1. Introduction to RStudio Snippets -2. Using Built-in Snippets -3. Creating and Using Custom Snippets - -## Introduction to RStudio Snippets - -*Snippets* in RStudio are predefined blocks of code that can be quickly -inserted into your scripts or markdown files. They enhance productivity -by minimizing repetitive typing and ensuring consistency across your -coding projects. - -In this tutorial, we'll explore how to utilize RStudio's built-in -snippets and create custom snippets tailored to your specific workflow. - ------------------------------------------------------------------------- - -## Using Built-in Snippets - -RStudio comes equipped with several built-in snippets that can be -effortlessly used in `.qmd` files. Below are some commonly used built-in -snippets along with examples of their usage. - -#### Fun - -Type `fun` and press `Tab`. - -``` r -fun my_function <- function(arg1, arg2) { -# Function body -} -``` - -#### For - -Type `for` and press `Tab`. - -``` r -for (variable in vector) { - -} -``` - -#### While - -Type `while` and press `Tab`. - -``` r -while (condition) { - -} -``` - -#### Apply - -Type `lapply` and press `Tab`. - -``` r -apply(array, margin, ...) -``` - -#### Lapply - -Type `lapply` and press `Tab`. - -``` r -lapply(list, function) -``` - -#### ts - -Type `ts` and press `Tab`. - -``` r -# Fri Oct 11 09:53:23 2024 ------------------------------ -``` - -## Using Custom Snippets - -Go to `Tools` \> `Global Options` \> `Snippets` \> `Edit`. 
- -Then paste the following at the bottom of the page and hit `Apply` - -``` r -snippet dev - "nih-nci-dceg-connect-dev" -``` - -![Gif showing the workflow of creating/using your own -snippet](media/snippets.gif) - -::: callout-note -Note that all code after the `snippet ` key word must -be indented. -::: - -## More custom snippets to try - -``` r -snippet header - ## Description ================================================================= - # Title: ${1:Script Title} - # Author: ${2:Your Name} - # Date: ${3:`r format(Sys.Date(), "%Y-%m-%d")`} - # Objective: ${4:Script Purpose} - # GH-Issue: ${5:Github Issue} - -snippet query - bq_auth() # Authenticate with BigQuery - con <- DBI::dbConnect(bigrquery::bigquery(), - project=project, dataset=dataset, billing=billing)) - - # Specify just the data we need with a query - dataset <- "FlatConnect" - table <- "participants_JP" - tier <- "nih-nci-dceg-connect-prod-6d04" - sql <- glue::glue( - """ - SELECT - Connect_ID, - token - FROM `{project}.{dataset}.{table}` - """) - - # Query the data and store as reference object -data <- dbGetQuery(con, SQL) - -snippet dev - "nih-nci-dceg-connect-dev" - -snippet stg - "nih-nci-dceg-connect-stg-5519" - -snippet prod - "nih-nci-dceg-connect-prod-6d04" - -snippet exclusions - WHERE - p.d_821247024 = '197316935' -- 'Verif Status' IS 'Verified' - AND p.d_747006172 != '353358909' -- 'Withdraw consent' IS NOT "Yes" - AND p.d_100767870 = '104430631' -- 'All Base Surv Complete' = 'No' - AND p.d_685002411_d_994064239 != '353358909' -- 'Ref Base Survs' IS NOT "Yes" - AND p.d_987563196 != '353358909' -- 'Dead' IS NOT 'Yes' -``` diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/DACF14F8-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/DACF14F8-contents deleted file mode 100644 index 6475a6f..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/DACF14F8-contents +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Meeting Notes" ---- - -::: {.callout-note icon=false 
collapse=true} - -## 24.10.04 - Kick-off - -Using callouts is an effective way to highlight content that your reader give special consideration or attention. - -::: - - -::: {.callout-note icon=false collapse=true} - -## 24.10.04 - Kick-off - -**Attendees:** JP, KD, BC, LO - -**Topic:** What do we want from this workshop? - -**Notes:** - - item - - item - - item -::: \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F27986C2 b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F27986C2 deleted file mode 100644 index 856927f..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F27986C2 +++ /dev/null @@ -1,31 +0,0 @@ -{ - "id": "F27986C2", - "path": "~/Documents/analytics_at_scale/meeting-notes/2024-10-11.qmd", - "project_path": "meeting-notes/2024-10-11.qmd", - "type": "quarto_markdown", - "hash": "3039937237", - "contents": "", - "dirty": false, - "created": 1729797996405.0, - "source_on_save": true, - "relative_order": 5, - "properties": { - "rmdVisualMode": "false", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", - "docOutlineVisible": "1", - "rmdVisualCollapsedChunks": "", - "cursorPosition": "13,69", - "scrollLine": "0", - "rmdVisualModeLocation": "202:0" - }, - "folds": "", - "lastKnownWriteTime": 1729801134, - "encoding": "UTF-8", - "collab_server": "", - "source_window": "", - "last_content_update": 1729801134707, - "read_only": false, - "read_only_alternatives": [] -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F27986C2-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F27986C2-contents deleted file mode 100644 index f5058c3..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F27986C2-contents +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Snippets -subtitle: Custom code snippets in RStudio -date: 2024-10-11 -author: Jake Peters ---- - -### Attendees - -JP, LO, BC - -### Notes - -- JP walked through a [RStudio 
Snippets](../tutorials/snippets.qmd) tutorial. - -- LO and BC set up custom RStudio Snippets on their local machines. diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F2F6FBFA b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F2F6FBFA deleted file mode 100644 index f0dfb94..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F2F6FBFA +++ /dev/null @@ -1,33 +0,0 @@ -{ - "id": "F2F6FBFA", - "path": "~/Documents/analytics_at_scale/snippets.qmd", - "project_path": "snippets.qmd", - "type": "quarto_markdown", - "hash": "0", - "contents": "", - "dirty": false, - "created": 1729798351799.0, - "source_on_save": false, - "relative_order": 6, - "properties": { - "tempName": "Untitled1", - "source_window_id": "", - "Source": "Source", - "cursorPosition": "0,0", - "scrollLine": "0", - "rmdVisualMode": "true", - "rmdVisualCollapsedChunks": "", - "rmdVisualModeLocation": "30:0", - "rmdVisualWrapConfigured": "true", - "docOutlineVisible": "1", - "docOutlineSize": "259" - }, - "folds": "", - "lastKnownWriteTime": 1729800341, - "encoding": "UTF-8", - "collab_server": "", - "source_window": "", - "last_content_update": 1729800341542, - "read_only": false, - "read_only_alternatives": [] -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F2F6FBFA-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F2F6FBFA-contents deleted file mode 100644 index 1df2b04..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F2F6FBFA-contents +++ /dev/null @@ -1,162 +0,0 @@ ---- -title: "RStudio Snippets" -author: "Jake Peters" -date: 10/11/24 -format: html -editor: - markdown: - wrap: 72 ---- - -# Introduction - -This tutorial demonstrates how to use both built-in and custom RStudio -snippets within an R script. By leveraging snippets, you can streamline -your coding process, reduce repetitive tasks, and maintain consistency -across your projects. - -## Table of Contents - -1. Introduction to RStudio Snippets -2. 
Using Built-in Snippets -3. Creating and Using Custom Snippets - -## Introduction to RStudio Snippets - -*Snippets* in RStudio are predefined blocks of code that can be quickly -inserted into your scripts or markdown files. They enhance productivity -by minimizing repetitive typing and ensuring consistency across your -coding projects. - -In this tutorial, we'll explore how to utilize RStudio's built-in -snippets and create custom snippets tailored to your specific workflow. - ------------------------------------------------------------------------- - -## Using Built-in Snippets - -RStudio comes equipped with several built-in snippets that can be -effortlessly used in `.qmd` files. Below are some commonly used built-in -snippets along with examples of their usage. - -#### Fun - -Type `fun` and press `Tab`. - -``` r -fun my_function <- function(arg1, arg2) { -# Function body -} -``` - -#### For - -Type `for` and press `Tab`. - -``` r -for (variable in vector) { - -} -``` - -#### While - -Type `while` and press `Tab`. - -``` r -while (condition) { - -} -``` - -#### Apply - -Type `lapply` and press `Tab`. - -``` r -apply(array, margin, ...) -``` - -#### Lapply - -Type `lapply` and press `Tab`. - -``` r -lapply(list, function) -``` - -#### ts - -Type `ts` and press `Tab`. - -``` r -# Fri Oct 11 09:53:23 2024 ------------------------------ -``` - -## Using Custom Snippets - -Go to `Tools` \> `Global Options` \> `Snippets` \> `Edit`. - -Then paste the following at the bottom of the page and hit `Apply` - -``` r -snippet dev - "nih-nci-dceg-connect-dev" -``` - -![Gif showing the workflow of creating/using your own -snippet](media/snippets.gif) - -::: callout-note -Note that all code after the `snippet ` key word must -be indented. 
-::: - -## More custom snippets to try - -``` r -snippet header - ## Description ================================================================= - # Title: ${1:Script Title} - # Author: ${2:Your Name} - # Date: ${3:`r format(Sys.Date(), "%Y-%m-%d")`} - # Objective: ${4:Script Purpose} - # GH-Issue: ${5:Github Issue} - -snippet query - bq_auth() # Authenticate with BigQuery - con <- DBI::dbConnect(bigrquery::bigquery(), - project=project, dataset=dataset, billing=billing)) - - # Specify just the data we need with a query - dataset <- "FlatConnect" - table <- "participants_JP" - tier <- "nih-nci-dceg-connect-prod-6d04" - sql <- glue::glue( - """ - SELECT - Connect_ID, - token - FROM `{project}.{dataset}.{table}` - """) - - # Query the data and store as reference object -data <- dbGetQuery(con, SQL) - -snippet dev - "nih-nci-dceg-connect-dev" - -snippet stg - "nih-nci-dceg-connect-stg-5519" - -snippet prod - "nih-nci-dceg-connect-prod-6d04" - -snippet exclusions - WHERE - p.d_821247024 = '197316935' -- 'Verif Status' IS 'Verified' - AND p.d_747006172 != '353358909' -- 'Withdraw consent' IS NOT "Yes" - AND p.d_100767870 = '104430631' -- 'All Base Surv Complete' = 'No' - AND p.d_685002411_d_994064239 != '353358909' -- 'Ref Base Survs' IS NOT "Yes" - AND p.d_987563196 != '353358909' -- 'Dead' IS NOT 'Yes' -``` diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F466712C b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F466712C deleted file mode 100644 index 32d8aa9..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F466712C +++ /dev/null @@ -1,31 +0,0 @@ -{ - "id": "F466712C", - "path": "~/Documents/analytics_at_scale/index.qmd", - "project_path": "index.qmd", - "type": "quarto_markdown", - "hash": "1743756931", - "contents": "", - "dirty": false, - "created": 1728654014033.0, - "source_on_save": true, - "relative_order": 1, - "properties": { - "rmdVisualMode": "false", - "rmdVisualWrapConfigured": "true", - "source_window_id": "", - "Source": "Source", 
- "docOutlineVisible": "0", - "rmdVisualCollapsedChunks": "", - "rmdVisualModeLocation": "158:0", - "cursorPosition": "25,78", - "scrollLine": "0" - }, - "folds": "", - "lastKnownWriteTime": 1729861922, - "encoding": "UTF-8", - "collab_server": "", - "source_window": "", - "last_content_update": 1729861922941, - "read_only": false, - "read_only_alternatives": [] -} \ No newline at end of file diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F466712C-contents b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F466712C-contents deleted file mode 100644 index 7bdce6e..0000000 --- a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/F466712C-contents +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: "Analytics at Scale" -subtitle: Workshopping best practices for big data analytics in epidemiology ---- - -## Objective - - Hold weekly workshops **lead by team members** or invitees. - - **Produce resources** such as videos, tutorials and code resources from workshops on this resource page - - **Standardize** our analytic workflows. - -## Focus - -- Best practices for writing **robust**, **reproducible**, and **readable** R code -- Optimization for big data as our data set grows -- Tips and tricks for productivity -- Integration with cloud computing resources and data sources -- Integrating R and SQL - -## Past topics -DBI and dbplyr, code snippets - -## Future workshop topics: -functional programming, `renv`, GitHub, Docker, pipelines, Style Guides, -developing R packages, publishing data products, RStudio Snippets, `linter`, -`styler`, `rshiny`, `rix`, SQL & R, Quarto, Quarto Websites, Quarto Dashboards, -`plumber` APIs, data structures, JSON in R, mermaid diagrams, SQLlite, DuckDB, -... 
- -## References -- [Advanced R](http://adv-r.had.co.nz/) by Hadley Wickham -- [Building Reproducible Analytical Pipelines with R](https://raps-with-r.dev/) by Bruno Rodriguez -- [R Packages](https://r-pkgs.org/) by Hadley Wickham -- [Modern Data Visualization with R](https://rkabacoff.github.io/datavis/) by Robert Kabacoff - diff --git a/.Rproj.user/0CDE63C2/sources/session-6a2f53da/lock_file b/.Rproj.user/0CDE63C2/sources/session-6a2f53da/lock_file deleted file mode 100644 index e69de29..0000000 diff --git a/.Rproj.user/shared/notebooks/2CBBAA59-24.10.04/1/s/chunks.json b/.Rproj.user/shared/notebooks/2CBBAA59-24.10.04/1/s/chunks.json deleted file mode 100644 index a894885..0000000 --- a/.Rproj.user/shared/notebooks/2CBBAA59-24.10.04/1/s/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729796169} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C219570d0b/chunks.json b/.Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C219570d0b/chunks.json deleted file mode 100644 index 2cba458..0000000 --- a/.Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C219570d0b/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729779284} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C26a2f53da/chunks.json b/.Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C26a2f53da/chunks.json deleted file mode 100644 index 85d8f98..0000000 --- a/.Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C26a2f53da/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729795766} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C2cc55e1fa/chunks.json b/.Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C2cc55e1fa/chunks.json deleted file mode 100644 index 40235a6..0000000 --- a/.Rproj.user/shared/notebooks/3B243C68-index/1/0CDE63C2cc55e1fa/chunks.json +++ /dev/null @@ -1 +0,0 
@@ -{"chunk_definitions":[],"doc_write_time":1728655103} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/3B243C68-index/1/s/chunks.json b/.Rproj.user/shared/notebooks/3B243C68-index/1/s/chunks.json deleted file mode 100644 index 85d8f98..0000000 --- a/.Rproj.user/shared/notebooks/3B243C68-index/1/s/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729795766} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/4E1298AF-2024-10-04/1/0CDE63C26a2f53da/chunks.json b/.Rproj.user/shared/notebooks/4E1298AF-2024-10-04/1/0CDE63C26a2f53da/chunks.json deleted file mode 100644 index 03af110..0000000 --- a/.Rproj.user/shared/notebooks/4E1298AF-2024-10-04/1/0CDE63C26a2f53da/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729796338} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/4E1298AF-2024-10-04/1/s/chunks.json b/.Rproj.user/shared/notebooks/4E1298AF-2024-10-04/1/s/chunks.json deleted file mode 100644 index 03af110..0000000 --- a/.Rproj.user/shared/notebooks/4E1298AF-2024-10-04/1/s/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729796338} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/6F16FCC1-2024-10-11/1/0CDE63C26a2f53da/chunks.json b/.Rproj.user/shared/notebooks/6F16FCC1-2024-10-11/1/0CDE63C26a2f53da/chunks.json deleted file mode 100644 index 7e93b17..0000000 --- a/.Rproj.user/shared/notebooks/6F16FCC1-2024-10-11/1/0CDE63C26a2f53da/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729798164} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/6F16FCC1-2024-10-11/1/s/chunks.json b/.Rproj.user/shared/notebooks/6F16FCC1-2024-10-11/1/s/chunks.json deleted file mode 100644 index 7e93b17..0000000 --- a/.Rproj.user/shared/notebooks/6F16FCC1-2024-10-11/1/s/chunks.json +++ /dev/null @@ -1 +0,0 @@ 
-{"chunk_definitions":[],"doc_write_time":1729798164} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/9DA072F5-snippets/1/0CDE63C26a2f53da/chunks.json b/.Rproj.user/shared/notebooks/9DA072F5-snippets/1/0CDE63C26a2f53da/chunks.json deleted file mode 100644 index 5802856..0000000 --- a/.Rproj.user/shared/notebooks/9DA072F5-snippets/1/0CDE63C26a2f53da/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729798354} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/9DA072F5-snippets/1/s/chunks.json b/.Rproj.user/shared/notebooks/9DA072F5-snippets/1/s/chunks.json deleted file mode 100644 index 5802856..0000000 --- a/.Rproj.user/shared/notebooks/9DA072F5-snippets/1/s/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729798354} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/D470729E-2024.10.11/1/0CDE63C26a2f53da/chunks.json b/.Rproj.user/shared/notebooks/D470729E-2024.10.11/1/0CDE63C26a2f53da/chunks.json deleted file mode 100644 index b69a406..0000000 --- a/.Rproj.user/shared/notebooks/D470729E-2024.10.11/1/0CDE63C26a2f53da/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729797996} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/D470729E-2024.10.11/1/s/chunks.json b/.Rproj.user/shared/notebooks/D470729E-2024.10.11/1/s/chunks.json deleted file mode 100644 index b69a406..0000000 --- a/.Rproj.user/shared/notebooks/D470729E-2024.10.11/1/s/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729797996} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/F71330AC-meeting-notes/1/0CDE63C26a2f53da/chunks.json b/.Rproj.user/shared/notebooks/F71330AC-meeting-notes/1/0CDE63C26a2f53da/chunks.json deleted file mode 100644 index 3bf8b5b..0000000 --- a/.Rproj.user/shared/notebooks/F71330AC-meeting-notes/1/0CDE63C26a2f53da/chunks.json +++ /dev/null @@ -1 +0,0 
@@ -{"chunk_definitions":[],"doc_write_time":1729796066} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/F71330AC-meeting-notes/1/s/chunks.json b/.Rproj.user/shared/notebooks/F71330AC-meeting-notes/1/s/chunks.json deleted file mode 100644 index 3bf8b5b..0000000 --- a/.Rproj.user/shared/notebooks/F71330AC-meeting-notes/1/s/chunks.json +++ /dev/null @@ -1 +0,0 @@ -{"chunk_definitions":[],"doc_write_time":1729796066} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/patch-chunk-names b/.Rproj.user/shared/notebooks/patch-chunk-names deleted file mode 100644 index e69de29..0000000 diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths deleted file mode 100644 index 1432c4a..0000000 --- a/.Rproj.user/shared/notebooks/paths +++ /dev/null @@ -1,8 +0,0 @@ -/Users/petersjm/Documents/analytics_at_scale/index.qmd="3B243C68" -/Users/petersjm/Documents/analytics_at_scale/meeting-notes.qmd="F71330AC" -/Users/petersjm/Documents/analytics_at_scale/meeting-notes/2024-10-04.qmd="4E1298AF" -/Users/petersjm/Documents/analytics_at_scale/meeting-notes/2024-10-11.qmd="6F16FCC1" -/Users/petersjm/Documents/analytics_at_scale/meeting-notes/2024.10.11.qmd="D470729E" -/Users/petersjm/Documents/analytics_at_scale/meeting-notes/24.10.04.qmd="2CBBAA59" -/Users/petersjm/Documents/analytics_at_scale/snippets.qmd="9DA072F5" -/Users/petersjm/Documents/analytics_at_scale/tutorials/snippets.qmd="9348CF30" From b2fbfbff74d7b89510531de8d1fb88d956acf419 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 09:54:38 -0400 Subject: [PATCH 10/53] add .gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..68223f4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata +.Rproj.user/* From 16a5fce7c8d23e8a83e4f509a15406016f43aeb5 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: 
Fri, 25 Oct 2024 09:59:10 -0400 Subject: [PATCH 11/53] Rename 2024-10-04.md to 2024-10-04.qmd --- meeting-notes/{2024-10-04.md => 2024-10-04.qmd} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename meeting-notes/{2024-10-04.md => 2024-10-04.qmd} (100%) diff --git a/meeting-notes/2024-10-04.md b/meeting-notes/2024-10-04.qmd similarity index 100% rename from meeting-notes/2024-10-04.md rename to meeting-notes/2024-10-04.qmd From a62e3a28e76002e0f2de102909fe42fa3700f8db Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 09:59:37 -0400 Subject: [PATCH 12/53] Rename 2024-10-04.qmd to 2024-10-04.md --- meeting-notes/{2024-10-04.qmd => 2024-10-04.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename meeting-notes/{2024-10-04.qmd => 2024-10-04.md} (100%) diff --git a/meeting-notes/2024-10-04.qmd b/meeting-notes/2024-10-04.md similarity index 100% rename from meeting-notes/2024-10-04.qmd rename to meeting-notes/2024-10-04.md From d470429d5a241797357f7c299a690f575e369d06 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 10:00:03 -0400 Subject: [PATCH 13/53] Update meeting-notes.qmd --- meeting-notes.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meeting-notes.qmd b/meeting-notes.qmd index ab4a89b..55e1139 100644 --- a/meeting-notes.qmd +++ b/meeting-notes.qmd @@ -5,5 +5,5 @@ listing: fields: [date, title, subtitle] sort: "date desc" categories: false - contents: "meeting-notes/*.qmd" + contents: "meeting-notes/*.md" --- From 6f36d458c462eac8df3c02bf7d66680f337857b4 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 10:00:50 -0400 Subject: [PATCH 14/53] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 80ecd7f..84cb016 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ # analytics-at-scale-workshop live at [https://analyticsphere.github.io/analytics-at-scale-workshop/](https://analyticsphere.github.io/analytics-at-scale-workshop/) 
+ From b244f49d73ebdbf06552f35083734f71a372ff2a Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 10:09:46 -0400 Subject: [PATCH 15/53] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 84cb016..6b56cd9 100644 --- a/README.md +++ b/README.md @@ -2,3 +2,4 @@ live at [https://analyticsphere.github.io/analytics-at-scale-workshop/](https://analyticsphere.github.io/analytics-at-scale-workshop/) + # hey From 922b638868e6a77731211ff0c7a649c0ab18d675 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 10:40:50 -0400 Subject: [PATCH 16/53] Update _quarto.yml --- _quarto.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_quarto.yml b/_quarto.yml index f7b1925..7ab3485 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -9,8 +9,8 @@ website: tools: - icon: github menu: - - text: Source Code - href: https://github.com/analyticsphere/analytics-at-scale + - text: Website Source Code + href: https://github.com/Analyticsphere/analytics-at-scale-workshop - text: Analyticshpere href: https://github.com/analyticsphere From 6140c20d3dd5078044821ee82f8abece29702bff Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 12:18:38 -0400 Subject: [PATCH 17/53] upated yaml --- README.md | 2 +- _quarto.yml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 80ecd7f..7453431 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,3 @@ # analytics-at-scale-workshop -live at [https://analyticsphere.github.io/analytics-at-scale-workshop/](https://analyticsphere.github.io/analytics-at-scale-workshop/) +live at diff --git a/_quarto.yml b/_quarto.yml index f7b1925..af5c63b 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -21,13 +21,13 @@ website: - href: index.qmd text: Home - meeting-notes.qmd - - section: "Tutorials" + - section: Tutorials + contents: tutorials/*.*md + - section: External Tutorials contents: - - tutorials/snippets.qmd - - 
text: "Count by Period w/ DBI " + - text: "Count by Period w/ DBI" url: https://analyticsphere.github.io/c4cp/articles/count_by_period_vignette.html - format: html: theme: cosmo From c4a2abf41a51d383fd7a851b325828a147f35c5e Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 13:24:16 -0400 Subject: [PATCH 18/53] updated meeting notes --- meeting-notes.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meeting-notes.qmd b/meeting-notes.qmd index ab4a89b..5e43a0f 100644 --- a/meeting-notes.qmd +++ b/meeting-notes.qmd @@ -5,5 +5,5 @@ listing: fields: [date, title, subtitle] sort: "date desc" categories: false - contents: "meeting-notes/*.qmd" + contents: "meeting-notes/*.*md" --- From b4f078c425331929376369c835c4bfc62ea33160 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 13:25:06 -0400 Subject: [PATCH 19/53] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 68223f4..a404d26 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ .RData .Ruserdata .Rproj.user/* +.Rproj* From e1f9528587bb309edbd2b5c45edc808a8e62b77e Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 13:29:09 -0400 Subject: [PATCH 20/53] minor update --- .gitignore | 6 + .../_freeze/about/execute-results/html.json | 15 -- .../_freeze/index/execute-results/html.json | 15 -- .../site_libs/clipboard/clipboard.min.js | 7 - .../site_libs/quarto-listing/list.min.js | 2 - .../quarto-listing/quarto-listing.js | 243 ------------------ .quarto/idx/index.qmd.json | 1 - .quarto/idx/meeting-notes.qmd.json | 1 - .quarto/idx/meeting-notes/2024-10-04.qmd.json | 1 - .quarto/idx/meeting-notes/2024-10-11.qmd.json | 1 - .quarto/idx/snippets.qmd.json | 1 - .quarto/idx/tutorials/snippets.qmd.json | 1 - .quarto/listing/listing-cache.json | 7 - .quarto/preview/lock | 1 - .quarto/xref/598b2e90 | 1 - .quarto/xref/6afc8b7a | 1 - .quarto/xref/755cd591 | 1 - .quarto/xref/862fc1db | 1 - .quarto/xref/INDEX 
| 26 -- .quarto/xref/d0caaf14 | 1 - .quarto/xref/d7a01680 | 1 - .quarto/xref/e199f2dd | 1 - .quarto/xref/fa6fa8d5 | 1 - _quarto.yml | 4 +- index.qmd | 29 +-- 25 files changed, 22 insertions(+), 347 deletions(-) delete mode 100644 .quarto/_freeze/about/execute-results/html.json delete mode 100644 .quarto/_freeze/index/execute-results/html.json delete mode 100644 .quarto/_freeze/site_libs/clipboard/clipboard.min.js delete mode 100644 .quarto/_freeze/site_libs/quarto-listing/list.min.js delete mode 100644 .quarto/_freeze/site_libs/quarto-listing/quarto-listing.js delete mode 100644 .quarto/idx/index.qmd.json delete mode 100644 .quarto/idx/meeting-notes.qmd.json delete mode 100644 .quarto/idx/meeting-notes/2024-10-04.qmd.json delete mode 100644 .quarto/idx/meeting-notes/2024-10-11.qmd.json delete mode 100644 .quarto/idx/snippets.qmd.json delete mode 100644 .quarto/idx/tutorials/snippets.qmd.json delete mode 100644 .quarto/listing/listing-cache.json delete mode 100644 .quarto/preview/lock delete mode 100644 .quarto/xref/598b2e90 delete mode 100644 .quarto/xref/6afc8b7a delete mode 100644 .quarto/xref/755cd591 delete mode 100644 .quarto/xref/862fc1db delete mode 100644 .quarto/xref/INDEX delete mode 100644 .quarto/xref/d0caaf14 delete mode 100644 .quarto/xref/d7a01680 delete mode 100644 .quarto/xref/e199f2dd delete mode 100644 .quarto/xref/fa6fa8d5 diff --git a/.gitignore b/.gitignore index 68223f4..b6eaba2 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,9 @@ .RData .Ruserdata .Rproj.user/* +.DS_Store +.quarto/* +.Rproj* +README_files/* +_site/* +README.html \ No newline at end of file diff --git a/.quarto/_freeze/about/execute-results/html.json b/.quarto/_freeze/about/execute-results/html.json deleted file mode 100644 index be15350..0000000 --- a/.quarto/_freeze/about/execute-results/html.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "hash": "a754ce02c0472c0b163d8ffb898e69ac", - "result": { - "engine": "knitr", - "markdown": "---\ntitle: \"About\"\n---\n\n\n\n\n\nAbout 
this site\n\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n1 + 1\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n[1] 2\n```\n\n\n:::\n:::\n", - "supporting": [], - "filters": [ - "rmarkdown/pagebreak.lua" - ], - "includes": {}, - "engineDependencies": {}, - "preserve": {}, - "postProcess": true - } -} \ No newline at end of file diff --git a/.quarto/_freeze/index/execute-results/html.json b/.quarto/_freeze/index/execute-results/html.json deleted file mode 100644 index 5485528..0000000 --- a/.quarto/_freeze/index/execute-results/html.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "hash": "6617ac8635029eb78f09b7ba6924dcc9", - "result": { - "engine": "knitr", - "markdown": "---\ntitle: \"Analytics at Scale\"\n---\n\n\n\nThis is a Quarto website.\n\nTo learn more about Quarto websites visit .\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n1 + 1\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n[1] 2\n```\n\n\n:::\n:::\n", - "supporting": [], - "filters": [ - "rmarkdown/pagebreak.lua" - ], - "includes": {}, - "engineDependencies": {}, - "preserve": {}, - "postProcess": true - } -} \ No newline at end of file diff --git a/.quarto/_freeze/site_libs/clipboard/clipboard.min.js b/.quarto/_freeze/site_libs/clipboard/clipboard.min.js deleted file mode 100644 index 1103f81..0000000 --- a/.quarto/_freeze/site_libs/clipboard/clipboard.min.js +++ /dev/null @@ -1,7 +0,0 @@ -/*! 
- * clipboard.js v2.0.11 - * https://clipboardjs.com/ - * - * Licensed MIT © Zeno Rocha - */ -!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return b}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),r=n.n(e);function c(t){try{return document.execCommand(t)}catch(t){return}}var a=function(t){t=r()(t);return c("cut"),t};function o(t,e){var n,o,t=(n=t,o="rtl"===document.documentElement.getAttribute("dir"),(t=document.createElement("textarea")).style.fontSize="12pt",t.style.border="0",t.style.padding="0",t.style.margin="0",t.style.position="absolute",t.style[o?"right":"left"]="-9999px",o=window.pageYOffset||document.documentElement.scrollTop,t.style.top="".concat(o,"px"),t.setAttribute("readonly",""),t.value=n,t);return e.container.appendChild(t),e=r()(t),c("copy"),t.remove(),e}var f=function(t){var e=10?setTimeout((function(){e(r,n,s)}),1):(t.update(),n(s))}}},"./src/filter.js":function(t){t.exports=function(t){return t.handlers.filterStart=t.handlers.filterStart||[],t.handlers.filterComplete=t.handlers.filterComplete||[],function(e){if(t.trigger("filterStart"),t.i=1,t.reset.filter(),void 0===e)t.filtered=!1;else{t.filtered=!0;for(var r=t.items,n=0,s=r.length;nv.page,a=new g(t[s],void 0,n),v.items.push(a),r.push(a)}return v.update(),r}m(t.slice(0),e)}},this.show=function(t,e){return this.i=t,this.page=e,v.update(),v},this.remove=function(t,e,r){for(var n=0,s=0,i=v.items.length;s-1&&r.splice(n,1),v},this.trigger=function(t){for(var e=v.handlers[t].length;e--;)v.handlers[t][e](v);return v},this.reset={filter:function(){for(var t=v.items,e=t.length;e--;)t[e].filtered=!1;return v},search:function(){for(var t=v.items,e=t.length;e--;)t[e].found=!1;return v}},this.update=function(){var 
t=v.items,e=t.length;v.visibleItems=[],v.matchingItems=[],v.templater.clear();for(var r=0;r=v.i&&v.visibleItems.lengthe},innerWindow:function(t,e,r){return t>=e-r&&t<=e+r},dotted:function(t,e,r,n,s,i,a){return this.dottedLeft(t,e,r,n,s,i)||this.dottedRight(t,e,r,n,s,i,a)},dottedLeft:function(t,e,r,n,s,i){return e==r+1&&!this.innerWindow(e,s,i)&&!this.right(e,n)},dottedRight:function(t,e,r,n,s,i,a){return!t.items[a-1].values().dotted&&(e==n&&!this.innerWindow(e,s,i)&&!this.right(e,n))}};return function(e){var n=new i(t.listContainer.id,{listClass:e.paginationClass||"pagination",item:e.item||"
  • ",valueNames:["page","dotted"],searchClass:"pagination-search-that-is-not-supposed-to-exist",sortClass:"pagination-sort-that-is-not-supposed-to-exist"});s.bind(n.listContainer,"click",(function(e){var r=e.target||e.srcElement,n=t.utils.getAttribute(r,"data-page"),s=t.utils.getAttribute(r,"data-i");s&&t.show((s-1)*n+1,n)})),t.on("updated",(function(){r(n,e)})),r(n,e)}}},"./src/parse.js":function(t,e,r){t.exports=function(t){var e=r("./src/item.js")(t),n=function(r,n){for(var s=0,i=r.length;s0?setTimeout((function(){e(r,s)}),1):(t.update(),t.trigger("parseComplete"))};return t.handlers.parseComplete=t.handlers.parseComplete||[],function(){var e=function(t){for(var e=t.childNodes,r=[],n=0,s=e.length;n]/g.exec(t)){var e=document.createElement("tbody");return e.innerHTML=t,e.firstElementChild}if(-1!==t.indexOf("<")){var r=document.createElement("div");return r.innerHTML=t,r.firstElementChild}}},a=function(e,r,n){var s=void 0,i=function(e){for(var r=0,n=t.valueNames.length;r=1;)t.list.removeChild(t.list.firstChild)},function(){var r;if("function"!=typeof t.item){if(!(r="string"==typeof t.item?-1===t.item.indexOf("<")?document.getElementById(t.item):i(t.item):s()))throw new Error("The list needs to have at least one item on init otherwise you'll have to add a template.");r=n(r,t.valueNames),e=function(){return r.cloneNode(!0)}}else e=function(e){var r=t.item(e);return i(r)}}()};t.exports=function(t){return new e(t)}},"./src/utils/classes.js":function(t,e,r){var n=r("./src/utils/index-of.js"),s=/\s+/;Object.prototype.toString;function i(t){if(!t||!t.nodeType)throw new Error("A DOM element reference is required");this.el=t,this.list=t.classList}t.exports=function(t){return new i(t)},i.prototype.add=function(t){if(this.list)return this.list.add(t),this;var e=this.array();return~n(e,t)||e.push(t),this.el.className=e.join(" "),this},i.prototype.remove=function(t){if(this.list)return this.list.remove(t),this;var 
e=this.array(),r=n(e,t);return~r&&e.splice(r,1),this.el.className=e.join(" "),this},i.prototype.toggle=function(t,e){return this.list?(void 0!==e?e!==this.list.toggle(t,e)&&this.list.toggle(t):this.list.toggle(t),this):(void 0!==e?e?this.add(t):this.remove(t):this.has(t)?this.remove(t):this.add(t),this)},i.prototype.array=function(){var t=(this.el.getAttribute("class")||"").replace(/^\s+|\s+$/g,"").split(s);return""===t[0]&&t.shift(),t},i.prototype.has=i.prototype.contains=function(t){return this.list?this.list.contains(t):!!~n(this.array(),t)}},"./src/utils/events.js":function(t,e,r){var n=window.addEventListener?"addEventListener":"attachEvent",s=window.removeEventListener?"removeEventListener":"detachEvent",i="addEventListener"!==n?"on":"",a=r("./src/utils/to-array.js");e.bind=function(t,e,r,s){for(var o=0,l=(t=a(t)).length;o32)return!1;var a=n,o=function(){var t,r={};for(t=0;t=p;b--){var j=o[t.charAt(b-1)];if(C[b]=0===m?(C[b+1]<<1|1)&j:(C[b+1]<<1|1)&j|(v[b+1]|v[b])<<1|1|v[b+1],C[b]&d){var x=l(m,b-1);if(x<=u){if(u=x,!((c=b-1)>a))break;p=Math.max(1,2*a-c)}}}if(l(m+1,a)>u)break;v=C}return!(c<0)}},"./src/utils/get-attribute.js":function(t){t.exports=function(t,e){var r=t.getAttribute&&t.getAttribute(e)||null;if(!r)for(var n=t.attributes,s=n.length,i=0;i=48&&t<=57}function i(t,e){for(var i=(t+="").length,a=(e+="").length,o=0,l=0;o=i&&l=a?-1:l>=a&&o=i?1:i-a}i.caseInsensitive=i.i=function(t,e){return i((""+t).toLowerCase(),(""+e).toLowerCase())},Object.defineProperties(i,{alphabet:{get:function(){return e},set:function(t){r=[];var s=0;if(e=t)for(;s { - if (categoriesLoaded) { - activateCategory(category); - setCategoryHash(category); - } -}; - -window["quarto-listing-loaded"] = () => { - // Process any existing hash - const hash = getHash(); - - if (hash) { - // If there is a category, switch to that - if (hash.category) { - activateCategory(hash.category); - } - // Paginate a specific listing - const listingIds = Object.keys(window["quarto-listings"]); - for (const 
listingId of listingIds) { - const page = hash[getListingPageKey(listingId)]; - if (page) { - showPage(listingId, page); - } - } - } - - const listingIds = Object.keys(window["quarto-listings"]); - for (const listingId of listingIds) { - // The actual list - const list = window["quarto-listings"][listingId]; - - // Update the handlers for pagination events - refreshPaginationHandlers(listingId); - - // Render any visible items that need it - renderVisibleProgressiveImages(list); - - // Whenever the list is updated, we also need to - // attach handlers to the new pagination elements - // and refresh any newly visible items. - list.on("updated", function () { - renderVisibleProgressiveImages(list); - setTimeout(() => refreshPaginationHandlers(listingId)); - - // Show or hide the no matching message - toggleNoMatchingMessage(list); - }); - } -}; - -window.document.addEventListener("DOMContentLoaded", function (_event) { - // Attach click handlers to categories - const categoryEls = window.document.querySelectorAll( - ".quarto-listing-category .category" - ); - - for (const categoryEl of categoryEls) { - const category = categoryEl.getAttribute("data-category"); - categoryEl.onclick = () => { - activateCategory(category); - setCategoryHash(category); - }; - } - - // Attach a click handler to the category title - // (there should be only one, but since it is a class name, handle N) - const categoryTitleEls = window.document.querySelectorAll( - ".quarto-listing-category-title" - ); - for (const categoryTitleEl of categoryTitleEls) { - categoryTitleEl.onclick = () => { - activateCategory(""); - setCategoryHash(""); - }; - } - - categoriesLoaded = true; -}); - -function toggleNoMatchingMessage(list) { - const selector = `#${list.listContainer.id} .listing-no-matching`; - const noMatchingEl = window.document.querySelector(selector); - if (noMatchingEl) { - if (list.visibleItems.length === 0) { - noMatchingEl.classList.remove("d-none"); - } else { - if 
(!noMatchingEl.classList.contains("d-none")) { - noMatchingEl.classList.add("d-none"); - } - } - } -} - -function setCategoryHash(category) { - setHash({ category }); -} - -function setPageHash(listingId, page) { - const currentHash = getHash() || {}; - currentHash[getListingPageKey(listingId)] = page; - setHash(currentHash); -} - -function getListingPageKey(listingId) { - return `${listingId}-page`; -} - -function refreshPaginationHandlers(listingId) { - const listingEl = window.document.getElementById(listingId); - const paginationEls = listingEl.querySelectorAll( - ".pagination li.page-item:not(.disabled) .page.page-link" - ); - for (const paginationEl of paginationEls) { - paginationEl.onclick = (sender) => { - setPageHash(listingId, sender.target.getAttribute("data-i")); - showPage(listingId, sender.target.getAttribute("data-i")); - return false; - }; - } -} - -function renderVisibleProgressiveImages(list) { - // Run through the visible items and render any progressive images - for (const item of list.visibleItems) { - const itemEl = item.elm; - if (itemEl) { - const progressiveImgs = itemEl.querySelectorAll( - `img[${kProgressiveAttr}]` - ); - for (const progressiveImg of progressiveImgs) { - const srcValue = progressiveImg.getAttribute(kProgressiveAttr); - if (srcValue) { - progressiveImg.setAttribute("src", srcValue); - } - progressiveImg.removeAttribute(kProgressiveAttr); - } - } - } -} - -function getHash() { - // Hashes are of the form - // #name:value|name1:value1|name2:value2 - const currentUrl = new URL(window.location); - const hashRaw = currentUrl.hash ? 
currentUrl.hash.slice(1) : undefined; - return parseHash(hashRaw); -} - -const kAnd = "&"; -const kEquals = "="; - -function parseHash(hash) { - if (!hash) { - return undefined; - } - const hasValuesStrs = hash.split(kAnd); - const hashValues = hasValuesStrs - .map((hashValueStr) => { - const vals = hashValueStr.split(kEquals); - if (vals.length === 2) { - return { name: vals[0], value: vals[1] }; - } else { - return undefined; - } - }) - .filter((value) => { - return value !== undefined; - }); - - const hashObj = {}; - hashValues.forEach((hashValue) => { - hashObj[hashValue.name] = decodeURIComponent(hashValue.value); - }); - return hashObj; -} - -function makeHash(obj) { - return Object.keys(obj) - .map((key) => { - return `${key}${kEquals}${obj[key]}`; - }) - .join(kAnd); -} - -function setHash(obj) { - const hash = makeHash(obj); - window.history.pushState(null, null, `#${hash}`); -} - -function showPage(listingId, page) { - const list = window["quarto-listings"][listingId]; - if (list) { - list.show((page - 1) * list.page + 1, list.page); - } -} - -function activateCategory(category) { - // Deactivate existing categories - const activeEls = window.document.querySelectorAll( - ".quarto-listing-category .category.active" - ); - for (const activeEl of activeEls) { - activeEl.classList.remove("active"); - } - - // Activate this category - const categoryEl = window.document.querySelector( - `.quarto-listing-category .category[data-category='${category}'` - ); - if (categoryEl) { - categoryEl.classList.add("active"); - } - - // Filter the listings to this category - filterListingCategory(category); -} - -function filterListingCategory(category) { - const listingIds = Object.keys(window["quarto-listings"]); - for (const listingId of listingIds) { - const list = window["quarto-listings"][listingId]; - if (list) { - if (category === "") { - // resets the filter - list.filter(); - } else { - // filter to this category - list.filter(function (item) { - const itemValues = 
item.values(); - if (itemValues.categories !== null) { - const categories = itemValues.categories.split(","); - return categories.includes(category); - } else { - return false; - } - }); - } - } - } -} diff --git a/.quarto/idx/index.qmd.json b/.quarto/idx/index.qmd.json deleted file mode 100644 index 24f549e..0000000 --- a/.quarto/idx/index.qmd.json +++ /dev/null @@ -1 +0,0 @@ -{"title":"Analytics at Scale","markdown":{"yaml":{"title":"Analytics at Scale","subtitle":"Workshopping best practices for big data analytics in epidemiology"},"headingText":"Objective","containsRefs":false,"markdown":"\n\n - Hold weekly workshops **lead by team members** or invitees.\n - **Produce resources** such as videos, tutorials and code resources from workshops on this resource page\n - **Standardize** our analytic workflows.\n \n## Focus\n\n- Best practices for writing **robust**, **reproducible**, and **readable** R code\n- Optimization for big data as our data set grows\n- Tips and tricks for productivity \n- Integration with cloud computing resources and data sources\n- Integrating R and SQL \n\n## Past topics\nDBI and dbplyr, code snippets\n\n## Future workshop topics:\nfunctional programming, `renv`, GitHub, Docker, pipelines, Style Guides, \ndeveloping R packages, publishing data products, RStudio Snippets, `linter`, \n`styler`, `rshiny`, `rix`, SQL & R, Quarto, Quarto Websites, Quarto Dashboards, \n`plumber` APIs, data structures, JSON in R, mermaid diagrams, SQLlite, DuckDB,\n... 
\n\n## References\n- [Advanced R](http://adv-r.had.co.nz/) by Hadley Wickham\n- [Building Reproducible Analytical Pipelines with R](https://raps-with-r.dev/) by Bruno Rodriguez\n- [R Packages](https://r-pkgs.org/) by Hadley Wickham\n- [Modern Data Visualization with R](https://rkabacoff.github.io/datavis/) by Robert Kabacoff\n\n","srcMarkdownNoYaml":"\n\n## Objective\n - Hold weekly workshops **lead by team members** or invitees.\n - **Produce resources** such as videos, tutorials and code resources from workshops on this resource page\n - **Standardize** our analytic workflows.\n \n## Focus\n\n- Best practices for writing **robust**, **reproducible**, and **readable** R code\n- Optimization for big data as our data set grows\n- Tips and tricks for productivity \n- Integration with cloud computing resources and data sources\n- Integrating R and SQL \n\n## Past topics\nDBI and dbplyr, code snippets\n\n## Future workshop topics:\nfunctional programming, `renv`, GitHub, Docker, pipelines, Style Guides, \ndeveloping R packages, publishing data products, RStudio Snippets, `linter`, \n`styler`, `rshiny`, `rix`, SQL & R, Quarto, Quarto Websites, Quarto Dashboards, \n`plumber` APIs, data structures, JSON in R, mermaid diagrams, SQLlite, DuckDB,\n... 
\n\n## References\n- [Advanced R](http://adv-r.had.co.nz/) by Hadley Wickham\n- [Building Reproducible Analytical Pipelines with R](https://raps-with-r.dev/) by Bruno Rodriguez\n- [R Packages](https://r-pkgs.org/) by Hadley Wickham\n- [Modern Data Visualization with R](https://rkabacoff.github.io/datavis/) by Robert Kabacoff\n\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":false,"echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","css":["styles.css"],"toc":true,"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other 
Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","appendix-view-license":"View License","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this 
document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} 
words","listing-page-filter":"Filter","draft":"Draft"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.5.55","editor":"visual","theme":"cosmo","title":"Analytics at Scale","subtitle":"Workshopping best practices for big data analytics in epidemiology"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/meeting-notes.qmd.json b/.quarto/idx/meeting-notes.qmd.json deleted file mode 100644 index 635b8ba..0000000 --- a/.quarto/idx/meeting-notes.qmd.json +++ /dev/null @@ -1 +0,0 @@ -{"title":"Meeting Notes","markdown":{"yaml":{"title":"Meeting Notes","listing":{"type":"table","fields":["date","title","subtitle"],"sort":"date desc","categories":false,"contents":"meeting-notes/*.qmd"}},"containsRefs":false,"markdown":"\n","srcMarkdownNoYaml":"\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":false,"echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon"
:false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","css":["styles.css"],"toc":true,"output-file":"meeting-notes.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","appendix-view-license":"View License","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source 
Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to 
High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} words","listing-page-filter":"Filter","draft":"Draft"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.5.55","editor":"visual","theme":"cosmo","title":"Meeting Notes","listing":{"type":"table","fields":["date","title","subtitle"],"sort":"date desc","categories":false,"contents":"meeting-notes/*.qmd"}},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/meeting-notes/2024-10-04.qmd.json b/.quarto/idx/meeting-notes/2024-10-04.qmd.json deleted file mode 100644 index d3d42e8..0000000 --- a/.quarto/idx/meeting-notes/2024-10-04.qmd.json +++ /dev/null @@ -1 +0,0 @@ -{"title":"Counts by Period","markdown":{"yaml":{"title":"Counts by Period","subtitle":"Optimizing counts by period using DBI","date":"2024-10-04","author":"Jake Peters"},"headingText":"Attendees:","containsRefs":false,"markdown":"\n\nJP, KD, BC, LO\n\n### Tutorial: Count by Period w/ DBI\nJP walked through the [Count by Period w/ DBI](https://analyticsphere.github.io/c4cp/articles/count_by_period_vignette.html) tutorial to demonstrate some code he wrote to create counts by day, week, month, quarter or year using a DBI connection to avoid downloading the data until after aggreagion. 
This was intended to address some issues that Kelsey was having with the Biospecimen Report which has many tables that are aggregated by date, precluding her from using DBI easily.\n\n### What do we want to use this workshop for?\n\n- Leila:\n\n - Use this to refactor code that is written, but need guidance to improve or use better practices\n\n - Debug problem code together\n\n- Jake\n\n - Tackle big, recurring problems together\n\n - Write functions that can be reused\n\n- Kelsey:\n\n - Refactoring code that has been re-used or passed repeatedly\n\n - Even working with ChatGPT\n\n - Using SQL to do transformations THEN pulling the data down\n\n- Brittany:\n\n - SQL specific discussion.\n\n - We are all a little weaker in SQL.\n\n- SQL:\n\n - Joins\n\n - Unions\n\n - Transformations\n\n - Group By (with counts)\n\n - Ungroup after counts (when is this necessary and why)\n\n - SQL Course on Coursera\n\n### Second Workshop:\n\n- Code Snippets\n\n### Future Worshops:\n\n- Joining Module 1 v1 and v2 Joining Module 2 v1 and v2\n","srcMarkdownNoYaml":"\n\n### Attendees: \nJP, KD, BC, LO\n\n### Tutorial: Count by Period w/ DBI\nJP walked through the [Count by Period w/ DBI](https://analyticsphere.github.io/c4cp/articles/count_by_period_vignette.html) tutorial to demonstrate some code he wrote to create counts by day, week, month, quarter or year using a DBI connection to avoid downloading the data until after aggreagion. 
This was intended to address some issues that Kelsey was having with the Biospecimen Report which has many tables that are aggregated by date, precluding her from using DBI easily.\n\n### What do we want to use this workshop for?\n\n- Leila:\n\n - Use this to refactor code that is written, but need guidance to improve or use better practices\n\n - Debug problem code together\n\n- Jake\n\n - Tackle big, recurring problems together\n\n - Write functions that can be reused\n\n- Kelsey:\n\n - Refactoring code that has been re-used or passed repeatedly\n\n - Even working with ChatGPT\n\n - Using SQL to do transformations THEN pulling the data down\n\n- Brittany:\n\n - SQL specific discussion.\n\n - We are all a little weaker in SQL.\n\n- SQL:\n\n - Joins\n\n - Unions\n\n - Transformations\n\n - Group By (with counts)\n\n - Ungroup after counts (when is this necessary and why)\n\n - SQL Course on Coursera\n\n### Second Workshop:\n\n- Code Snippets\n\n### Future Worshops:\n\n- Joining Module 1 v1 and v2 Joining Module 2 v1 and 
v2\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":false,"echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","css":["../styles.css"],"toc":true,"output-file":"2024-10-04.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to 
Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","appendix-view-license":"View License","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar 
navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} words","listing-page-filter":"Filter","draft":"Draft"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.5.55","editor":"visual","theme":"cosmo","title":"Counts by Period","subtitle":"Optimizing counts by period using DBI","date":"2024-10-04","author":"Jake 
Peters"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/meeting-notes/2024-10-11.qmd.json b/.quarto/idx/meeting-notes/2024-10-11.qmd.json deleted file mode 100644 index 566c390..0000000 --- a/.quarto/idx/meeting-notes/2024-10-11.qmd.json +++ /dev/null @@ -1 +0,0 @@ -{"title":"Snippets","markdown":{"yaml":{"title":"Snippets","subtitle":"Custom code snippets in RStudio","date":"2024-10-11","author":"Jake Peters"},"headingText":"Attendees","containsRefs":false,"markdown":"\n\n\nJP, LO, BC\n\n### Notes\n\n- JP walked through a [RStudio Snippets](../tutorials/snippets.qmd) tutorial.\n\n- LO and BC set up custom RStudio Snippets on their local machines.\n","srcMarkdownNoYaml":"\n\n### Attendees\n\nJP, LO, BC\n\n### Notes\n\n- JP walked through a [RStudio Snippets](../tutorials/snippets.qmd) tutorial.\n\n- LO and BC set up custom RStudio Snippets on their local machines.\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":false,"echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex
","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","css":["../styles.css"],"toc":true,"output-file":"2024-10-11.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","appendix-view-license":"View License","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View 
Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to 
Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} words","listing-page-filter":"Filter","draft":"Draft"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.5.55","editor":"visual","theme":"cosmo","title":"Snippets","subtitle":"Custom code snippets in RStudio","date":"2024-10-11","author":"Jake Peters"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/snippets.qmd.json b/.quarto/idx/snippets.qmd.json deleted file mode 100644 index 16b1312..0000000 --- a/.quarto/idx/snippets.qmd.json +++ /dev/null @@ -1 +0,0 @@ -{"title":"RStudio Snippets","markdown":{"yaml":{"title":"RStudio Snippets","author":"Jake Peters","date":"10/11/24","format":"html","editor":{"markdown":{"wrap":72}}},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n\nThis tutorial demonstrates how to use both built-in and custom RStudio\nsnippets within an R script. By leveraging snippets, you can streamline\nyour coding process, reduce repetitive tasks, and maintain consistency\nacross your projects.\n\n## Table of Contents\n\n1. Introduction to RStudio Snippets\n2. Using Built-in Snippets\n3. Creating and Using Custom Snippets\n\n## Introduction to RStudio Snippets\n\n*Snippets* in RStudio are predefined blocks of code that can be quickly\ninserted into your scripts or markdown files. 
They enhance productivity\nby minimizing repetitive typing and ensuring consistency across your\ncoding projects.\n\nIn this tutorial, we'll explore how to utilize RStudio's built-in\nsnippets and create custom snippets tailored to your specific workflow.\n\n------------------------------------------------------------------------\n\n## Using Built-in Snippets\n\nRStudio comes equipped with several built-in snippets that can be\neffortlessly used in `.qmd` files. Below are some commonly used built-in\nsnippets along with examples of their usage.\n\n#### Fun\n\nType `fun` and press `Tab`.\n\n``` r\nfun my_function <- function(arg1, arg2) {\n# Function body\n}\n```\n\n#### For\n\nType `for` and press `Tab`.\n\n``` r\nfor (variable in vector) {\n \n}\n```\n\n#### While\n\nType `while` and press `Tab`.\n\n``` r\nwhile (condition) {\n \n}\n```\n\n#### Apply\n\nType `lapply` and press `Tab`.\n\n``` r\napply(array, margin, ...)\n```\n\n#### Lapply\n\nType `lapply` and press `Tab`.\n\n``` r\nlapply(list, function)\n```\n\n#### ts\n\nType `ts` and press `Tab`.\n\n``` r\n# Fri Oct 11 09:53:23 2024 ------------------------------\n```\n\n## Using Custom Snippets\n\nGo to `Tools` \\> `Global Options` \\> `Snippets` \\> `Edit`.\n\nThen paste the following at the bottom of the page and hit `Apply`\n\n``` r\nsnippet dev\n \"nih-nci-dceg-connect-dev\"\n```\n\n![Gif showing the workflow of creating/using your own\nsnippet](media/snippets.gif)\n\n::: callout-note\nNote that all code after the `snippet ` key word must\nbe indented.\n:::\n\n## More custom snippets to try\n\n``` r\nsnippet header\n ## Description =================================================================\n # Title: ${1:Script Title}\n # Author: ${2:Your Name}\n # Date: ${3:`r format(Sys.Date(), \"%Y-%m-%d\")`}\n # Objective: ${4:Script Purpose}\n # GH-Issue: ${5:Github Issue}\n\nsnippet query\n bq_auth() # Authenticate with BigQuery\n con <- DBI::dbConnect(bigrquery::bigquery(), \n project=project, 
dataset=dataset, billing=billing))\n \n # Specify just the data we need with a query\n dataset <- \"FlatConnect\"\n table <- \"participants_JP\"\n tier <- \"nih-nci-dceg-connect-prod-6d04\"\n sql <- glue::glue(\n \"\"\"\n SELECT \n Connect_ID,\n token\n FROM `{project}.{dataset}.{table}`\n \"\"\")\n \n # Query the data and store as reference object\ndata <- dbGetQuery(con, SQL)\n\nsnippet dev\n \"nih-nci-dceg-connect-dev\"\n\nsnippet stg\n \"nih-nci-dceg-connect-stg-5519\"\n\nsnippet prod\n \"nih-nci-dceg-connect-prod-6d04\"\n\nsnippet exclusions\n WHERE \n p.d_821247024 = '197316935' -- 'Verif Status' IS 'Verified'\n AND p.d_747006172 != '353358909' -- 'Withdraw consent' IS NOT \"Yes\"\n AND p.d_100767870 = '104430631' -- 'All Base Surv Complete' = 'No'\n AND p.d_685002411_d_994064239 != '353358909' -- 'Ref Base Survs' IS NOT \"Yes\"\n AND p.d_987563196 != '353358909' -- 'Dead' IS NOT 'Yes'\n```\n","srcMarkdownNoYaml":"\n\n# Introduction\n\nThis tutorial demonstrates how to use both built-in and custom RStudio\nsnippets within an R script. By leveraging snippets, you can streamline\nyour coding process, reduce repetitive tasks, and maintain consistency\nacross your projects.\n\n## Table of Contents\n\n1. Introduction to RStudio Snippets\n2. Using Built-in Snippets\n3. Creating and Using Custom Snippets\n\n## Introduction to RStudio Snippets\n\n*Snippets* in RStudio are predefined blocks of code that can be quickly\ninserted into your scripts or markdown files. They enhance productivity\nby minimizing repetitive typing and ensuring consistency across your\ncoding projects.\n\nIn this tutorial, we'll explore how to utilize RStudio's built-in\nsnippets and create custom snippets tailored to your specific workflow.\n\n------------------------------------------------------------------------\n\n## Using Built-in Snippets\n\nRStudio comes equipped with several built-in snippets that can be\neffortlessly used in `.qmd` files. 
Below are some commonly used built-in\nsnippets along with examples of their usage.\n\n#### Fun\n\nType `fun` and press `Tab`.\n\n``` r\nfun my_function <- function(arg1, arg2) {\n# Function body\n}\n```\n\n#### For\n\nType `for` and press `Tab`.\n\n``` r\nfor (variable in vector) {\n \n}\n```\n\n#### While\n\nType `while` and press `Tab`.\n\n``` r\nwhile (condition) {\n \n}\n```\n\n#### Apply\n\nType `lapply` and press `Tab`.\n\n``` r\napply(array, margin, ...)\n```\n\n#### Lapply\n\nType `lapply` and press `Tab`.\n\n``` r\nlapply(list, function)\n```\n\n#### ts\n\nType `ts` and press `Tab`.\n\n``` r\n# Fri Oct 11 09:53:23 2024 ------------------------------\n```\n\n## Using Custom Snippets\n\nGo to `Tools` \\> `Global Options` \\> `Snippets` \\> `Edit`.\n\nThen paste the following at the bottom of the page and hit `Apply`\n\n``` r\nsnippet dev\n \"nih-nci-dceg-connect-dev\"\n```\n\n![Gif showing the workflow of creating/using your own\nsnippet](media/snippets.gif)\n\n::: callout-note\nNote that all code after the `snippet ` key word must\nbe indented.\n:::\n\n## More custom snippets to try\n\n``` r\nsnippet header\n ## Description =================================================================\n # Title: ${1:Script Title}\n # Author: ${2:Your Name}\n # Date: ${3:`r format(Sys.Date(), \"%Y-%m-%d\")`}\n # Objective: ${4:Script Purpose}\n # GH-Issue: ${5:Github Issue}\n\nsnippet query\n bq_auth() # Authenticate with BigQuery\n con <- DBI::dbConnect(bigrquery::bigquery(), \n project=project, dataset=dataset, billing=billing))\n \n # Specify just the data we need with a query\n dataset <- \"FlatConnect\"\n table <- \"participants_JP\"\n tier <- \"nih-nci-dceg-connect-prod-6d04\"\n sql <- glue::glue(\n \"\"\"\n SELECT \n Connect_ID,\n token\n FROM `{project}.{dataset}.{table}`\n \"\"\")\n \n # Query the data and store as reference object\ndata <- dbGetQuery(con, SQL)\n\nsnippet dev\n \"nih-nci-dceg-connect-dev\"\n\nsnippet stg\n 
\"nih-nci-dceg-connect-stg-5519\"\n\nsnippet prod\n \"nih-nci-dceg-connect-prod-6d04\"\n\nsnippet exclusions\n WHERE \n p.d_821247024 = '197316935' -- 'Verif Status' IS 'Verified'\n AND p.d_747006172 != '353358909' -- 'Withdraw consent' IS NOT \"Yes\"\n AND p.d_100767870 = '104430631' -- 'All Base Surv Complete' = 'No'\n AND p.d_685002411_d_994064239 != '353358909' -- 'Ref Base Survs' IS NOT \"Yes\"\n AND p.d_987563196 != '353358909' -- 'Dead' IS NOT 'Yes'\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":false,"echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","css":["styles.css"],"toc":true,"output-file":"snippets.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this 
page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","appendix-view-license":"View License","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching 
documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word 
Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} words","listing-page-filter":"Filter","draft":"Draft"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.5.55","editor":{"markdown":{"wrap":72}},"theme":"cosmo","title":"RStudio Snippets","author":"Jake Peters","date":"10/11/24"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/tutorials/snippets.qmd.json b/.quarto/idx/tutorials/snippets.qmd.json deleted file mode 100644 index 0981ac7..0000000 --- a/.quarto/idx/tutorials/snippets.qmd.json +++ /dev/null @@ -1 +0,0 @@ -{"title":"RStudio Snippets","markdown":{"yaml":{"title":"RStudio Snippets","author":"Jake Peters","date":"10/11/24","format":"html","editor":{"markdown":{"wrap":72}}},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n\nThis tutorial demonstrates how to use both built-in and custom RStudio\nsnippets within an R script. By leveraging snippets, you can streamline\nyour coding process, reduce repetitive tasks, and maintain consistency\nacross your projects.\n\n## Table of Contents\n\n1. Introduction to RStudio Snippets\n2. Using Built-in Snippets\n3. Creating and Using Custom Snippets\n\n## Introduction to RStudio Snippets\n\n*Snippets* in RStudio are predefined blocks of code that can be quickly\ninserted into your scripts or markdown files. They enhance productivity\nby minimizing repetitive typing and ensuring consistency across your\ncoding projects.\n\nIn this tutorial, we'll explore how to utilize RStudio's built-in\nsnippets and create custom snippets tailored to your specific workflow.\n\n------------------------------------------------------------------------\n\n## Using Built-in Snippets\n\nRStudio comes equipped with several built-in snippets that can be\neffortlessly used in `.qmd` files. 
Below are some commonly used built-in\nsnippets along with examples of their usage.\n\n#### Fun\n\nType `fun` and press `Tab`.\n\n``` r\nfun my_function <- function(arg1, arg2) {\n# Function body\n}\n```\n\n#### For\n\nType `for` and press `Tab`.\n\n``` r\nfor (variable in vector) {\n \n}\n```\n\n#### While\n\nType `while` and press `Tab`.\n\n``` r\nwhile (condition) {\n \n}\n```\n\n#### Apply\n\nType `lapply` and press `Tab`.\n\n``` r\napply(array, margin, ...)\n```\n\n#### Lapply\n\nType `lapply` and press `Tab`.\n\n``` r\nlapply(list, function)\n```\n\n#### ts\n\nType `ts` and press `Tab`.\n\n``` r\n# Fri Oct 11 09:53:23 2024 ------------------------------\n```\n\n## Using Custom Snippets\n\nGo to `Tools` \\> `Global Options` \\> `Snippets` \\> `Edit`.\n\nThen paste the following at the bottom of the page and hit `Apply`\n\n``` r\nsnippet dev\n \"nih-nci-dceg-connect-dev\"\n```\n\n![Gif showing the workflow of creating/using your own\nsnippet](media/snippets.gif)\n\n::: callout-note\nNote that all code after the `snippet ` key word must\nbe indented.\n:::\n\n## More custom snippets to try\n\n``` r\nsnippet header\n ## Description =================================================================\n # Title: ${1:Script Title}\n # Author: ${2:Your Name}\n # Date: ${3:`r format(Sys.Date(), \"%Y-%m-%d\")`}\n # Objective: ${4:Script Purpose}\n # GH-Issue: ${5:Github Issue}\n\nsnippet query\n bq_auth() # Authenticate with BigQuery\n con <- DBI::dbConnect(bigrquery::bigquery(), \n project=project, dataset=dataset, billing=billing))\n \n # Specify just the data we need with a query\n dataset <- \"FlatConnect\"\n table <- \"participants_JP\"\n tier <- \"nih-nci-dceg-connect-prod-6d04\"\n sql <- glue::glue(\n \"\"\"\n SELECT \n Connect_ID,\n token\n FROM `{project}.{dataset}.{table}`\n \"\"\")\n \n # Query the data and store as reference object\ndata <- dbGetQuery(con, SQL)\n\nsnippet dev\n \"nih-nci-dceg-connect-dev\"\n\nsnippet stg\n 
\"nih-nci-dceg-connect-stg-5519\"\n\nsnippet prod\n \"nih-nci-dceg-connect-prod-6d04\"\n\nsnippet exclusions\n WHERE \n p.d_821247024 = '197316935' -- 'Verif Status' IS 'Verified'\n AND p.d_747006172 != '353358909' -- 'Withdraw consent' IS NOT \"Yes\"\n AND p.d_100767870 = '104430631' -- 'All Base Surv Complete' = 'No'\n AND p.d_685002411_d_994064239 != '353358909' -- 'Ref Base Survs' IS NOT \"Yes\"\n AND p.d_987563196 != '353358909' -- 'Dead' IS NOT 'Yes'\n```\n","srcMarkdownNoYaml":"\n\n# Introduction\n\nThis tutorial demonstrates how to use both built-in and custom RStudio\nsnippets within an R script. By leveraging snippets, you can streamline\nyour coding process, reduce repetitive tasks, and maintain consistency\nacross your projects.\n\n## Table of Contents\n\n1. Introduction to RStudio Snippets\n2. Using Built-in Snippets\n3. Creating and Using Custom Snippets\n\n## Introduction to RStudio Snippets\n\n*Snippets* in RStudio are predefined blocks of code that can be quickly\ninserted into your scripts or markdown files. They enhance productivity\nby minimizing repetitive typing and ensuring consistency across your\ncoding projects.\n\nIn this tutorial, we'll explore how to utilize RStudio's built-in\nsnippets and create custom snippets tailored to your specific workflow.\n\n------------------------------------------------------------------------\n\n## Using Built-in Snippets\n\nRStudio comes equipped with several built-in snippets that can be\neffortlessly used in `.qmd` files. 
Below are some commonly used built-in\nsnippets along with examples of their usage.\n\n#### Fun\n\nType `fun` and press `Tab`.\n\n``` r\nfun my_function <- function(arg1, arg2) {\n# Function body\n}\n```\n\n#### For\n\nType `for` and press `Tab`.\n\n``` r\nfor (variable in vector) {\n \n}\n```\n\n#### While\n\nType `while` and press `Tab`.\n\n``` r\nwhile (condition) {\n \n}\n```\n\n#### Apply\n\nType `lapply` and press `Tab`.\n\n``` r\napply(array, margin, ...)\n```\n\n#### Lapply\n\nType `lapply` and press `Tab`.\n\n``` r\nlapply(list, function)\n```\n\n#### ts\n\nType `ts` and press `Tab`.\n\n``` r\n# Fri Oct 11 09:53:23 2024 ------------------------------\n```\n\n## Using Custom Snippets\n\nGo to `Tools` \\> `Global Options` \\> `Snippets` \\> `Edit`.\n\nThen paste the following at the bottom of the page and hit `Apply`\n\n``` r\nsnippet dev\n \"nih-nci-dceg-connect-dev\"\n```\n\n![Gif showing the workflow of creating/using your own\nsnippet](media/snippets.gif)\n\n::: callout-note\nNote that all code after the `snippet ` key word must\nbe indented.\n:::\n\n## More custom snippets to try\n\n``` r\nsnippet header\n ## Description =================================================================\n # Title: ${1:Script Title}\n # Author: ${2:Your Name}\n # Date: ${3:`r format(Sys.Date(), \"%Y-%m-%d\")`}\n # Objective: ${4:Script Purpose}\n # GH-Issue: ${5:Github Issue}\n\nsnippet query\n bq_auth() # Authenticate with BigQuery\n con <- DBI::dbConnect(bigrquery::bigquery(), \n project=project, dataset=dataset, billing=billing))\n \n # Specify just the data we need with a query\n dataset <- \"FlatConnect\"\n table <- \"participants_JP\"\n tier <- \"nih-nci-dceg-connect-prod-6d04\"\n sql <- glue::glue(\n \"\"\"\n SELECT \n Connect_ID,\n token\n FROM `{project}.{dataset}.{table}`\n \"\"\")\n \n # Query the data and store as reference object\ndata <- dbGetQuery(con, SQL)\n\nsnippet dev\n \"nih-nci-dceg-connect-dev\"\n\nsnippet stg\n 
\"nih-nci-dceg-connect-stg-5519\"\n\nsnippet prod\n \"nih-nci-dceg-connect-prod-6d04\"\n\nsnippet exclusions\n WHERE \n p.d_821247024 = '197316935' -- 'Verif Status' IS 'Verified'\n AND p.d_747006172 != '353358909' -- 'Withdraw consent' IS NOT \"Yes\"\n AND p.d_100767870 = '104430631' -- 'All Base Surv Complete' = 'No'\n AND p.d_685002411_d_994064239 != '353358909' -- 'Ref Base Survs' IS NOT \"Yes\"\n AND p.d_987563196 != '353358909' -- 'Dead' IS NOT 'Yes'\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":false,"echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","css":["../styles.css"],"toc":true,"output-file":"snippets.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this 
page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","appendix-view-license":"View License","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching 
documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word 
Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} words","listing-page-filter":"Filter","draft":"Draft"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.5.55","editor":{"markdown":{"wrap":72}},"theme":"cosmo","title":"RStudio Snippets","author":"Jake Peters","date":"10/11/24"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/listing/listing-cache.json b/.quarto/listing/listing-cache.json deleted file mode 100644 index ad0312b..0000000 --- a/.quarto/listing/listing-cache.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "listingMap": { - "meeting-notes.qmd": [ - "meeting-notes/*.qmd" - ] - } -} \ No newline at end of file diff --git a/.quarto/preview/lock b/.quarto/preview/lock deleted file mode 100644 index a7ae138..0000000 --- a/.quarto/preview/lock +++ /dev/null @@ -1 +0,0 @@ -50290 \ No newline at end of file diff --git a/.quarto/xref/598b2e90 b/.quarto/xref/598b2e90 deleted file mode 100644 index 7df77d5..0000000 --- a/.quarto/xref/598b2e90 +++ /dev/null @@ -1 +0,0 @@ -{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/6afc8b7a b/.quarto/xref/6afc8b7a deleted file mode 100644 index e9bc5b2..0000000 --- a/.quarto/xref/6afc8b7a +++ /dev/null @@ -1 +0,0 @@ -{"entries":[],"headings":["attendees","tutorial-count-by-period-w-dbi","what-do-we-want-to-use-this-workshop-for","second-workshop","future-worshops"]} \ No newline at end of file diff --git a/.quarto/xref/755cd591 b/.quarto/xref/755cd591 deleted file mode 100644 index 0eaf8d7..0000000 --- a/.quarto/xref/755cd591 +++ /dev/null @@ -1 +0,0 @@ -{"headings":["introduction","table-of-contents","introduction-to-rstudio-snippets","using-built-in-snippets","fun","for","while","apply","lapply","ts","using-custom-snippets","more-custom-snippets-to-try"],"entries":[]} 
\ No newline at end of file diff --git a/.quarto/xref/862fc1db b/.quarto/xref/862fc1db deleted file mode 100644 index 208374b..0000000 --- a/.quarto/xref/862fc1db +++ /dev/null @@ -1 +0,0 @@ -{"entries":[],"headings":[]} \ No newline at end of file diff --git a/.quarto/xref/INDEX b/.quarto/xref/INDEX deleted file mode 100644 index 74a284d..0000000 --- a/.quarto/xref/INDEX +++ /dev/null @@ -1,26 +0,0 @@ -{ - "snippets.qmd": { - "snippets.html": "755cd591" - }, - "about.qmd": { - "about.html": "598b2e90" - }, - "index.qmd": { - "index.html": "fa6fa8d5" - }, - "meeting-notes.qmd": { - "meeting-notes.html": "d7a01680" - }, - "meeting-notes/24.10.04.qmd": { - "24.10.04.html": "862fc1db" - }, - "meeting-notes/2024-10-04.qmd": { - "2024-10-04.html": "6afc8b7a" - }, - "meeting-notes/2024-10-11.qmd": { - "2024-10-11.html": "e199f2dd" - }, - "tutorials/snippets.qmd": { - "snippets.html": "d0caaf14" - } -} \ No newline at end of file diff --git a/.quarto/xref/d0caaf14 b/.quarto/xref/d0caaf14 deleted file mode 100644 index 920cfc8..0000000 --- a/.quarto/xref/d0caaf14 +++ /dev/null @@ -1 +0,0 @@ -{"entries":[],"headings":["introduction","table-of-contents","introduction-to-rstudio-snippets","using-built-in-snippets","fun","for","while","apply","lapply","ts","using-custom-snippets","more-custom-snippets-to-try"]} \ No newline at end of file diff --git a/.quarto/xref/d7a01680 b/.quarto/xref/d7a01680 deleted file mode 100644 index 7df77d5..0000000 --- a/.quarto/xref/d7a01680 +++ /dev/null @@ -1 +0,0 @@ -{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/e199f2dd b/.quarto/xref/e199f2dd deleted file mode 100644 index ee9fd39..0000000 --- a/.quarto/xref/e199f2dd +++ /dev/null @@ -1 +0,0 @@ -{"headings":["attendees","notes"],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/fa6fa8d5 b/.quarto/xref/fa6fa8d5 deleted file mode 100644 index 3c647f9..0000000 --- a/.quarto/xref/fa6fa8d5 +++ /dev/null @@ -1 +0,0 @@ 
-{"entries":[],"headings":["objective","focus","past-topics","future-workshop-topics","references"]} \ No newline at end of file diff --git a/_quarto.yml b/_quarto.yml index af5c63b..9bf45b7 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -9,8 +9,8 @@ website: tools: - icon: github menu: - - text: Source Code - href: https://github.com/analyticsphere/analytics-at-scale + - text: Website Source Code + href: https://github.com/Analyticsphere/analytics-at-scale-workshop - text: Analyticshpere href: https://github.com/analyticsphere diff --git a/index.qmd b/index.qmd index 7bdce6e..7383361 100644 --- a/index.qmd +++ b/index.qmd @@ -4,31 +4,30 @@ subtitle: Workshopping best practices for big data analytics in epidemiology --- ## Objective - - Hold weekly workshops **lead by team members** or invitees. - - **Produce resources** such as videos, tutorials and code resources from workshops on this resource page - - **Standardize** our analytic workflows. - + +- Hold weekly workshops **led by team members** or invitees. +- **Produce resources** such as videos, tutorials and code resources from workshops on this resource page +- **Standardize** our analytic workflows.
+ ## Focus -- Best practices for writing **robust**, **reproducible**, and **readable** R code -- Optimization for big data as our data set grows -- Tips and tricks for productivity -- Integration with cloud computing resources and data sources -- Integrating R and SQL +- Best practices for writing **robust**, **reproducible**, and **readable** R code +- Optimization for big data as our data set grows +- Tips and tricks for productivity +- Integration with cloud computing resources and data sources +- Integrating R and SQL ## Past topics + DBI and dbplyr, code snippets ## Future workshop topics: -functional programming, `renv`, GitHub, Docker, pipelines, Style Guides, -developing R packages, publishing data products, RStudio Snippets, `linter`, -`styler`, `rshiny`, `rix`, SQL & R, Quarto, Quarto Websites, Quarto Dashboards, -`plumber` APIs, data structures, JSON in R, mermaid diagrams, SQLlite, DuckDB, -... + +functional programming, `renv`, GitHub, Docker, pipelines, Style Guides, developing R packages, publishing data products, RStudio Snippets, `lintr`, `styler`, `rshiny`, `rix`, SQL & R, Quarto, Quarto Websites, Quarto Dashboards, `plumber` APIs, data structures, JSON in R, mermaid diagrams, SQLite, DuckDB, ...
## References + - [Advanced R](http://adv-r.had.co.nz/) by Hadley Wickham - [Building Reproducible Analytical Pipelines with R](https://raps-with-r.dev/) by Bruno Rodriguez - [R Packages](https://r-pkgs.org/) by Hadley Wickham - [Modern Data Visualization with R](https://rkabacoff.github.io/datavis/) by Robert Kabacoff - From 277416454f9ce6d038e5459966708562e2bdb4b8 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 13:31:42 -0400 Subject: [PATCH 21/53] remove rproj --- .gitignore | 2 +- analytics_at_scale.Rproj | 13 ------------- 2 files changed, 1 insertion(+), 14 deletions(-) delete mode 100644 analytics_at_scale.Rproj diff --git a/.gitignore b/.gitignore index 4fddf51..eedecc9 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,4 @@ README_files/* _site/* README.html - +*.Rproj diff --git a/analytics_at_scale.Rproj b/analytics_at_scale.Rproj deleted file mode 100644 index 8e3c2eb..0000000 --- a/analytics_at_scale.Rproj +++ /dev/null @@ -1,13 +0,0 @@ -Version: 1.0 - -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX From 7ea2faa5ed7aaceb39928356dc5e2e151f4ae516 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 13:48:10 -0400 Subject: [PATCH 22/53] updated gif path for snippets --- tutorials/snippets.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/snippets.qmd b/tutorials/snippets.qmd index 1df2b04..1ebf5d4 100644 --- a/tutorials/snippets.qmd +++ b/tutorials/snippets.qmd @@ -105,7 +105,7 @@ snippet dev ``` ![Gif showing the workflow of creating/using your own -snippet](media/snippets.gif) +snippet](../media/snippets.gif) ::: callout-note Note that all code after the `snippet ` key word must From 639a8a8fad820377aadc95df58b1ada8e29cd10e Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 13:48:10 -0400 Subject: [PATCH 23/53] updated gif 
path for snippets --- tutorials/snippets.qmd | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tutorials/snippets.qmd b/tutorials/snippets.qmd index 1df2b04..6315ede 100644 --- a/tutorials/snippets.qmd +++ b/tutorials/snippets.qmd @@ -2,6 +2,8 @@ title: "RStudio Snippets" author: "Jake Peters" date: 10/11/24 +keywords: + - productivity format: html editor: markdown: @@ -105,7 +107,7 @@ snippet dev ``` ![Gif showing the workflow of creating/using your own -snippet](media/snippets.gif) +snippet](../media/snippets.gif) ::: callout-note Note that all code after the `snippet ` key word must From 06d92eb68950039dc6800a3a3af28da7b9f98777 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 14:53:34 -0400 Subject: [PATCH 24/53] Update README.md --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 2a27e5b..d172f04 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,3 @@ # analytics-at-scale-workshop -live at - +Website live at From 12f1f5e6e81c49a993d9993861296d1b67f78cc5 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 14:57:16 -0400 Subject: [PATCH 25/53] updated meeting notes --- meeting-notes/2024-10-25.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/meeting-notes/2024-10-25.md b/meeting-notes/2024-10-25.md index 23c4504..742ab58 100644 --- a/meeting-notes/2024-10-25.md +++ b/meeting-notes/2024-10-25.md @@ -4,9 +4,13 @@ subtitle: Introduce Analytics at Scale wiki date: 2024-10-25 --- -# Attendees: -JP, .. +# Attendees: + +JP, BC, LO, KD # Agenda -- Introduce Wiki -- Discuss agenda for next meeting + +- Introduce Wiki +- Discuss agenda for next meeting + - Leila volunteered to give tutorial on `ggplot` templating on Nov. 
1 + - Kelsey to do workshop on kable tables next From c1a407b428b4b3551b6bbaab47910de592be398a Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 15:00:32 -0400 Subject: [PATCH 26/53] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index d172f04..b35ef25 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,6 @@ # analytics-at-scale-workshop Website live at + +## Contributing tutorials +Simply add a *.qmd*, *.rmd*, or *.md* file to the *tutorials/* directory. Once commited/pushed to main the website will rebuild/deploy to gh-pages. From c478b0024cd06b4e8ca4f9e29ccbb301a0abc9c9 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 25 Oct 2024 15:38:30 -0400 Subject: [PATCH 27/53] Update README.md --- README.md | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b35ef25..d126fcf 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,15 @@ # analytics-at-scale-workshop -Website live at +The Quarto Website is live at -## Contributing tutorials -Simply add a *.qmd*, *.rmd*, or *.md* file to the *tutorials/* directory. Once commited/pushed to main the website will rebuild/deploy to gh-pages. +## How to contribute a tutorial + +Simply add a *.qmd*, *.rmd*, or *.md* file to the *tutorials/* directory. + +Once it is committed/pushed to main the website will rebuild/deploy to gh-pages. + +If a tutorial lives outside of this repository we can add it as a link in the `External Tutorials` tab. + +## How to add Meeting Notes + +Simply add a *.md* file to the *meeting-notes/* folder.
From d9471dd877e04d97dee26928a6477923fe8c6f03 Mon Sep 17 00:00:00 2001 From: lorszag Date: Mon, 28 Oct 2024 17:13:59 -0400 Subject: [PATCH 28/53] Add files via upload Here is my tutorial for this week --- tutorials/ggplot_templates.qmd | 213 +++++++++++++++++++++++++++++++++ 1 file changed, 213 insertions(+) create mode 100644 tutorials/ggplot_templates.qmd diff --git a/tutorials/ggplot_templates.qmd b/tutorials/ggplot_templates.qmd new file mode 100644 index 0000000..a7813ec --- /dev/null +++ b/tutorials/ggplot_templates.qmd @@ -0,0 +1,213 @@ +--- +title: "ggplot_templates" +format: pdf +editor: visual +author: Leila Orszag +date: 11-1-2024 +--- + +## Purpose + +In this session, we will review how to use ggplot themes and review a few templates that could be useful for future data visualization. + +## Colors + +We have a set of pre-selected Connect colors, listed below. + +```{r} +color_palette <- list( + blue = c("#2973A5", "#648EB4", "#8BAAC7", "#B1C7D9", "#D8E3EC"), + darkblue = c("#164C71", "#51708A", "#7C94A8", "#A8B7C5", "#D3DBE2"), + yellow = c("#FDBE19", "#F6CC6C", "#F8D991", "#FBE5B5", "#FDF2DA"), + skyblue = c("#309EBD", "#74B0C7", "#97C4D5", "#B9D7E3", "#DDECF1"), + turq = c("#3C989E", "#77ACB0", "#99C0C4", "#BBD5D7", "#DDEAEC"), + grey = c("#565C65", "#797D83", "#9A9DA3", "#BBBEC1", "#DDDEE0"), + brown = c("#CC7D15", "#CD995B", "#DAB384", "#E7CCAD", "#F3E6D6") + ) +``` + +Using Rebecca's code, we can pull the number of distinct colors we need: + +```{r} +select_colors <- function(number) { + # Initialize a vector to store selected colors + selected_colors <- character(number) # Assuming colors are character strings + + # Get the number of color groups and the maximum number of shades + num_groups <- length(color_palette) + max_shades <- max(sapply(color_palette, length)) + + # Loop through each shade level and then each color group to fill the selected_colors + counter <- 1 + for (shade in 1:max_shades) { + for (group in 1:num_groups) { + 
current_palette <- color_palette[[group]] + if (length(current_palette) >= shade && counter <= number) { + selected_colors[counter] <- current_palette[shade] + counter <- counter + 1 + } + if (counter > number) break # Stop if we've reached the desired number + } + if (counter > number) break + } + + return(selected_colors) +} +``` + +# ggplot +## Simple ggplot Example +Pull in relevant library +```{r} +library(ggplot2) +``` + +Create the data frame +```{r} +x = c(10, 10, 20, 20, 30, 30, 40, 40, 50, 50, 60, 60) +y = c(5, 2, 3, 4, 1, 6, 4, 7, 2, 3, 5, 5) +z = c("girl", "boy", "girl", "boy", "girl", "boy", "girl", "boy", "girl", "boy", "girl", "boy") +data = data.frame(x, y) +``` + +Create our first line graph +```{r} +ggplot(data, aes(x = x, y = y)) + + geom_point(aes(color = z))+ + geom_line(aes(color = z)) +``` +This graph is great but doesn't align with our aesthetic. + + +```{r} +ggplot(data, aes(x = x, y = y)) + + geom_point(aes(color = z)) + + geom_line(aes(color = z))+ + labs(title = "Title", + x = "x", + y = "y", + legend = "Z") + + scale_colour_manual(values = select_colors(2))+ + theme(plot.title = element_text(hjust = 0.5), + panel.background = element_rect(fill = "white"), + panel.grid.major = element_line(color = "grey"), + panel.grid.minor = element_line(color = "grey")) +``` +Looks much better, but we want to wrap it into a function. + +## Creating & Using Function +Below is a function that you can add to a plot after specifying titles & the number of categories (colors) you need for your graph. 
+```{r} +theme_function = function(title, xlab, ylab, legend_lab, n) { + # Add any theme specifications in here + theme <- theme( + plot.title = element_text(hjust = 0.5), + panel.background = element_rect(fill = "white"), + panel.grid.major = element_line(color = "lightgrey"), + panel.grid.minor = element_line(color = "lightgrey") + ) + + # Defining the colors we will return + color_scale <- scale_color_manual(values = select_colors(n)) + color_scale2 <- scale_fill_manual(values = select_colors(n)) + + # Return a list containing the elements we want standardized + list( + # Change labels, or add new ones + labs( + title = title, + x = xlab, + y = ylab, + color = legend_lab), + theme, + color_scale, + color_scale2 + ) +} +``` + + +```{r} +ggplot(data, aes(x = x, y = y)) + + geom_point(aes(color = z)) + + geom_line(aes(color = z)) + + theme_function("Title", "X", "Y", "Gender", 2) +``` + +## Histogram +```{r} +library(palmerpenguins) +colnames(penguins) +``` + +```{r} +ggplot(penguins)+ + geom_histogram(aes(x= bill_length_mm, color = sex, fill = sex), binwidth = 1)+ + theme_function("Title", "X", "Y", "Gender", 3) +``` +## Bar Graph +```{r} +ggplot(penguins)+ + geom_bar(aes(x= island, color = sex, fill = sex))+ + theme_function("Title", "X", "Y", "Gender", 3) +``` + +# plotly +```{r} +library(plotly) +``` + +## Simple plotly Example +Create our first line graph +```{r} +plot_ly(data, + x = x, + y = y, + split = ~z, + type = "scatter", + mode = "line & markers") +``` + +```{r} +plot_ly(data, + x = x, + y = y, + color = ~z, + type = "scatter", + mode = "line & markers", + colors = select_colors(2)) %>% + layout( + title = 'Title', + xaxis = list(title = "x"), + yaxis = list(title = "y"), + legend = list(title = list(text = "Gender")) + ) +``` + +## Creating and using function +```{r} +theme_plotly = function(plot, title, xlab, ylab, legend_lab) { + plot <- plot %>% + layout( + title = title, + xaxis = list(title = xlab), # Use the xlab parameter + yaxis = list(title = 
ylab), # Use the ylab parameter + legend = list(title = list(text = legend_lab)) # Use the legend_lab parameter + ) + +plot # Return the modified plot +} +``` + + +```{r} +plot = plot_ly(data, + x = x, + y = y, + color = ~z, + type = "scatter", + mode = "line & markers", + colors = select_colors(2)) +theme_plotly(plot, "Title", "X", "Y", "Legend") +``` + From 90a3de2446b3bf82adc9a66eb9d30bca30afd0d1 Mon Sep 17 00:00:00 2001 From: lorszag Date: Mon, 28 Oct 2024 17:18:57 -0400 Subject: [PATCH 29/53] new version with rmarkdown package installed and called --- tutorials/ggplot_templates.qmd | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tutorials/ggplot_templates.qmd b/tutorials/ggplot_templates.qmd index a7813ec..9f9e27c 100644 --- a/tutorials/ggplot_templates.qmd +++ b/tutorials/ggplot_templates.qmd @@ -5,6 +5,10 @@ editor: visual author: Leila Orszag date: 11-1-2024 --- +```{r} +library(rmarkdown) +``` + ## Purpose From 02dffb44914d14f44ecc6c044f0b143403eb72ed Mon Sep 17 00:00:00 2001 From: lorszag Date: Mon, 28 Oct 2024 17:20:16 -0400 Subject: [PATCH 30/53] Update ggplot_templates.qmd --- tutorials/ggplot_templates.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/ggplot_templates.qmd b/tutorials/ggplot_templates.qmd index 9f9e27c..f1d7aca 100644 --- a/tutorials/ggplot_templates.qmd +++ b/tutorials/ggplot_templates.qmd @@ -1,6 +1,6 @@ --- title: "ggplot_templates" -format: pdf +format: html editor: visual author: Leila Orszag date: 11-1-2024 From 49f7a0283849d6d664b5b7ab53a03c2f14c37719 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Mon, 28 Oct 2024 17:30:40 -0400 Subject: [PATCH 31/53] tweak --- tutorials/ggplot_templates.qmd | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/tutorials/ggplot_templates.qmd b/tutorials/ggplot_templates.qmd index f1d7aca..da5c225 100644 --- a/tutorials/ggplot_templates.qmd +++ b/tutorials/ggplot_templates.qmd @@ -1,18 +1,18 @@ --- -title: 
"ggplot_templates" +title: "ggplot Templates" format: html editor: visual author: Leila Orszag date: 11-1-2024 --- -```{r} + +```{r, warning=FALSE} library(rmarkdown) ``` - ## Purpose -In this session, we will review how to use ggplot themes and review a few templates that could be useful for future data visualization. +In this session, we will review how to use ggplot themes and review a few templates that could be useful for future data visualization. ## Colors @@ -60,13 +60,17 @@ select_colors <- function(number) { ``` # ggplot + ## Simple ggplot Example + Pull in relevant library + ```{r} library(ggplot2) ``` Create the data frame + ```{r} x = c(10, 10, 20, 20, 30, 30, 40, 40, 50, 50, 60, 60) y = c(5, 2, 3, 4, 1, 6, 4, 7, 2, 3, 5, 5) @@ -75,13 +79,14 @@ data = data.frame(x, y) ``` Create our first line graph + ```{r} ggplot(data, aes(x = x, y = y)) + geom_point(aes(color = z))+ geom_line(aes(color = z)) ``` -This graph is great but doesn't align with our aesthetic. +This graph is great but doesn't align with our aesthetic. ```{r} ggplot(data, aes(x = x, y = y)) + @@ -97,10 +102,13 @@ ggplot(data, aes(x = x, y = y)) + panel.grid.major = element_line(color = "grey"), panel.grid.minor = element_line(color = "grey")) ``` + Looks much better, but we want to wrap it into a function. ## Creating & Using Function + Below is a function that you can add to a plot after specifying titles & the number of categories (colors) you need for your graph. 
+ ```{r} theme_function = function(title, xlab, ylab, legend_lab, n) { # Add any theme specifications in here @@ -130,7 +138,6 @@ theme_function = function(title, xlab, ylab, legend_lab, n) { } ``` - ```{r} ggplot(data, aes(x = x, y = y)) + geom_point(aes(color = z)) + @@ -139,6 +146,7 @@ ggplot(data, aes(x = x, y = y)) + ``` ## Histogram + ```{r} library(palmerpenguins) colnames(penguins) @@ -149,7 +157,9 @@ ggplot(penguins)+ geom_histogram(aes(x= bill_length_mm, color = sex, fill = sex), binwidth = 1)+ theme_function("Title", "X", "Y", "Gender", 3) ``` + ## Bar Graph + ```{r} ggplot(penguins)+ geom_bar(aes(x= island, color = sex, fill = sex))+ @@ -157,12 +167,15 @@ ggplot(penguins)+ ``` # plotly + ```{r} library(plotly) ``` ## Simple plotly Example + Create our first line graph + ```{r} plot_ly(data, x = x, @@ -189,6 +202,7 @@ plot_ly(data, ``` ## Creating and using function + ```{r} theme_plotly = function(plot, title, xlab, ylab, legend_lab) { plot <- plot %>% @@ -203,7 +217,6 @@ plot # Return the modified plot } ``` - ```{r} plot = plot_ly(data, x = x, @@ -214,4 +227,3 @@ plot = plot_ly(data, colors = select_colors(2)) theme_plotly(plot, "Title", "X", "Y", "Legend") ``` - From 3adb192b35f88727f6886043db72e27ea1d1aa57 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Mon, 28 Oct 2024 17:50:12 -0400 Subject: [PATCH 32/53] added packages --- .github/workflows/quarto-publish.yml | 68 ++++++++++++---------------- 1 file changed, 29 insertions(+), 39 deletions(-) diff --git a/.github/workflows/quarto-publish.yml b/.github/workflows/quarto-publish.yml index 338ca78..1441555 100644 --- a/.github/workflows/quarto-publish.yml +++ b/.github/workflows/quarto-publish.yml @@ -4,63 +4,53 @@ on: name: Render and Publish -# you need these permissions to publish to GitHub pages +# You need these permissions to publish to GitHub Pages permissions: - contents: write - pages: write + contents: write + pages: write jobs: build-deploy: runs-on: ubuntu-latest steps: + # Step 1: Check 
out the repository - name: Check out repository uses: actions/checkout@v4 - + + # Step 2: Set up Quarto - name: Set up Quarto uses: quarto-dev/quarto-actions/setup@v2 with: - # To install LaTeX to build PDF book - tinytex: true - # uncomment below and fill to pin a version + # To install LaTeX to build PDF book + tinytex: true + # Uncomment below and fill to pin a version if needed # version: SPECIFIC-QUARTO-VERSION-HERE - - # add software dependencies here and any libraries - - # From https://github.com/actions/setup-python - # - name: Setup Python - # uses: actions/setup-python@v3 - - # From https://github.com/r-lib/actions/tree/v2-branch/setup-r - # - name: Setup R - # uses: r-lib/actions/setup-r@v2 - # From https://github.com/julia-actions/setup-julia - # - name: Setup Julia - # uses: julia-actions/setup-julia@v1 + # Step 3: Set up R + - name: Set up R + uses: r-lib/actions/setup-r@v2 + with: + # Optionally specify the R version + # r-version: '4.4.1' # Uncomment and set if a specific version is needed - # See more at https://github.com/quarto-dev/quarto-actions/blob/main/examples/example-03-dependencies.md + # Step 4: Install Required R Packages + - name: Install R Packages + run: | + install.packages(c("knitr", "rmarkdown"), repos = "https://cloud.r-project.org/") - # To publish to Netlify, RStudio Connect, or GitHub Pages, uncomment - # the appropriate block below - - # - name: Publish to Netlify (and render) - # uses: quarto-dev/quarto-actions/publish@v2 - # with: - # target: netlify - # NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} - - # - name: Publish to RStudio Connect (and render) - # uses: quarto-dev/quarto-actions/publish@v2 - # with: - # target: connect - # CONNECT_SERVER: enter-the-server-url-here - # CONNECT_API_KEY: ${{ secrets.CONNECT_API_KEY }} + # Optional Step: Cache R Packages (Improves build times) + - name: Cache R Packages + uses: actions/cache@v3 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-r-${{ 
hashFiles('**/renv.lock') }} + restore-keys: | + ${{ runner.os }}-r- - # NOTE: If Publishing to GitHub Pages, set the permissions correctly (see top of this yaml) + # Step 5: Publish to GitHub Pages (and render) - name: Publish to GitHub Pages (and render) uses: quarto-dev/quarto-actions/publish@v2 with: target: gh-pages env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # this secret is always available for github actions - \ No newline at end of file + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This secret is always available for GitHub Actions From 5570c391de07506c5592bf01c8e01afca7192c27 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Mon, 28 Oct 2024 17:50:12 -0400 Subject: [PATCH 33/53] added packages --- .github/workflows/quarto-publish.yml | 80 +++++++++++++--------------- 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/.github/workflows/quarto-publish.yml b/.github/workflows/quarto-publish.yml index 338ca78..b3a0f6e 100644 --- a/.github/workflows/quarto-publish.yml +++ b/.github/workflows/quarto-publish.yml @@ -1,66 +1,58 @@ +name: Render and Publish + on: push: - branches: main + branches: [main] -name: Render and Publish - -# you need these permissions to publish to GitHub pages +# You need these permissions to publish to GitHub Pages permissions: - contents: write - pages: write + contents: write + pages: write + id-token: write # Required for authentication in some cases jobs: build-deploy: runs-on: ubuntu-latest + steps: + # Step 1: Check out the repository - name: Check out repository uses: actions/checkout@v4 - + + # Step 2: Set up Quarto - name: Set up Quarto uses: quarto-dev/quarto-actions/setup@v2 with: - # To install LaTeX to build PDF book - tinytex: true - # uncomment below and fill to pin a version - # version: SPECIFIC-QUARTO-VERSION-HERE - - # add software dependencies here and any libraries - - # From https://github.com/actions/setup-python - # - name: Setup Python - # uses: actions/setup-python@v3 - - # From 
https://github.com/r-lib/actions/tree/v2-branch/setup-r - # - name: Setup R - # uses: r-lib/actions/setup-r@v2 - - # From https://github.com/julia-actions/setup-julia - # - name: Setup Julia - # uses: julia-actions/setup-julia@v1 + # To install LaTeX to build PDF book + tinytex: true + # Uncomment below and specify a Quarto version if needed + # version: '1.3.364' + + # Step 3: Set up R + - name: Set up R + uses: r-lib/actions/setup-r@v2 + with: + # Optionally specify the R version + # r-version: '4.4.1' - # See more at https://github.com/quarto-dev/quarto-actions/blob/main/examples/example-03-dependencies.md + # Step 4: Install Required R Packages + - name: Install R Packages + run: | + install.packages(c("knitr", "rmarkdown"), repos = "https://cloud.r-project.org/") - # To publish to Netlify, RStudio Connect, or GitHub Pages, uncomment - # the appropriate block below - - # - name: Publish to Netlify (and render) - # uses: quarto-dev/quarto-actions/publish@v2 - # with: - # target: netlify - # NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} - - # - name: Publish to RStudio Connect (and render) - # uses: quarto-dev/quarto-actions/publish@v2 - # with: - # target: connect - # CONNECT_SERVER: enter-the-server-url-here - # CONNECT_API_KEY: ${{ secrets.CONNECT_API_KEY }} + # Optional Step 5: Cache R Packages (Improves build times) + - name: Cache R Packages + uses: actions/cache@v3 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-r-${{ hashFiles('**/renv.lock') }} + restore-keys: | + ${{ runner.os }}-r- - # NOTE: If Publishing to GitHub Pages, set the permissions correctly (see top of this yaml) + # Step 6: Publish to GitHub Pages (and render) - name: Publish to GitHub Pages (and render) uses: quarto-dev/quarto-actions/publish@v2 with: target: gh-pages env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # this secret is always available for github actions - \ No newline at end of file + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This secret is always 
available for GitHub Actions From 0b7f8ac4756db04df57bd8c355985769009c5842 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Mon, 28 Oct 2024 18:06:26 -0400 Subject: [PATCH 34/53] Update quarto-publish.yml --- .github/workflows/quarto-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/quarto-publish.yml b/.github/workflows/quarto-publish.yml index 2baa285..df2976b 100644 --- a/.github/workflows/quarto-publish.yml +++ b/.github/workflows/quarto-publish.yml @@ -29,7 +29,7 @@ jobs: uses: r-lib/actions/setup-r@v2 with: # Optionally specify the R version - # r-version: '4.4.1' + r-version: '4.4.1' - name: Install R Packages run: install.packages(c("knitr", "rmarkdown"), repos = "https://cloud.r-project.org/") From f23b5c4cbb8aa9daff5210053dd7b66a555ac089 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Mon, 28 Oct 2024 18:10:14 -0400 Subject: [PATCH 35/53] tweak --- .github/workflows/quarto-publish.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/quarto-publish.yml b/.github/workflows/quarto-publish.yml index 2baa285..bb40c31 100644 --- a/.github/workflows/quarto-publish.yml +++ b/.github/workflows/quarto-publish.yml @@ -29,11 +29,7 @@ jobs: uses: r-lib/actions/setup-r@v2 with: # Optionally specify the R version - # r-version: '4.4.1' - - - name: Install R Packages - run: install.packages(c("knitr", "rmarkdown"), repos = "https://cloud.r-project.org/") - + r-version: '4.4.1' - name: Publish to GitHub Pages (and render) uses: quarto-dev/quarto-actions/publish@v2 From 782dc09ca7628d326c993bf65db3ec53af54beea Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Tue, 29 Oct 2024 12:12:57 -0400 Subject: [PATCH 36/53] reverted quarto-publish.yml to 8e9d44fdf --- .github/workflows/quarto-publish.yml | 30 ++++++++++++++++------------ 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/.github/workflows/quarto-publish.yml b/.github/workflows/quarto-publish.yml index bb40c31..c9919f1 
100644 --- a/.github/workflows/quarto-publish.yml +++ b/.github/workflows/quarto-publish.yml @@ -1,16 +1,13 @@ - -name: Render and Publish - on: push: branches: main -# You need these permissions to publish to GitHub Pages -permissions: - contents: write - pages: write - id-token: write # Required for authentication in some cases +name: Render and Publish +# you need these permissions to publish to GitHub pages +permissions: + contents: write + pages: write jobs: build-deploy: @@ -24,13 +21,20 @@ jobs: with: # To install LaTeX to build PDF book tinytex: true + # uncomment below and fill to pin a version + # version: SPECIFIC-QUARTO-VERSION-HERE - - name: Set up R - uses: r-lib/actions/setup-r@v2 - with: - # Optionally specify the R version - r-version: '4.4.1' + # add software dependencies here and any libraries + + # From https://github.com/actions/setup-python + # - name: Setup Python + # uses: actions/setup-python@v3 + + # From https://github.com/r-lib/actions/tree/v2-branch/setup-r + - name: Setup R + uses: r-lib/actions/setup-r@v2 + # NOTE: If Publishing to GitHub Pages, set the permissions correctly (see top of this yaml) - name: Publish to GitHub Pages (and render) uses: quarto-dev/quarto-actions/publish@v2 with: From 9264698c21fb81ead0faa2f7799cb41efce1d3f2 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Tue, 29 Oct 2024 12:16:43 -0400 Subject: [PATCH 37/53] Update quarto-publish.yml --- .github/workflows/quarto-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/quarto-publish.yml b/.github/workflows/quarto-publish.yml index c9919f1..a83292a 100644 --- a/.github/workflows/quarto-publish.yml +++ b/.github/workflows/quarto-publish.yml @@ -32,7 +32,7 @@ jobs: # From https://github.com/r-lib/actions/tree/v2-branch/setup-r - name: Setup R - uses: r-lib/actions/setup-r@v2 + uses: r-lib/actions/setup-r@v2 # NOTE: If Publishing to GitHub Pages, set the permissions correctly (see top of this yaml) - name: Publish to 
GitHub Pages (and render) From 349281de60a18a0ebe240a59d285887579f63611 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Tue, 29 Oct 2024 12:23:34 -0400 Subject: [PATCH 38/53] added knitr and rmarkdown --- .github/workflows/quarto-publish.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/quarto-publish.yml b/.github/workflows/quarto-publish.yml index a83292a..83a0c7e 100644 --- a/.github/workflows/quarto-publish.yml +++ b/.github/workflows/quarto-publish.yml @@ -34,6 +34,10 @@ jobs: - name: Setup R uses: r-lib/actions/setup-r@v2 + - name: Install R Packages + run: | + install.packages(c("knitr", "rmarkdown"), repos = "https://cloud.r-project.org/") + # NOTE: If Publishing to GitHub Pages, set the permissions correctly (see top of this yaml) - name: Publish to GitHub Pages (and render) uses: quarto-dev/quarto-actions/publish@v2 From ea83ac81a35f5c78d7c02d35c8ad552715492461 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Tue, 29 Oct 2024 12:25:44 -0400 Subject: [PATCH 39/53] removed unneccessary rmarkdown package --- tutorials/ggplot_templates.qmd | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tutorials/ggplot_templates.qmd b/tutorials/ggplot_templates.qmd index da5c225..ee07b91 100644 --- a/tutorials/ggplot_templates.qmd +++ b/tutorials/ggplot_templates.qmd @@ -6,10 +6,6 @@ author: Leila Orszag date: 11-1-2024 --- -```{r, warning=FALSE} -library(rmarkdown) -``` - ## Purpose In this session, we will review how to use ggplot themes and review a few templates that could be useful for future data visualization. 
From f63df24f4d635ec35bd02d8c4e8de4a4ce05616c Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Tue, 29 Oct 2024 12:27:13 -0400 Subject: [PATCH 40/53] remove rmarkdown and knitr installation --- .github/workflows/quarto-publish.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/quarto-publish.yml b/.github/workflows/quarto-publish.yml index 83a0c7e..a83292a 100644 --- a/.github/workflows/quarto-publish.yml +++ b/.github/workflows/quarto-publish.yml @@ -34,10 +34,6 @@ jobs: - name: Setup R uses: r-lib/actions/setup-r@v2 - - name: Install R Packages - run: | - install.packages(c("knitr", "rmarkdown"), repos = "https://cloud.r-project.org/") - # NOTE: If Publishing to GitHub Pages, set the permissions correctly (see top of this yaml) - name: Publish to GitHub Pages (and render) uses: quarto-dev/quarto-actions/publish@v2 From a17df2f7c6b17799c1346f5c8fcb2a47c5d9a4a2 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Tue, 29 Oct 2024 12:34:31 -0400 Subject: [PATCH 41/53] Update quarto-publish.yml --- .github/workflows/quarto-publish.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/quarto-publish.yml b/.github/workflows/quarto-publish.yml index a83292a..e3f0c52 100644 --- a/.github/workflows/quarto-publish.yml +++ b/.github/workflows/quarto-publish.yml @@ -31,8 +31,8 @@ jobs: # uses: actions/setup-python@v3 # From https://github.com/r-lib/actions/tree/v2-branch/setup-r - - name: Setup R - uses: r-lib/actions/setup-r@v2 + # - name: Setup R + # uses: r-lib/actions/setup-r@v2 # NOTE: If Publishing to GitHub Pages, set the permissions correctly (see top of this yaml) - name: Publish to GitHub Pages (and render) From f7741e8dbad830309a3ad6fc88537da1bf86bf2a Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Tue, 29 Oct 2024 12:40:25 -0400 Subject: [PATCH 42/53] Update ggplot_templates.qmd --- tutorials/ggplot_templates.qmd | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git 
a/tutorials/ggplot_templates.qmd b/tutorials/ggplot_templates.qmd index ee07b91..a9fccf3 100644 --- a/tutorials/ggplot_templates.qmd +++ b/tutorials/ggplot_templates.qmd @@ -1,11 +1,17 @@ --- title: "ggplot Templates" -format: html -editor: visual author: Leila Orszag -date: 11-1-2024 +date: 11/01/2024 +keywords: + - productivity + - modularization +format: html +editor: + markdown: + wrap: 72 --- + ## Purpose In this session, we will review how to use ggplot themes and review a few templates that could be useful for future data visualization. From 9da5b793a9c92db3d178d51b7f946e4b613f2226 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Tue, 29 Oct 2024 12:52:47 -0400 Subject: [PATCH 43/53] new workflow --- .github/workflows/quarto-publish.yml | 47 ++++++++++++++++------------ 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/.github/workflows/quarto-publish.yml b/.github/workflows/quarto-publish.yml index 83a0c7e..225973e 100644 --- a/.github/workflows/quarto-publish.yml +++ b/.github/workflows/quarto-publish.yml @@ -4,10 +4,9 @@ on: name: Render and Publish -# you need these permissions to publish to GitHub pages permissions: - contents: write - pages: write + contents: write + pages: write jobs: build-deploy: @@ -16,32 +15,40 @@ jobs: - name: Check out repository uses: actions/checkout@v4 - - name: Set up Quarto - uses: quarto-dev/quarto-actions/setup@v2 - with: - # To install LaTeX to build PDF book - tinytex: true - # uncomment below and fill to pin a version - # version: SPECIFIC-QUARTO-VERSION-HERE - - # add software dependencies here and any libraries - - # From https://github.com/actions/setup-python - # - name: Setup Python - # uses: actions/setup-python@v3 - - # From https://github.com/r-lib/actions/tree/v2-branch/setup-r - name: Setup R uses: r-lib/actions/setup-r@v2 + with: + # r-version: '4.4.1' # Specify if needed + + - name: Cache R Packages + uses: actions/cache@v3 + with: + path: ${{ github.workspace }}/R/library + key: ${{ runner.os 
}}-R-${{ hashFiles('**/DESCRIPTION') }} + restore-keys: | + ${{ runner.os }}-R- - name: Install R Packages + env: + R_LIBS_USER: ${{ github.workspace }}/R/library run: | + mkdir -p $R_LIBS_USER install.packages(c("knitr", "rmarkdown"), repos = "https://cloud.r-project.org/") - # NOTE: If Publishing to GitHub Pages, set the permissions correctly (see top of this yaml) + - name: Verify R Packages Installation + run: | + Rscript -e "library(knitr, lib.loc = Sys.getenv('R_LIBS_USER'))" + Rscript -e "library(rmarkdown, lib.loc = Sys.getenv('R_LIBS_USER'))" + + - name: Set up Quarto + uses: quarto-dev/quarto-actions/setup@v2 + with: + tinytex: true + # version: '1.2.3' # Uncomment to pin Quarto version + - name: Publish to GitHub Pages (and render) uses: quarto-dev/quarto-actions/publish@v2 with: target: gh-pages env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # this secret is always available for github actions + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From a045736e63ad4b7305d78d4c971849b015640b77 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Tue, 29 Oct 2024 12:57:51 -0400 Subject: [PATCH 44/53] Update quarto-publish.yml --- .github/workflows/quarto-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/quarto-publish.yml b/.github/workflows/quarto-publish.yml index 225973e..b0da5f5 100644 --- a/.github/workflows/quarto-publish.yml +++ b/.github/workflows/quarto-publish.yml @@ -17,7 +17,7 @@ jobs: - name: Setup R uses: r-lib/actions/setup-r@v2 - with: + # with: # r-version: '4.4.1' # Specify if needed - name: Cache R Packages From b6d494ea7c27c066427df66089f20e4b984c13df Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Tue, 29 Oct 2024 12:57:51 -0400 Subject: [PATCH 45/53] Update quarto-publish.yml --- .github/workflows/quarto-publish.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/quarto-publish.yml b/.github/workflows/quarto-publish.yml index 225973e..9c480d4 100644 --- 
a/.github/workflows/quarto-publish.yml +++ b/.github/workflows/quarto-publish.yml @@ -4,6 +4,7 @@ on: name: Render and Publish +# Permissions required to publish to GitHub Pages permissions: contents: write pages: write @@ -17,8 +18,9 @@ jobs: - name: Setup R uses: r-lib/actions/setup-r@v2 - with: - # r-version: '4.4.1' # Specify if needed + # with: + # Optionally specify an R version + # r-version: '4.4.1' - name: Cache R Packages uses: actions/cache@v3 @@ -33,7 +35,7 @@ jobs: R_LIBS_USER: ${{ github.workspace }}/R/library run: | mkdir -p $R_LIBS_USER - install.packages(c("knitr", "rmarkdown"), repos = "https://cloud.r-project.org/") + Rscript -e 'install.packages(c("knitr", "rmarkdown"), repos = "https://cloud.r-project.org/", lib = Sys.getenv("R_LIBS_USER"))' - name: Verify R Packages Installation run: | From 6a1e1cf1219c6404ffc0f06a0f19dea29e4499cb Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 8 Nov 2024 09:45:45 -0500 Subject: [PATCH 46/53] added palmerpenguins, ggplot, plotly --- .github/workflows/quarto-publish.yml | 2 +- _quarto.yml | 4 +- scaling_your_r_project.qmd | 279 +++++++++++++++++++++++++++ 3 files changed, 283 insertions(+), 2 deletions(-) create mode 100644 scaling_your_r_project.qmd diff --git a/.github/workflows/quarto-publish.yml b/.github/workflows/quarto-publish.yml index 9c480d4..8786b33 100644 --- a/.github/workflows/quarto-publish.yml +++ b/.github/workflows/quarto-publish.yml @@ -35,7 +35,7 @@ jobs: R_LIBS_USER: ${{ github.workspace }}/R/library run: | mkdir -p $R_LIBS_USER - Rscript -e 'install.packages(c("knitr", "rmarkdown"), repos = "https://cloud.r-project.org/", lib = Sys.getenv("R_LIBS_USER"))' + Rscript -e 'install.packages(c("knitr", "rmarkdown", "ggplot", "palmerpenguins", "plotly"), repos = "https://cloud.r-project.org/", lib = Sys.getenv("R_LIBS_USER"))' - name: Verify R Packages Installation run: | diff --git a/_quarto.yml b/_quarto.yml index 005143b..faf9262 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -17,11 
+17,13 @@ website: contents: - href: index.qmd text: Home + - href: scaling_your_r_project.qmd + text: Scaling Your R Project - meeting-notes.qmd - section: Tutorials contents: tutorials/*.*md - section: External Tutorials - contents: + contents: - text: "Count by Period w/ DBI" url: https://analyticsphere.github.io/c4cp/articles/count_by_period_vignette.html diff --git a/scaling_your_r_project.qmd b/scaling_your_r_project.qmd new file mode 100644 index 0000000..600fa83 --- /dev/null +++ b/scaling_your_r_project.qmd @@ -0,0 +1,279 @@ +--- +title: "Scaling Your R Project: From Small Data to Big Data" +author: "Jake Peters" +date: "2024-04-27" +format: + html: + toc: true + toc-depth: 3 + theme: cerulean +--- + +## Introduction + +Transitioning an R project from handling small datasets to Big Data involves navigating a series of evolving challenges related to data size, complexity, processing speed, and infrastructure. This guide outlines the progression of these challenges and provides recommended R packages and solutions to address each phase effectively, focusing exclusively on **data manipulation and handling**. + + +| **Scale of Data** | **Challenges** | +|--------------|---------------------------------------------------------------------------------| +| **Small** | - Data Manipulation & Analysis
    - Reproducibility | +| **Medium** | - Performance Optimization
    - Memory Usage
    - Parallel Processing | +| **Large** | - Out-of-Memory Processing
    - Efficient I/O
    - Scalability | +| **Big** | - Distributed Computing
    - Integration with Big Data Ecosystems
    - Real-Time Processing
    - Scalability & Fault Tolerance | + + + +## 1. Small Data + +### Challenges + +- **Data Manipulation & Analysis:** Efficiently performing standard data operations and analyses. +- **Reproducibility:** Ensuring analyses are reproducible and well-documented. + +### Solutions & R Packages + +#### Data Manipulation + +- [**`dplyr`**](https://CRAN.R-project.org/package=dplyr)**:** Provides a grammar for data manipulation, enabling easy filtering, selecting, mutating, and summarizing. +- [**`tidyr`**](https://CRAN.R-project.org/package=tidyr)**:** Facilitates data tidying, ensuring datasets are in the right format for analysis. + +#### Data Import & Export + +- [**`readr`**](https://CRAN.R-project.org/package=readr)**:** Efficiently reads rectangular data (e.g., CSV, TSV). +- [**`readxl`**](https://CRAN.R-project.org/package=readxl)**:** Reads Excel files. + +#### Reproducibility + +- [**`RMarkdown`**](https://CRAN.R-project.org/package=rmarkdown)**:** Combines code, output, and narrative in a single document. + +------------------------------------------------------------------------ + +## 2. Medium Data + +As datasets grow larger but still generally fit into memory, the focus shifts to improving performance and efficiency. + +### Challenges + +- **Performance Optimization:** Reducing computation time for data manipulation and analysis. +- **Efficient Memory Usage:** Managing memory consumption to handle larger datasets without crashes. +- **Parallel Processing:** Utilizing multiple CPU cores to speed up computations. + +### Solutions & R Packages + +#### Efficient Data Handling + +- [**`data.table`**](https://CRAN.R-project.org/package=data.table)**:** Offers high-performance data manipulation with syntax similar to `dplyr` but optimized for speed and memory efficiency. + +#### Parallel Computing + +- [**`parallel`**](https://stat.ethz.ch/R-manual/R-devel/library/parallel/doc/parallel.pdf)**:** Base R package for parallel execution. 
+- [**`foreach`**](https://CRAN.R-project.org/package=foreach) and [**`doParallel`**](https://CRAN.R-project.org/package=doParallel)**:** Simplify parallel looping constructs. + +#### Memory Management + +- [**`pryr`**](https://CRAN.R-project.org/package=pryr)**:** Tools for tracking memory usage and optimizing memory consumption. + +#### Enhanced Data Import + +- [**`vroom`**](https://CRAN.R-project.org/package=vroom)**:** Fast reading of rectangular data by leveraging multithreading. + +------------------------------------------------------------------------ + +## 3. Large Data + +When datasets exceed available memory, strategies shift to out-of-memory data management and scalable storage solutions. + +### Challenges + +- **Out-of-Memory Data Processing:** Handling datasets that cannot be loaded entirely into RAM. +- **Efficient I/O Operations:** Minimizing read/write times for large datasets. +- **Scalability:** Ensuring solutions can handle increasing data sizes without significant performance degradation. + +### Solutions & R Packages + +#### Out-of-Memory Data Structures + +- [**`ff`**](https://CRAN.R-project.org/package=ff)**:** Stores data on disk while accessing it as if it were in memory. +- [**`bigmemory`**](https://CRAN.R-project.org/package=bigmemory)**:** Manages massive matrices with shared memory support. +- [**`disk.frame`**](https://CRAN.R-project.org/package=disk.frame)**:** Provides `data.frame`-like objects that are stored on disk, supporting parallel processing. + +#### Database Integration + +- [**`DBI`**](https://CRAN.R-project.org/package=DBI) and [**`dbplyr`**](https://CRAN.R-project.org/package=dbplyr)**:** Interface with various databases (e.g., SQL, PostgreSQL) allowing data manipulation using `dplyr` syntax without loading all data into R. 
+- [**`duckplyr`**](https://github.com/r-dbi/duckdb)**:** Integrates `dplyr` with **DuckDB**, an in-process SQL OLAP database management system, enabling efficient querying and manipulation of large datasets. +- [**`RSQLite`**](https://CRAN.R-project.org/package=RSQLite)**:** Lightweight, disk-based database. + +#### Efficient Data Import & Export + +- [**`arrow`**](https://CRAN.R-project.org/package=arrow)**:** Facilitates high-performance data interchange using the Apache Arrow format. + +#### Parallel and Distributed Computing + +- [**`future`**](https://CRAN.R-project.org/package=future) and [**`furrr`**](https://CRAN.R-project.org/package=furrr)**:** Simplify asynchronous and parallel processing workflows. + +------------------------------------------------------------------------ + +## 4. Big Data + +At the Big Data scale, data is often distributed across multiple machines or requires integration with distributed computing frameworks. Focusing on **Google BigQuery** as the primary DBMS, the challenges and solutions are tailored accordingly. + +### Challenges + +- **Distributed Computing:** Managing and processing data across multiple nodes or clusters. +- **Integration with Big Data Ecosystems:** Seamlessly working with technologies like Hadoop and Spark. +- **Real-Time Data Processing:** Handling streaming data with low latency. +- **Advanced Scalability and Fault Tolerance:** Ensuring systems can scale dynamically and recover from failures. + +### Solutions & R Packages + +#### Google BigQuery Integration + +- [**`bigrquery`**](https://CRAN.R-project.org/package=bigrquery)**:** Provides an interface to Google's BigQuery, allowing R users to perform SQL queries, manage datasets, and analyze large-scale data without needing to manage the underlying infrastructure. +- [**`dbplyr`**](https://CRAN.R-project.org/package=dbplyr)**:** Works with `bigrquery` to enable `dplyr`-style data manipulation directly on BigQuery tables. 
+- [**`duckplyr`**](https://github.com/r-dbi/duckdb)**:** While primarily for DuckDB, it can complement BigQuery workflows by handling intermediate large data manipulations efficiently. + +#### Apache Spark Integration + +- [**`sparklyr`**](https://CRAN.R-project.org/package=sparklyr)**:** Connects R to Apache Spark, enabling scalable data processing, machine learning, and integration with Spark’s ecosystem. + +#### Stream Processing + +- [**`sparklyr`**](https://CRAN.R-project.org/package=sparklyr)**:** Also supports structured streaming in Spark for handling real-time data. + +#### Parallel and High-Performance Computing + +- [**`future.batchtools`**](https://CRAN.R-project.org/package=future.batchtools)**:** Integrates the `future` package with batch job schedulers for distributed computing. + +#### Big Data Storage Formats + +- [**`arrow`**](https://CRAN.R-project.org/package=arrow)**:** Facilitates working with columnar storage formats optimized for Big Data. + +#### Cloud Integration + +- [**`bigrquery`**](https://CRAN.R-project.org/package=bigrquery)**:** Specifically integrates with Google BigQuery. +- [**`cloudml`**](https://CRAN.R-project.org/package=cloudml)**:** Interfaces with cloud-based machine learning services. + +------------------------------------------------------------------------ + +## Additional Considerations + +### Workflow Management + +- [**`drake`**](https://CRAN.R-project.org/package=drake) and [**`targets`**](https://CRAN.R-project.org/package=targets)**:** Manage complex workflows, ensuring reproducibility and efficiency as data scales. + +### Machine Learning & Modeling + +- [**`caret`**](https://CRAN.R-project.org/package=caret), [**`mlr3`**](https://CRAN.R-project.org/package=mlr3)**:** Scalable machine learning workflows that can integrate with parallel and distributed systems. 
+- [**`tidymodels`**](https://CRAN.R-project.org/package=tidymodels)**:** A collection of packages for modeling and machine learning that can scale with appropriate backend support. + +### Performance Monitoring & Optimization + +- [**`profvis`**](https://CRAN.R-project.org/package=profvis)**:** Profiling tool for R code to identify performance bottlenecks. +- [**`bench`**](https://CRAN.R-project.org/package=bench) and [**`microbenchmark`**](https://CRAN.R-project.org/package=microbenchmark)**:** Measure and compare the performance of different approaches. + +------------------------------------------------------------------------ + +## Categorized Package Overview + +For a clearer understanding, here's the table categorized by **Purpose** and **Scale of Data**: + +### Data Manipulation & Tidying + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-----------------|----------------------|-----------------| +| `dplyr` | Small, Medium | Grammar for data manipulation. Enables filtering, selecting, mutating, etc. | [CRAN](https://CRAN.R-project.org/package=dplyr) | +| `tidyr` | Small | Tools for tidying data, ensuring datasets are in the right format for analysis. | [CRAN](https://CRAN.R-project.org/package=tidyr) | +| `data.table` | Medium | High-performance data manipulation optimized for speed and memory efficiency. | [CRAN](https://CRAN.R-project.org/package=data.table) | +| `disk.frame` | Large | `data.frame`-like objects stored on disk with support for parallel processing. | [CRAN](https://CRAN.R-project.org/package=disk.frame) | +| `duckplyr` | Large, Big | Integrates `dplyr` with DuckDB for efficient large dataset manipulation. | [GitHub](https://github.com/r-dbi/duckdb) | + +### Data Import & Export + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-----------------|----------------------|-----------------| +| `readr` | Small | Efficiently reads rectangular data (e.g., CSV, TSV). 
| [CRAN](https://CRAN.R-project.org/package=readr) | +| `readxl` | Small | Reads Excel files. | [CRAN](https://CRAN.R-project.org/package=readxl) | +| `vroom` | Medium | Fast reading of rectangular data using multithreading. | [CRAN](https://CRAN.R-project.org/package=vroom) | +| `arrow` | Large, Big | High-performance data interchange using the Apache Arrow format. | [CRAN](https://CRAN.R-project.org/package=arrow) | + +### Reproducibility + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-----------------|----------------------|-----------------| +| `RMarkdown` | Small, Medium, Big | Combines code, output, and narrative in a single document for reproducibility. | [CRAN](https://CRAN.R-project.org/package=rmarkdown) | + +### Parallel Computing + +| **Package** | **Scale** | **Description** | **Link** | +|--------------|--------------|-------------------------|--------------------| +| `parallel` | Medium, Large, Big | Base R package for parallel execution. | [Documentation](https://stat.ethz.ch/R-manual/R-devel/library/parallel/doc/parallel.pdf) | +| `foreach` | Medium, Large, Big | Simplifies parallel looping constructs. | [CRAN](https://CRAN.R-project.org/package=foreach) | +| `doParallel` | Medium, Large, Big | Backend for the `foreach` package to enable parallel processing. | [CRAN](https://CRAN.R-project.org/package=doParallel) | +| `future` | Medium, Large, Big | Simplifies asynchronous and parallel processing workflows. | [CRAN](https://CRAN.R-project.org/package=future) | +| `furrr` | Medium, Large, Big | Combines `future` with `purrr` for parallel mapping functions. | [CRAN](https://CRAN.R-project.org/package=furrr) | +| `future.batchtools` | Big | Integrates the `future` package with batch job schedulers for distributed computing. 
| [CRAN](https://CRAN.R-project.org/package=future.batchtools) | + +### Memory Management + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-----------------|----------------------|-----------------| +| `pryr` | Medium | Tools for tracking and optimizing memory usage. | [CRAN](https://CRAN.R-project.org/package=pryr) | + +### Out-of-Memory Data Structures + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-----------------|----------------------|-----------------| +| `ff` | Large | Stores data on disk while accessing it as if it were in memory. | [CRAN](https://CRAN.R-project.org/package=ff) | +| `bigmemory` | Large | Manages massive matrices with shared memory support. | [CRAN](https://CRAN.R-project.org/package=bigmemory) | +| `disk.frame` | Large | `data.frame`-like objects stored on disk with support for parallel processing. | [CRAN](https://CRAN.R-project.org/package=disk.frame) | + +### Database Integration + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-----------------|----------------------|-----------------| +| `DBI` | Large, Big | Database interface definition for communication between R and DBMS. | [CRAN](https://CRAN.R-project.org/package=DBI) | +| `dbplyr` | Large, Big | `dplyr` backend for databases, allowing SQL-like data manipulation. | [CRAN](https://CRAN.R-project.org/package=dbplyr) | +| `duckplyr` | Large, Big | Integrates `dplyr` with DuckDB for efficient large dataset manipulation. | [GitHub](https://github.com/r-dbi/duckdb) | +| `RSQLite` | Large | Lightweight, disk-based database. | [CRAN](https://CRAN.R-project.org/package=RSQLite) | +| `bigrquery` | Big | Interface to Google's BigQuery for managing and querying large datasets. 
| [CRAN](https://CRAN.R-project.org/package=bigrquery) | + +### Machine Learning & Modeling + +| **Package** | **Scale** | **Description** | **Link** | +|------------------|------------------|--------------------|-----------------| +| `caret` | Medium to Big | Streamlined model training and tuning for machine learning. | [CRAN](https://CRAN.R-project.org/package=caret) | +| `mlr3` | Medium to Big | Modern, object-oriented machine learning framework. | [CRAN](https://CRAN.R-project.org/package=mlr3) | +| `tidymodels` | Medium to Big | Collection of packages for modeling and machine learning using tidy principles. | [CRAN](https://CRAN.R-project.org/package=tidymodels) | + +### Big Data Integration + +| **Package** | **Purpose** | **Description** | **Link** | +|----------------|--------------------------|----------------|----------------| +| `bigrquery` | Big Data Integration (BigQuery) | Interface to Google's BigQuery for managing and querying large datasets. | [CRAN](https://CRAN.R-project.org/package=bigrquery) | +| `sparklyr` | Big Data Integration (Spark) | Connects R to Apache Spark for scalable data processing and machine learning. | [CRAN](https://CRAN.R-project.org/package=sparklyr) | +| `cloudml` | Cloud Integration (Machine Learning) | Interfaces with cloud-based machine learning services. | [CRAN](https://CRAN.R-project.org/package=cloudml) | + +### Workflow Management + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-------------------|--------------------|-----------------| +| `drake` | Small to Big | Manages complex workflows with a focus on reproducibility and efficiency. | [CRAN](https://CRAN.R-project.org/package=drake) | +| `targets` | Small to Big | Successor to `drake` for pipeline management and workflow automation. 
| [CRAN](https://CRAN.R-project.org/package=targets) | + +### Performance Monitoring & Optimization + +| **Package** | **Scale** | **Description** | **Link** | +|---------------------|-----------------|------------------|-----------------| +| `profvis` | Small to Big | Profiling tool for R code to identify performance bottlenecks. | [CRAN](https://CRAN.R-project.org/package=profvis) | +| `bench` | Small to Big | Tools for measuring and comparing code performance. | [CRAN](https://CRAN.R-project.org/package=bench) | +| `microbenchmark` | Small to Big | Accurate timing of small code snippets. | [CRAN](https://CRAN.R-project.org/package=microbenchmark) | + +------------------------------------------------------------------------ + +## Final Summary + +Transitioning from small to Big Data in an R project requires a strategic approach to handle increasing data volumes and complexities. By focusing on data manipulation and handling, and leveraging the appropriate R packages categorized by purpose and scale, you can effectively manage and analyze large datasets. This overview intentionally leaves out data visualization tools so that it can concentrate on the core aspects of data management and processing that keep your project efficient and scalable as it grows. + +Remember to continually assess your project's needs and adapt your toolset accordingly, ensuring that each phase of data scaling is supported by the best available resources. 
+ +------------------------------------------------------------------------ From 4a4d2d642839c9efd3a4be419ba887e6a5de16f4 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 8 Nov 2024 09:45:45 -0500 Subject: [PATCH 47/53] added palmerpenguins, ggplot, plotly --- .github/workflows/quarto-publish.yml | 2 +- _quarto.yml | 4 +- scaling_your_r_project.qmd | 279 +++++++++++++++++++++++++++ 3 files changed, 283 insertions(+), 2 deletions(-) create mode 100644 scaling_your_r_project.qmd diff --git a/.github/workflows/quarto-publish.yml b/.github/workflows/quarto-publish.yml index 9c480d4..8786b33 100644 --- a/.github/workflows/quarto-publish.yml +++ b/.github/workflows/quarto-publish.yml @@ -35,7 +35,7 @@ jobs: R_LIBS_USER: ${{ github.workspace }}/R/library run: | mkdir -p $R_LIBS_USER - Rscript -e 'install.packages(c("knitr", "rmarkdown"), repos = "https://cloud.r-project.org/", lib = Sys.getenv("R_LIBS_USER"))' + Rscript -e 'install.packages(c("knitr", "rmarkdown", "ggplot2", "palmerpenguins", "plotly"), repos = "https://cloud.r-project.org/", lib = Sys.getenv("R_LIBS_USER"))' - name: Verify R Packages Installation run: | diff --git a/_quarto.yml b/_quarto.yml index 005143b..faf9262 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -17,11 +17,13 @@ website: contents: - href: index.qmd text: Home + - href: scaling_your_r_project.qmd + text: Scaling Your R Project - meeting-notes.qmd - section: Tutorials contents: tutorials/*.*md - section: External Tutorials - contents: + contents: - text: "Count by Period w/ DBI" url: https://analyticsphere.github.io/c4cp/articles/count_by_period_vignette.html diff --git a/scaling_your_r_project.qmd b/scaling_your_r_project.qmd new file mode 100644 index 0000000..600fa83 --- /dev/null +++ b/scaling_your_r_project.qmd @@ -0,0 +1,279 @@ +--- +title: "Scaling Your R Project: From Small Data to Big Data" +author: "Jake Peters" +date: "2024-04-27" +format: + html: + toc: true + toc-depth: 3 + theme: cerulean +--- + +## Introduction + 
+Transitioning an R project from handling small datasets to Big Data involves navigating a series of evolving challenges related to data size, complexity, processing speed, and infrastructure. This guide outlines the progression of these challenges and provides recommended R packages and solutions to address each phase effectively, focusing exclusively on **data manipulation and handling**. + + +| **Scale of Data** | **Challenges** | +|--------------|---------------------------------------------------------------------------------| +| **Small** | - Data Manipulation & Analysis
    - Reproducibility | +| **Medium** | - Performance Optimization
    - Memory Usage
    - Parallel Processing | +| **Large** | - Out-of-Memory Processing
    - Efficient I/O
    - Scalability | +| **Big** | - Distributed Computing
    - Integration with Big Data Ecosystems
    - Real-Time Processing
    - Scalability & Fault Tolerance | + + + +## 1. Small Data + +### Challenges + +- **Data Manipulation & Analysis:** Efficiently performing standard data operations and analyses. +- **Reproducibility:** Ensuring analyses are reproducible and well-documented. + +### Solutions & R Packages + +#### Data Manipulation + +- [**`dplyr`**](https://CRAN.R-project.org/package=dplyr)**:** Provides a grammar for data manipulation, enabling easy filtering, selecting, mutating, and summarizing. +- [**`tidyr`**](https://CRAN.R-project.org/package=tidyr)**:** Facilitates data tidying, ensuring datasets are in the right format for analysis. + +#### Data Import & Export + +- [**`readr`**](https://CRAN.R-project.org/package=readr)**:** Efficiently reads rectangular data (e.g., CSV, TSV). +- [**`readxl`**](https://CRAN.R-project.org/package=readxl)**:** Reads Excel files. + +#### Reproducibility + +- [**`RMarkdown`**](https://CRAN.R-project.org/package=rmarkdown)**:** Combines code, output, and narrative in a single document. + +------------------------------------------------------------------------ + +## 2. Medium Data + +As datasets grow larger but still generally fit into memory, the focus shifts to improving performance and efficiency. + +### Challenges + +- **Performance Optimization:** Reducing computation time for data manipulation and analysis. +- **Efficient Memory Usage:** Managing memory consumption to handle larger datasets without crashes. +- **Parallel Processing:** Utilizing multiple CPU cores to speed up computations. + +### Solutions & R Packages + +#### Efficient Data Handling + +- [**`data.table`**](https://CRAN.R-project.org/package=data.table)**:** Offers high-performance data manipulation with syntax similar to `dplyr` but optimized for speed and memory efficiency. + +#### Parallel Computing + +- [**`parallel`**](https://stat.ethz.ch/R-manual/R-devel/library/parallel/doc/parallel.pdf)**:** Base R package for parallel execution. 
+- [**`foreach`**](https://CRAN.R-project.org/package=foreach) and [**`doParallel`**](https://CRAN.R-project.org/package=doParallel)**:** Simplify parallel looping constructs. + +#### Memory Management + +- [**`pryr`**](https://CRAN.R-project.org/package=pryr)**:** Tools for tracking memory usage and optimizing memory consumption. + +#### Enhanced Data Import + +- [**`vroom`**](https://CRAN.R-project.org/package=vroom)**:** Fast reading of rectangular data by leveraging multithreading. + +------------------------------------------------------------------------ + +## 3. Large Data + +When datasets exceed available memory, strategies shift to out-of-memory data management and scalable storage solutions. + +### Challenges + +- **Out-of-Memory Data Processing:** Handling datasets that cannot be loaded entirely into RAM. +- **Efficient I/O Operations:** Minimizing read/write times for large datasets. +- **Scalability:** Ensuring solutions can handle increasing data sizes without significant performance degradation. + +### Solutions & R Packages + +#### Out-of-Memory Data Structures + +- [**`ff`**](https://CRAN.R-project.org/package=ff)**:** Stores data on disk while accessing it as if it were in memory. +- [**`bigmemory`**](https://CRAN.R-project.org/package=bigmemory)**:** Manages massive matrices with shared memory support. +- [**`disk.frame`**](https://CRAN.R-project.org/package=disk.frame)**:** Provides `data.frame`-like objects that are stored on disk, supporting parallel processing. + +#### Database Integration + +- [**`DBI`**](https://CRAN.R-project.org/package=DBI) and [**`dbplyr`**](https://CRAN.R-project.org/package=dbplyr)**:** Interface with various databases (e.g., SQL, PostgreSQL) allowing data manipulation using `dplyr` syntax without loading all data into R. 
+- [**`duckplyr`**](https://CRAN.R-project.org/package=duckplyr)**:** Integrates `dplyr` with **DuckDB**, an in-process SQL OLAP database management system, enabling efficient querying and manipulation of large datasets. +- [**`RSQLite`**](https://CRAN.R-project.org/package=RSQLite)**:** Lightweight, disk-based database. + +#### Efficient Data Import & Export + +- [**`arrow`**](https://CRAN.R-project.org/package=arrow)**:** Facilitates high-performance data interchange using the Apache Arrow format. + +#### Parallel and Distributed Computing + +- [**`future`**](https://CRAN.R-project.org/package=future) and [**`furrr`**](https://CRAN.R-project.org/package=furrr)**:** Simplify asynchronous and parallel processing workflows. + +------------------------------------------------------------------------ + +## 4. Big Data + +At the Big Data scale, data is often distributed across multiple machines or requires integration with distributed computing frameworks. Focusing on **Google BigQuery** as the primary DBMS, the challenges and solutions are tailored accordingly. + +### Challenges + +- **Distributed Computing:** Managing and processing data across multiple nodes or clusters. +- **Integration with Big Data Ecosystems:** Seamlessly working with technologies like Hadoop and Spark. +- **Real-Time Data Processing:** Handling streaming data with low latency. +- **Advanced Scalability and Fault Tolerance:** Ensuring systems can scale dynamically and recover from failures. + +### Solutions & R Packages + +#### Google BigQuery Integration + +- [**`bigrquery`**](https://CRAN.R-project.org/package=bigrquery)**:** Provides an interface to Google's BigQuery, allowing R users to perform SQL queries, manage datasets, and analyze large-scale data without needing to manage the underlying infrastructure. +- [**`dbplyr`**](https://CRAN.R-project.org/package=dbplyr)**:** Works with `bigrquery` to enable `dplyr`-style data manipulation directly on BigQuery tables. 
+- [**`duckplyr`**](https://CRAN.R-project.org/package=duckplyr)**:** While primarily for DuckDB, it can complement BigQuery workflows by handling intermediate large data manipulations efficiently. + +#### Apache Spark Integration + +- [**`sparklyr`**](https://CRAN.R-project.org/package=sparklyr)**:** Connects R to Apache Spark, enabling scalable data processing, machine learning, and integration with Spark’s ecosystem. + +#### Stream Processing + +- [**`sparklyr`**](https://CRAN.R-project.org/package=sparklyr)**:** Also supports structured streaming in Spark for handling real-time data. + +#### Parallel and High-Performance Computing + +- [**`future.batchtools`**](https://CRAN.R-project.org/package=future.batchtools)**:** Integrates the `future` package with batch job schedulers for distributed computing. + +#### Big Data Storage Formats + +- [**`arrow`**](https://CRAN.R-project.org/package=arrow)**:** Facilitates working with columnar storage formats optimized for Big Data. + +#### Cloud Integration + +- [**`bigrquery`**](https://CRAN.R-project.org/package=bigrquery)**:** Specifically integrates with Google BigQuery. +- [**`cloudml`**](https://CRAN.R-project.org/package=cloudml)**:** Interfaces with cloud-based machine learning services. + +------------------------------------------------------------------------ + +## Additional Considerations + +### Workflow Management + +- [**`drake`**](https://CRAN.R-project.org/package=drake) and [**`targets`**](https://CRAN.R-project.org/package=targets)**:** Manage complex workflows, ensuring reproducibility and efficiency as data scales. + +### Machine Learning & Modeling + +- [**`caret`**](https://CRAN.R-project.org/package=caret), [**`mlr3`**](https://CRAN.R-project.org/package=mlr3)**:** Scalable machine learning workflows that can integrate with parallel and distributed systems. 
+- [**`tidymodels`**](https://CRAN.R-project.org/package=tidymodels)**:** A collection of packages for modeling and machine learning that can scale with appropriate backend support. + +### Performance Monitoring & Optimization + +- [**`profvis`**](https://CRAN.R-project.org/package=profvis)**:** Profiling tool for R code to identify performance bottlenecks. +- [**`bench`**](https://CRAN.R-project.org/package=bench) and [**`microbenchmark`**](https://CRAN.R-project.org/package=microbenchmark)**:** Measure and compare the performance of different approaches. + +------------------------------------------------------------------------ + +## Categorized Package Overview + +For a clearer understanding, here's the table categorized by **Purpose** and **Scale of Data**: + +### Data Manipulation & Tidying + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-----------------|----------------------|-----------------| +| `dplyr` | Small, Medium | Grammar for data manipulation. Enables filtering, selecting, mutating, etc. | [CRAN](https://CRAN.R-project.org/package=dplyr) | +| `tidyr` | Small | Tools for tidying data, ensuring datasets are in the right format for analysis. | [CRAN](https://CRAN.R-project.org/package=tidyr) | +| `data.table` | Medium | High-performance data manipulation optimized for speed and memory efficiency. | [CRAN](https://CRAN.R-project.org/package=data.table) | +| `disk.frame` | Large | `data.frame`-like objects stored on disk with support for parallel processing. | [CRAN](https://CRAN.R-project.org/package=disk.frame) | +| `duckplyr` | Large, Big | Integrates `dplyr` with DuckDB for efficient large dataset manipulation. | [GitHub](https://github.com/r-dbi/duckdb) | + +### Data Import & Export + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-----------------|----------------------|-----------------| +| `readr` | Small | Efficiently reads rectangular data (e.g., CSV, TSV). 
| [CRAN](https://CRAN.R-project.org/package=readr) | +| `readxl` | Small | Reads Excel files. | [CRAN](https://CRAN.R-project.org/package=readxl) | +| `vroom` | Medium | Fast reading of rectangular data using multithreading. | [CRAN](https://CRAN.R-project.org/package=vroom) | +| `arrow` | Large, Big | High-performance data interchange using the Apache Arrow format. | [CRAN](https://CRAN.R-project.org/package=arrow) | + +### Reproducibility + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-----------------|----------------------|-----------------| +| `RMarkdown` | Small, Medium, Big | Combines code, output, and narrative in a single document for reproducibility. | [CRAN](https://CRAN.R-project.org/package=rmarkdown) | + +### Parallel Computing + +| **Package** | **Scale** | **Description** | **Link** | +|--------------|--------------|-------------------------|--------------------| +| `parallel` | Medium, Large, Big | Base R package for parallel execution. | [Documentation](https://stat.ethz.ch/R-manual/R-devel/library/parallel/doc/parallel.pdf) | +| `foreach` | Medium, Large, Big | Simplifies parallel looping constructs. | [CRAN](https://CRAN.R-project.org/package=foreach) | +| `doParallel` | Medium, Large, Big | Backend for the `foreach` package to enable parallel processing. | [CRAN](https://CRAN.R-project.org/package=doParallel) | +| `future` | Medium, Large, Big | Simplifies asynchronous and parallel processing workflows. | [CRAN](https://CRAN.R-project.org/package=future) | +| `furrr` | Medium, Large, Big | Combines `future` with `purrr` for parallel mapping functions. | [CRAN](https://CRAN.R-project.org/package=furrr) | +| `future.batchtools` | Big | Integrates the `future` package with batch job schedulers for distributed computing. 
| [CRAN](https://CRAN.R-project.org/package=future.batchtools) | + +### Memory Management + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-----------------|----------------------|-----------------| +| `pryr` | Medium | Tools for tracking and optimizing memory usage. | [CRAN](https://CRAN.R-project.org/package=pryr) | + +### Out-of-Memory Data Structures + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-----------------|----------------------|-----------------| +| `ff` | Large | Stores data on disk while accessing it as if it were in memory. | [CRAN](https://CRAN.R-project.org/package=ff) | +| `bigmemory` | Large | Manages massive matrices with shared memory support. | [CRAN](https://CRAN.R-project.org/package=bigmemory) | +| `disk.frame` | Large | `data.frame`-like objects stored on disk with support for parallel processing. | [CRAN](https://CRAN.R-project.org/package=disk.frame) | + +### Database Integration + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-----------------|----------------------|-----------------| +| `DBI` | Large, Big | Database interface definition for communication between R and DBMS. | [CRAN](https://CRAN.R-project.org/package=DBI) | +| `dbplyr` | Large, Big | `dplyr` backend for databases, allowing SQL-like data manipulation. | [CRAN](https://CRAN.R-project.org/package=dbplyr) | +| `duckplyr` | Large, Big | Integrates `dplyr` with DuckDB for efficient large dataset manipulation. | [GitHub](https://github.com/r-dbi/duckdb) | +| `RSQLite` | Large | Lightweight, disk-based database. | [CRAN](https://CRAN.R-project.org/package=RSQLite) | +| `bigrquery` | Big | Interface to Google's BigQuery for managing and querying large datasets. 
| [CRAN](https://CRAN.R-project.org/package=bigrquery) | + +### Machine Learning & Modeling + +| **Package** | **Scale** | **Description** | **Link** | +|------------------|------------------|--------------------|-----------------| +| `caret` | Medium to Big | Streamlined model training and tuning for machine learning. | [CRAN](https://CRAN.R-project.org/package=caret) | +| `mlr3` | Medium to Big | Modern, object-oriented machine learning framework. | [CRAN](https://CRAN.R-project.org/package=mlr3) | +| `tidymodels` | Medium to Big | Collection of packages for modeling and machine learning using tidy principles. | [CRAN](https://CRAN.R-project.org/package=tidymodels) | + +### Big Data Integration + +| **Package** | **Purpose** | **Description** | **Link** | +|----------------|--------------------------|----------------|----------------| +| `bigrquery` | Big Data Integration (BigQuery) | Interface to Google's BigQuery for managing and querying large datasets. | [CRAN](https://CRAN.R-project.org/package=bigrquery) | +| `sparklyr` | Big Data Integration (Spark) | Connects R to Apache Spark for scalable data processing and machine learning. | [CRAN](https://CRAN.R-project.org/package=sparklyr) | +| `cloudml` | Cloud Integration (Machine Learning) | Interfaces with cloud-based machine learning services. | [CRAN](https://CRAN.R-project.org/package=cloudml) | + +### Workflow Management + +| **Package** | **Scale** | **Description** | **Link** | +|-----------------|-------------------|--------------------|-----------------| +| `drake` | Small to Big | Manages complex workflows with a focus on reproducibility and efficiency. | [CRAN](https://CRAN.R-project.org/package=drake) | +| `targets` | Small to Big | Successor to `drake` for pipeline management and workflow automation. 
| [CRAN](https://CRAN.R-project.org/package=targets) | + +### Performance Monitoring & Optimization + +| **Package** | **Scale** | **Description** | **Link** | +|---------------------|-----------------|------------------|-----------------| +| `profvis` | Small to Big | Profiling tool for R code to identify performance bottlenecks. | [CRAN](https://CRAN.R-project.org/package=profvis) | +| `bench` | Small to Big | Tools for measuring and comparing code performance. | [CRAN](https://CRAN.R-project.org/package=bench) | +| `microbenchmark` | Small to Big | Accurate timing of small code snippets. | [CRAN](https://CRAN.R-project.org/package=microbenchmark) | + +------------------------------------------------------------------------ + +## Final Summary + +Transitioning from small to Big Data in an R project requires a strategic approach to handle increasing data volumes and complexities. By focusing on data manipulation and handling, and leveraging the appropriate R packages categorized by purpose and scale, you can effectively manage and analyze large datasets. The removal of data visualization tools from this overview allows for a more concentrated focus on the core aspects of data management and processing, ensuring that your project remains efficient and scalable as it grows. + +Remember to continually assess your project's needs and adapt your toolset accordingly, ensuring that each phase of data scaling is supported by the best available resources. 
+ +------------------------------------------------------------------------ From 1d38f03e1feb49674e380821f1d4ff73d524f183 Mon Sep 17 00:00:00 2001 From: Jake Peters Date: Fri, 24 Jan 2025 10:17:42 -0500 Subject: [PATCH 48/53] Add info_schema_tutorial --- _quarto.yml | 3 + info_schema_tutorial.qmd | 113 +++++++++++++++++++++++ tutorials/info_schema_tutorial.qmd | 113 +++++++++++++++++++++++ tutorials/project_specific_pseudo_ids.md | 106 +++++++++++++++++++++ 4 files changed, 335 insertions(+) create mode 100644 info_schema_tutorial.qmd create mode 100644 tutorials/info_schema_tutorial.qmd create mode 100644 tutorials/project_specific_pseudo_ids.md diff --git a/_quarto.yml b/_quarto.yml index faf9262..ff5de8d 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -1,6 +1,9 @@ project: type: website +execute: + freeze: auto + website: title: "Analytics at Scale" diff --git a/info_schema_tutorial.qmd b/info_schema_tutorial.qmd new file mode 100644 index 0000000..028c422 --- /dev/null +++ b/info_schema_tutorial.qmd @@ -0,0 +1,113 @@ +--- +title: "Querying BigQuery Information Schema" +author: "Jake Peters" +date: 2025-01-22 +--- + +## Introduction + +In this tutorial, we demonstrate how to use a Quarto document to query BigQuery's information schema and identify where specific Concept IDs are located in your database. By integrating R and SQL, you learn how to authenticate with BigQuery, execute custom queries, and dynamically construct search queries based on user-specified Concept IDs. + +## Prerequisites +- Before following this tutorial, ensure you have: R installed on your system. +- The following R packages installed: `bigrquery`, `dplyr`, `DBI`, `dbplyr`, and `glue`. +- Access to a BigQuery project and dataset with appropriate permissions to query the `INFORMATION_SCHEMA`. + +## Table of Contents: +1. **Setup and Authentication:** Load libraries, configure authentication, and establish a connection to BigQuery. +2. 
**Executing a Basic SQL Query:** Query the BigQuery `INFORMATION_SCHEMA.COLUMNS` to retrieve metadata that matches a specific pattern. +3. **Building a Dynamic Query Function:** Create an R function to dynamically generate SQL queries to search for Concept IDs. +4. **Putting It All Together:** Run the function and review the results. + +## Step 1: Setup and Authentication + +The Quarto document begins with a header that includes essential metadata (title, author, and date). The first code chunk loads the required libraries and prepares the environment. + +Next, the following R code chunk loads the necessary libraries and suppresses the output for a clean setup: + +```{r, warning=FALSE, message=FALSE} +library(bigrquery) +library(dplyr) +library(DBI) +library(dbplyr) +library(glue) + +# Authenticate with BigQuery +bigrquery::bq_auth() +``` + +## Step 2: Establishing the Database Connection + +Here, we specify the dataset and project, authenticate with BigQuery, and open a connection. Finally, we list the tables in the dataset to confirm that the connection is working: + +```{r} +# Specify dataset and project details +dataset <- "FlatConnect" +project <- "nih-nci-dceg-connect-dev" + +# Establish connection to BigQuery +con <- DBI::dbConnect(bigrquery::bigquery(), + project = project, + dataset = dataset, + billing = project) + +# List available tables to verify the connection +DBI::dbListTables(con) +``` + +## Step 3: Querying the Information Schema + +The next code block runs a SQL query on the `INFORMATION_SCHEMA.COLUMNS` to locate columns that contain specific Concept IDs. 
This query filters columns based on a pattern present in their names: + +```{sql, connection=con} +SELECT + table_catalog, + table_schema, + table_name, + column_name +FROM FlatConnect.INFORMATION_SCHEMA.COLUMNS +WHERE column_name + LIKE '%158409298%261863326%'; +``` + +## Step 4: Building a Dynamic Query Function + +To make the querying process more flexible, we define an R function named `get_schema_info`. This function accepts a vector of Concept IDs and a tier (`dev` for development, `stg` for staging, or `prod` for production), constructs the appropriate SQL query dynamically, and returns the result: + +```{r} +get_schema_info <- function(con, cids, tier, dataset = 'FlatConnect') { + + # Determine the project based on the tier; fail fast on an unrecognized tier + project <- switch(tier, + dev = "nih-nci-dceg-connect-dev", + stg = "nih-nci-dceg-connect-stg-5519", + prod = "nih-nci-dceg-connect-prod-6d04", + stop("Unknown tier '", tier, "': expected 'dev', 'stg', or 'prod'")) + # Collapse the Concept IDs into a single string with "%" delimiters (e.g., "%cid1%cid2%cid3%") + cid_str <- paste0("%", paste(cids, collapse = "%"), "%") + print(glue("cid_str: {cid_str}\n\n")) + + # Construct the SQL query dynamically + sql <- glue::glue("SELECT table_catalog, table_schema, table_name, column_name + FROM `{project}.{dataset}`.INFORMATION_SCHEMA.COLUMNS + WHERE column_name LIKE '{cid_str}'") + print(glue("SQL Query: \n{sql}\n\n")) + + # Execute the query and store the result + result <- DBI::dbGetQuery(con, sql) + + return(result) +} + +# Execute the function with a vector of Concept IDs and store the output in df +df <- get_schema_info(con, c('158409298', '261863326'), tier='dev') + +# Display the query results +df +``` + +## Conclusion + +This tutorial has shown you how to query BigQuery's `INFORMATION_SCHEMA` to find the locations of specific Concept IDs within your database. By combining R scripting and SQL queries within a Quarto document, you can automate schema exploration and streamline the process of database management and analysis.
+ +Feel free to modify the query function or extend this approach for different types of metadata searches. Happy querying! diff --git a/tutorials/info_schema_tutorial.qmd b/tutorials/info_schema_tutorial.qmd new file mode 100644 index 0000000..4b77ac2 --- /dev/null +++ b/tutorials/info_schema_tutorial.qmd @@ -0,0 +1,113 @@ +--- +title: "Tutorial: Querying BigQuery Information Schema" +author: "Jake Peters" +date: 2025-01-22 +--- + +## Introduction + +In this tutorial, we demonstrate how to use a Quarto document to query BigQuery's information schema and identify where specific Concept IDs are located in your database. By integrating R and SQL, you will learn how to authenticate with BigQuery, execute custom queries, and dynamically construct search queries based on user-specified Concept IDs. + +## Prerequisites +- R installed on your system. +- The following R packages installed: `bigrquery`, `dplyr`, `DBI`, `dbplyr`, and `glue`. +- Access to a BigQuery project and dataset with appropriate permissions to query the `INFORMATION_SCHEMA`. + +## Table of Contents: +1. **Setup and Authentication:** Load libraries and authenticate with BigQuery. +2. **Establishing the Database Connection:** Open a connection to BigQuery and list the dataset's tables to confirm it works. +3. **Querying the Information Schema:** Query the BigQuery `INFORMATION_SCHEMA.COLUMNS` to retrieve metadata that matches a specific pattern. +4. **Building a Dynamic Query Function:** Create an R function to dynamically generate SQL queries to search for Concept IDs, then run it and review the results. + +## Step 1: Setup and Authentication + +The Quarto document begins with a header that includes essential metadata (title, author, and date). The first code chunk loads the required libraries and prepares the environment.
+ +Next, the following R code chunk loads the necessary libraries and suppresses the output for a clean setup: + +```{r, warning=FALSE, message=FALSE} +library(bigrquery) +library(dplyr) +library(DBI) +library(dbplyr) +library(glue) + +# Authenticate with BigQuery +bigrquery::bq_auth() +``` + +## Step 2: Establishing the Database Connection + +Here, we specify the dataset and project, authenticate with BigQuery, and open a connection. Finally, we list the tables in the dataset to confirm that the connection is working: + +```{r} +# Specify dataset and project details +dataset <- "FlatConnect" +project <- "nih-nci-dceg-connect-dev" + +# Establish connection to BigQuery +con <- DBI::dbConnect(bigrquery::bigquery(), + project = project, + dataset = dataset, + billing = project) + +# List available tables to verify the connection +DBI::dbListTables(con) +``` + +## Step 3: Querying the Information Schema + +The next code block runs a SQL query on the `INFORMATION_SCHEMA.COLUMNS` to locate columns that contain specific Concept IDs. This query filters columns based on a pattern present in their names: + +```{sql, connection=con} +SELECT + table_catalog, + table_schema, + table_name, + column_name +FROM FlatConnect.INFORMATION_SCHEMA.COLUMNS +WHERE column_name + LIKE '%158409298%261863326%'; +``` + +## Step 4: Building a Dynamic Query Function + +To make the querying process more flexible, we define an R function named `get_schema_info`. 
This function accepts a vector of Concept IDs and a tier (`dev` for development, `stg` for staging, or `prod` for production), constructs the appropriate SQL query dynamically, and returns the result: + +```{r} +get_schema_info <- function(con, cids, tier, dataset = 'FlatConnect') { + + # Determine the project based on the tier; fail fast on an unrecognized tier + project <- switch(tier, + dev = "nih-nci-dceg-connect-dev", + stg = "nih-nci-dceg-connect-stg-5519", + prod = "nih-nci-dceg-connect-prod-6d04", + stop("Unknown tier '", tier, "': expected 'dev', 'stg', or 'prod'")) + # Collapse the Concept IDs into a single string with "%" delimiters (e.g., "%cid1%cid2%cid3%") + cid_str <- paste0("%", paste(cids, collapse = "%"), "%") + print(glue("cid_str: {cid_str}\n\n")) + + # Construct the SQL query dynamically + sql <- glue::glue("SELECT table_catalog, table_schema, table_name, column_name + FROM `{project}.{dataset}`.INFORMATION_SCHEMA.COLUMNS + WHERE column_name LIKE '{cid_str}'") + print(glue("SQL Query: \n{sql}\n\n")) + + # Execute the query and store the result + result <- DBI::dbGetQuery(con, sql) + + return(result) +} + +# Execute the function with a vector of Concept IDs and store the output in df +df <- get_schema_info(con, c('158409298', '261863326'), tier='dev') + +# Display the query results +df +``` + +## Conclusion + +This tutorial has shown you how to query BigQuery's `INFORMATION_SCHEMA` to find the locations of specific Concept IDs within your database. By combining R scripting and SQL queries within a Quarto document, you can automate schema exploration and streamline the process of database management and analysis. + +Feel free to modify the query function or extend this approach for different types of metadata searches. Happy querying!
diff --git a/tutorials/project_specific_pseudo_ids.md b/tutorials/project_specific_pseudo_ids.md new file mode 100644 index 0000000..8498cf1 --- /dev/null +++ b/tutorials/project_specific_pseudo_ids.md @@ -0,0 +1,106 @@ +# Using Authorized Views for Project-Specific Pseudo IDs in BigQuery + +This tutorial shows you how to maintain a single "master" table with real Connect_ID values while exposing project-specific pseudo_IDs through authorized views. This approach avoids duplicating data across projects and ensures that each project sees only its own pseudo_IDs. The tutorial covers: + +1. Creating the master table. +2. Creating a UDF for project-specific pseudo_ID generation. +3. Creating a project-specific authorized view. +4. Key maintenance and governance points. + +------------------------------------------------------------------------ + +## Step 1: Create the Master Table + +Store your original data with real Connect_ID values in a master table. Limit access to this table so that only trusted service accounts or administrators can query it directly. + +``` sql +CREATE OR REPLACE TABLE your_project.master_dataset.master_table AS +SELECT + Connect_ID, + field_a, + field_b +FROM + source_of_truth; +``` + +*Note:* Be sure to set proper access controls on `master_table` to prevent unauthorized access to real IDs. + +------------------------------------------------------------------------ + +## Step 2: Create a UDF for Project-Specific Hashing + +Create a user-defined function that converts the real Connect_ID into a pseudo_ID. This function incorporates a project-specific salt to generate unique pseudo_IDs per project. + +``` sql +CREATE OR REPLACE FUNCTION your_project.your_dataset.func_project_pseudo_id( + real_id STRING, + project_name STRING +) RETURNS STRING AS ( + TO_HEX( -- SHA256 returns raw BYTES; hex-encode them into a stable 64-character string + SHA256(CONCAT(real_id, '_SECRET_SALT_FOR_', project_name)) + ) +); +``` + +*Key Points:* + +- Adjust the salt (`'_SECRET_SALT_FOR_'`) per project as needed.
+ +- Ensure that the salt values are stored and managed securely. + +- This function ensures the same real Connect_ID will yield different pseudo_IDs for different projects. + +------------------------------------------------------------------------ + +## Step 3: Create a Project-Specific Authorized View + +Create a view for each project that selects data from the master table and applies the UDF to transform the Connect_ID into a pseudo_ID. Only grant users access to this view—not to the master table. + +For example, for "ProjectA": + +``` sql +CREATE OR REPLACE VIEW your_project.projectA_dataset.view_projectA AS +SELECT + your_project.your_dataset.func_project_pseudo_id(master_table.Connect_ID, 'ProjectA') AS pseudo_ID, + master_table.field_a, + master_table.field_b +FROM + your_project.master_dataset.master_table; +``` + +*Key Considerations:* - Grant the appropriate permissions so that ProjectA users only have access to `view_projectA`. - Each project can have a similar view but with a project-specific salt or project name. + +------------------------------------------------------------------------ + +## Maintenance and Governance + +When implementing this approach, consider the following maintenance and governance aspects: + +- **Single Source of Truth:**\ + Maintain only one master table, minimizing data duplication and the risk of inconsistencies. + +- **Access Control:**\ + Ensure that only authorized users have access to the master table. Each project should only access its authorized view. + +- **Data Updates:**\ + As the master table is updated, the authorized views remain valid and reflect the latest data without additional modifications. + +- **Security Reviews:**\ + Regularly review the security policies surrounding the mapping function and view access. Rotate salts or update security practices as necessary. + +- **Compliance:**\ + Ensure that your setup complies with your organization's data governance policies and any relevant privacy regulations. 
+ +------------------------------------------------------------------------ + +By following these steps, you can keep a secure master dataset while exposing project-specific pseudo_IDs via authorized views. This approach limits the risk of cross-project re-identification and adheres to best practices in data security and governance. + +## Internal Example: + +NCCR - state registry + +- Johanna + +- Use a CBIIT-blessed algorithm + +- From af3af27c423826771d60556e6ad2366dcd67fa64 Mon Sep 17 00:00:00 2001 From: lorszag Date: Fri, 14 Mar 2025 14:34:48 -0400 Subject: [PATCH 49/53] Unit testing (Testthat) --- tutorials/example.R | 46 ++++++++++++++++++++++++++++++++++++++++ tutorials/test-example.R | 28 ++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 tutorials/example.R create mode 100644 tutorials/test-example.R diff --git a/tutorials/example.R b/tutorials/example.R new file mode 100644 index 0000000..f591875 --- /dev/null +++ b/tutorials/example.R @@ -0,0 +1,46 @@ +# load libraries +library(testthat) +library(usethis) + +# this creates the correct testing file; run it ONCE, interactively (kept commented out because the test file source()s this script) +## replace "example" with your file name +# usethis::use_test("example") + +# simple addition function +func1=function(num1, num2, num3){ + x = num1 + num2 + num3 + return(x) +} + +# division function, slightly more complicated +func2 = function(num1, num2){ + x = num1/num2 + return(x) +} + +# text concatenation +func3 = function(str1, str2){ + x = paste(str1, str2, sep = "") # putting no separation, which isn't what I want so that the test will fail + return(x) +} + +# creating a vector +func4 = function(num){ + x = rep(4, num) + return(x) +} + + +# creating a df +func5 = function(num1, num2){ + x = c(num1, num1+1, num1+2) + y = c(num2, num2+1, num2+2) + df = data.frame(x = x, y = y) + return(df) +} + +# Resources +## https://r-pkgs.org/testing-basics.html See Section 13.5.4 for specific expectations you can test +## https://rstudio.github.io/cheatsheets/html/package-development.html
+## https://testthat.r-lib.org/reference/index.html +## https://matthieu-bruneaux.gitlab.io/guide-r-rstudio-git-gitlab/060-testing.html, more on expectations \ No newline at end of file diff --git a/tutorials/test-example.R b/tutorials/test-example.R new file mode 100644 index 0000000..c48c89b --- /dev/null +++ b/tutorials/test-example.R @@ -0,0 +1,28 @@ +library(testthat) + +# Source the function file (relative path: run via testthat::test_file() or with the working directory set to this folder) +source("example.R") + +# function 1 tests +test_that("Function adds numbers properly", {expect_equal(func1(3, 5, 7), 15)}) +test_that("Function outputs error with non-numerical entry", {expect_error(func1(3, 5, "a"))}) + +# function 2 tests +test_that("Function divides properly", {expect_equal(func2(3, 2), 1.5)}) +test_that("Function outputs error with non-numerical entry", {expect_error(func2(3, "a"))}) +test_that("Division by zero returns Inf", {expect_equal(func2(3, 0), Inf)}) +test_that("Class is correct", {expect_type(func2(3, 2), 'double')}) + +# function 3 test, purposefully one that fails! +test_that("Concatenation works", {expect_equal(func3("Hi", "Jake"), "Hi Jake")}) +test_that("Class is correct", {expect_type(func3("Hi", "Jake"), 'character')}) + +# function 4 test +test_that("Length is correct", {expect_length(func4(2), 2)}) +test_that("Length is correct", {expect_true(length(func4(2))==2)}) # above rewritten +test_that("Class is correct", {expect_type(func4(2), 'double')}) + +## s3 class test (function 5) +### common s3 classes include data.frame, lm, glm, ggplot, factor +test_that("Class type works", {expect_s3_class(func5(3, 7), "data.frame")}) +## note exact = FALSE is the default, so it could be a tibble and this test would still pass!
From 3b37dd97fac312fa2d13f1d996b6fd2c6c95c26d Mon Sep 17 00:00:00 2001 From: Michelle Hudson Date: Wed, 13 May 2026 17:01:51 -0400 Subject: [PATCH 50/53] Adding common sql queries to queries folder --- queries/ab_message_variant_lookup.sql | 20 ++ ...ospecimen_refusal_and_collection_dates.sql | 141 +++++++++++ ...ne_survey_completion_r_logic_reference.sql | 24 ++ ...spec_collection_cup_not_returned_flags.sql | 37 +++ ...onnect_id_list_ordered_output_template.sql | 33 +++ queries/country_of_origin_lookup.sql | 39 +++ queries/date_timestamp_filtering_examples.sql | 32 +++ queries/deceased_hipaa_withdrawal_flags.sql | 37 +++ ...tified_site_race_sex_r_logic_reference.sql | 25 ++ queries/dhq3_reference_queries.sql | 71 ++++++ queries/duplicate_tokens_by_site.sql | 51 ++++ queries/ehr_birthdate_completeness_counts.sql | 147 ++++++++++++ ..._distinct_participant_counts_by_source.sql | 77 ++++++ ...formation_schema_column_search_queries.sql | 5 + queries/module1_sex_from_v1_v2.sql | 20 ++ ...t_age_groups_from_verification_and_dob.sql | 39 +++ ...ipant_physical_and_alternate_addresses.sql | 24 ++ queries/participant_twin_status_lookup.sql | 22 ++ queries/race_group_r_logic_reference.sql | 76 ++++++ queries/reinvitation_campaign_type_lookup.sql | 21 ++ queries/site_code_to_label_case_snippet.sql | 17 ++ .../survey_completion_counts_by_age_group.sql | 59 +++++ .../survey_status_case_mapping_snippets.sql | 171 +++++++++++++ .../system_time_as_of_timestamp_snippet.sql | 6 + ...ation_and_duplicate_type_case_snippets.sql | 40 ++++ queries/verification_by_outreach_status.sql | 224 ++++++++++++++++++ .../yes_no_flag_temp_function_template.sql | 28 +++ tutorials/ggplot_templates.qmd | 7 +- 28 files changed, 1490 insertions(+), 3 deletions(-) create mode 100644 queries/ab_message_variant_lookup.sql create mode 100644 queries/baseline_biospecimen_refusal_and_collection_dates.sql create mode 100644 queries/baseline_survey_completion_r_logic_reference.sql create mode 100644 
queries/biospec_collection_cup_not_returned_flags.sql create mode 100644 queries/connect_id_list_ordered_output_template.sql create mode 100644 queries/country_of_origin_lookup.sql create mode 100644 queries/date_timestamp_filtering_examples.sql create mode 100644 queries/deceased_hipaa_withdrawal_flags.sql create mode 100644 queries/deidentified_site_race_sex_r_logic_reference.sql create mode 100644 queries/dhq3_reference_queries.sql create mode 100644 queries/duplicate_tokens_by_site.sql create mode 100644 queries/ehr_birthdate_completeness_counts.sql create mode 100644 queries/ehr_distinct_participant_counts_by_source.sql create mode 100644 queries/information_schema_column_search_queries.sql create mode 100644 queries/module1_sex_from_v1_v2.sql create mode 100644 queries/participant_age_groups_from_verification_and_dob.sql create mode 100644 queries/participant_physical_and_alternate_addresses.sql create mode 100644 queries/participant_twin_status_lookup.sql create mode 100644 queries/race_group_r_logic_reference.sql create mode 100644 queries/reinvitation_campaign_type_lookup.sql create mode 100644 queries/site_code_to_label_case_snippet.sql create mode 100644 queries/survey_completion_counts_by_age_group.sql create mode 100644 queries/survey_status_case_mapping_snippets.sql create mode 100644 queries/system_time_as_of_timestamp_snippet.sql create mode 100644 queries/verification_and_duplicate_type_case_snippets.sql create mode 100644 queries/verification_by_outreach_status.sql create mode 100644 queries/yes_no_flag_temp_function_template.sql diff --git a/queries/ab_message_variant_lookup.sql b/queries/ab_message_variant_lookup.sql new file mode 100644 index 0000000..24a374c --- /dev/null +++ b/queries/ab_message_variant_lookup.sql @@ -0,0 +1,20 @@ +-- Source sheet: AB Variable +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT +token, +state_d_956485028, +CASE +WHEN state_d_956485028 = '562663942' THEN 'Altruism Personal' +WHEN 
state_d_956485028 = '686986259' THEN 'Altruism General' +WHEN state_d_956485028 = '477331464' THEN 'Cancer Connection Personal' +WHEN state_d_956485028 = '935486262' THEN 'Cancer Connection General' +WHEN state_d_956485028 = '518814501' THEN 'Research Personal' +WHEN state_d_956485028 = '307763550' THEN 'Research General' +ELSE NULL +END +AS RcrtSI_ABMessage_v1r0 +FROM +`nih-nci-dceg-connect-stg-5519.FlatConnect.participants` +WHERE +token IN ( ) diff --git a/queries/baseline_biospecimen_refusal_and_collection_dates.sql b/queries/baseline_biospecimen_refusal_and_collection_dates.sql new file mode 100644 index 0000000..9ca8322 --- /dev/null +++ b/queries/baseline_biospecimen_refusal_and_collection_dates.sql @@ -0,0 +1,141 @@ +-- Source sheet: Biospecimen 1525 +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT +Connect_ID, +d_685002411_d_217367618 AS `Refused Baseline Specimen Surveys`, +d_657475009 AS `Refused Baseline specimen surveys date`, +d_561681068 AS `Research collection of Baseline blood sample date`, +d_173836415_d_266600170_d_982213346 AS `Clinical collection of Baseline blood sample date`, +d_847159717 AS `Research collection of Baseline urine sample date`, +d_173836415_d_266600170_d_139245758 AS `Clinical collection of Baseline urine sample date`, +d_448660695 AS `Research collection of Baseline MW sample date` + +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` + +WHERE + +d_685002411_d_217367618 = "353358909" + + + + + +WITH refusals AS ( +SELECT +Connect_ID, +-- refusal indicator and date +d_685002411_d_217367618 AS refused_biospec_survey, +d_657475009 AS refusal_date, + +-- specimen collection dates +d_561681068 AS research_blood_date, +d_173836415_d_266600170_d_982213346 AS clinical_blood_date, +d_847159717 AS research_urine_date, +d_173836415_d_266600170_d_139245758 AS clinical_urine_date, +d_448660695 AS research_mw_date +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +WHERE 
d_685002411_d_217367618 = "353358909" -- refused +), + +classified AS ( +SELECT +*, +-- earliest specimen date across blood, urine, and mouthwash +LEAST( +research_blood_date, +clinical_blood_date, +research_urine_date, +clinical_urine_date, +research_mw_date +) AS earliest_specimen_date, + +-- determine whether refusal happened before specimen donation +CASE +WHEN refusal_date IS NOT NULL +AND ( +research_blood_date > refusal_date OR research_blood_date IS NULL +) +AND ( +clinical_blood_date > refusal_date OR clinical_blood_date IS NULL +) +AND ( +research_urine_date > refusal_date OR research_urine_date IS NULL +) +AND ( +clinical_urine_date > refusal_date OR clinical_urine_date IS NULL +) +AND ( +research_mw_date > refusal_date OR research_mw_date IS NULL +) +THEN 1 +ELSE 0 +END AS refused_before_any_specimen +FROM refusals +) + +SELECT +COUNT(*) AS total_refused, -- denominator +SUM(refused_before_any_specimen) AS refused_before_specimen, -- numerator +SAFE_DIVIDE(SUM(refused_before_any_specimen), COUNT(*)) AS proportion_refused_before_specimen +FROM classified; + + + + +AUTUMN'S CODE + +-- CALCULATE DENOMINATOR: +SELECT count(*) as denominator_count +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +WHERE +d_685002411_d_217367618 = "353358909" --refused biospecimen survey + +--- separate run +-- CALCULATE NUMERATOR: +-- Subset participant data as 'Timestamp_Flags' to create flags: +WITH Timestamp_Flags as ( +SELECT +Connect_ID, +d_657475009, +d_685002411_d_217367618, +d_173836415_d_266600170_d_561681068 AS research_blood_dt, +d_173836415_d_266600170_d_982213346 AS clinical_blood_dt, +d_173836415_d_266600170_d_847159717 AS research_urine_dt, +d_173836415_d_266600170_d_139245758 AS clinical_urine_dt, +d_173836415_d_266600170_d_448660695 AS research_MW_dt, +-- Create a flag for whenever collection occurred after refusal: +CASE WHEN d_173836415_d_266600170_d_561681068 > d_657475009 THEN "Yes" ELSE "No" END +AS research_blood_after_refusal, +CASE WHEN 
d_173836415_d_266600170_d_982213346 > d_657475009 THEN "Yes" ELSE "No" END +AS clinical_blood_after_refusal, +CASE WHEN d_173836415_d_266600170_d_847159717 > d_657475009 THEN "Yes" ELSE "No" END +AS research_urine_after_refusal, +CASE WHEN d_173836415_d_266600170_d_139245758 > d_657475009 THEN "Yes" ELSE "No" END +AS clinical_urine_after_refusal, +CASE WHEN d_173836415_d_266600170_d_448660695 > d_657475009 THEN "Yes" ELSE "No" END +AS research_MW_after_refusal +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +) +-- Select from Timestamp_Flags data: +SELECT +Connect_ID, +CASE WHEN d_685002411_d_217367618 = "353358909" THEN "Yes" ELSE "No" +END AS `Refused Baseline Specimen Survey`, +d_657475009 AS `Refused Baseline Specimen Survey Date`, +-- Include timestamp flag variables +research_blood_dt, +clinical_blood_dt, +research_urine_dt, +clinical_urine_dt, +research_MW_dt +FROM Timestamp_Flags +WHERE +d_685002411_d_217367618 = "353358909" --refused biospecimen survey +AND ( +research_blood_after_refusal = "Yes" +OR clinical_blood_after_refusal = "Yes" +OR research_urine_after_refusal = "Yes" +OR clinical_urine_after_refusal = "Yes" +OR research_MW_after_refusal = "Yes" +); diff --git a/queries/baseline_survey_completion_r_logic_reference.sql b/queries/baseline_survey_completion_r_logic_reference.sql new file mode 100644 index 0000000..9b0549f --- /dev/null +++ b/queries/baseline_survey_completion_r_logic_reference.sql @@ -0,0 +1,24 @@ +-- Source sheet: Completions +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +/* +This sheet contains reference logic that is not SQL; content preserved as a block comment. 
+ +Baseline Survey Completions + +process = case_when(d_100767870 == 353358909 ~ "All", +d_949302066 != 231311385 ~ "None", +d_949302066 == 231311385 & d_536735468 == 231311385 & d_976570371 == 231311385 ~ "BOH, MRE, and SAS Sections", +d_949302066 == 231311385 & d_976570371 == 231311385 & d_663265240 == 231311385 ~ "BOH, SAS, and LAW Sections", +d_949302066 == 231311385 & d_536735468 == 231311385 & d_663265240 == 231311385 ~ "BOH, MRE, and LAW Sections", +d_949302066 == 231311385 & d_536735468 == 231311385 ~ "BOH and MRE Sections", +d_949302066 == 231311385 & d_976570371 == 231311385 ~ "BOH and SAS Sections", +d_949302066 == 231311385 & d_663265240 == 231311385 ~ "BOH and LAW Sections", +d_949302066 == 231311385 ~ "BOH Section only"), +BL_comp = case_when(d_100767870==353358909 ~ "All", +process=="None" ~ "None", +process=="BOH Section only" ~ "BOH Only", +TRUE ~ "2 or 3 Sections") +BSL_compl = case_when(d_100767870==353358909 ~ "All Baseline Modules Completed", +TRUE ~ "One or More Baseline Modules Not Completed") +*/ diff --git a/queries/biospec_collection_cup_not_returned_flags.sql b/queries/biospec_collection_cup_not_returned_flags.sql new file mode 100644 index 0000000..dd7c45d --- /dev/null +++ b/queries/biospec_collection_cup_not_returned_flags.sql @@ -0,0 +1,37 @@ +-- Source sheet: Biospec 990 +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT +Connect_ID, +d_633640710_d_427719697 AS `Collection Cup Not Returned`, +d_633640710_d_309189173 AS `Collection Cup Leaked - Total Sample Loss` +FROM +`nih-nci-dceg-connect-prod-6d04.FlatConnect.kitAssembly` +WHERE +d_633640710_d_427719697 = "353358909" +OR d_633640710_d_309189173 = "353358909"; + + +-- w/ named variables + +SELECT +Connect_ID, +CASE +WHEN d_633640710_d_427719697 = "353358909" THEN "Yes" +WHEN d_633640710_d_427719697 = "104430631" THEN "No" +ELSE NULL +END AS `BioKit_CollCupNotRet_v1r0`, +/* +CASE +WHEN d_633640710_d_309189173 = "353358909" THEN "Yes" +WHEN 
d_633640710_d_309189173 = "353358909" THEN "Yes" +WHEN d_633640710_d_309189173 = "104430631" THEN "No" +ELSE NULL +END AS `BioKit_CollCupLeakTotal_v1r0`, +*/ +d_259846815 AS `BioKit_MWCupID_v1r0`, +d_826941471 AS `BioKit_KitRecdTm_v1r0` +FROM +`nih-nci-dceg-connect-prod-6d04.FlatConnect.kitAssembly` +WHERE +d_633640710_d_427719697 IN ("353358909") +-- OR d_633640710_d_309189173 IN ("353358909"); diff --git a/queries/connect_id_list_ordered_output_template.sql b/queries/connect_id_list_ordered_output_template.sql new file mode 100644 index 0000000..50b3614 --- /dev/null +++ b/queries/connect_id_list_ordered_output_template.sql @@ -0,0 +1,33 @@ +-- Source sheet: ORDER BY CONNECTID +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +WITH id_list AS ( +SELECT id AS Connect_ID, offset +FROM UNNEST([ +-- Add list of ConnectIDs here (comma-separated string literals, in the desired output order) +]) AS id WITH OFFSET +) + +SELECT +t.Connect_ID, + +CASE +WHEN t.d_878865966 = '104430631' THEN 'No' +WHEN t.d_878865966 = '353358909' THEN 'Yes' +END AS BioFin_BaseBloodCol_v1r0, + +CASE +WHEN t.d_684635302 = '104430631' THEN 'No' +WHEN t.d_684635302 = '353358909' THEN 'Yes' +END AS BioFin_BaseMouthCol_v1r0, + +CASE +WHEN t.d_167958071 = '104430631' THEN 'No' +WHEN t.d_167958071 = '353358909' THEN 'Yes' +END AS BioFin_BaseUrineCol_v1r0 + +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` t +JOIN id_list i +ON t.Connect_ID = i.Connect_ID + +ORDER BY i.offset diff --git a/queries/country_of_origin_lookup.sql b/queries/country_of_origin_lookup.sql new file mode 100644 index 0000000..d4b9a3f --- /dev/null +++ b/queries/country_of_origin_lookup.sql @@ -0,0 +1,39 @@ +-- Source sheet: Country of Origin +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT + +Connect_ID, + +CASE +WHEN d_837244890_integer = '360734594' then 'France' +WHEN d_837244890_integer = '602510992' then 'Portugal' +WHEN d_837244890_integer = '581659944' then 'Thailand' +END as country_new, -- decoded label; the raw concept ID is returned below as country_new_integer + + +d_837244890_integer as country_new_integer, + +d_384576626 as country_old,
+d_876546260 as birth_city, +d_337485417 as birth_state, +d_384576626 as birth_country, + +FROM `nih-nci-dceg-connect-dev.FlatConnect.participants` +WHERE Connect_ID in ( + + + + + + +) + +ORDER BY CASE Connect_ID +WHEN '' THEN 1 +WHEN '' THEN 2 +WHEN '' THEN 3 +WHEN '' THEN 4 +WHEN '' THEN 5 +ELSE 6 +END; diff --git a/queries/date_timestamp_filtering_examples.sql b/queries/date_timestamp_filtering_examples.sql new file mode 100644 index 0000000..d6a77df --- /dev/null +++ b/queries/date_timestamp_filtering_examples.sql @@ -0,0 +1,32 @@ +-- Source sheet: DATE vs. DATE(TIMESTAMP) +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +WHERE +DATE(TIMESTAMP(d_914594314)) <= DATE('2025-08-01') + + + +-- EXAMPLE 2: +-- "Both groups will have the below conditionality: +-- RcrtV_Verification_v1r0 = 1 +-- AND HdWd_Activepart_v1r0 = 0 +-- AND HdRef_Allsrv_v1r0 = 0 +-- AND HdWd_Deceased_v1r0 = 0 +-- AND HdWd_WdConsent_v1r0 = 0 + +-- Group 1 additional conditionality: +-- AND RcrtV_VerificationTm_V1R0 LE 07/07/2024 + +-- Group 2 additional conditionality: +-- AND RcrtV_VerificationTm_V1R0 GE 07/08/2024 AND LE 08/01/2025 " + + +WHERE +d_821247024 = '197316935' +AND d_906417725 = '104430631' +AND d_685002411_d_867203506 = '104430631' +AND d_987563196 = '104430631' +AND d_747006172 = '104430631' +-- AND DATE(TIMESTAMP(d_914594314)) <= DATE('2024-07-07') +AND DATE(TIMESTAMP(d_914594314)) >= DATE('2024-07-08') +AND DATE(TIMESTAMP(d_914594314)) <= DATE('2025-08-01') diff --git a/queries/deceased_hipaa_withdrawal_flags.sql b/queries/deceased_hipaa_withdrawal_flags.sql new file mode 100644 index 0000000..ed28315 --- /dev/null +++ b/queries/deceased_hipaa_withdrawal_flags.sql @@ -0,0 +1,37 @@ +-- Source sheet: Deceased_HIPPA Flags +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +CREATE TEMP FUNCTION yes_no_flag(x STRING) + +AS ( +CASE +WHEN x = '104430631' THEN 'No' +WHEN x = '353358909' THEN 'Yes' +ELSE NULL +END +); +SELECT +Connect_ID, +CASE +WHEN d_912301837 = '208325815' THEN 'No Refusal' 
+WHEN d_912301837 = '622008261' THEN 'Refused some activities' +WHEN d_912301837 = '458508122' THEN 'Refused all future activities' +WHEN d_912301837 = '872012139' THEN 'Revoked HIPAA only' +WHEN d_912301837 = '854021266' THEN 'Withdrew consent' +WHEN d_912301837 = '241236037' THEN 'Data Destruction Requested' +WHEN d_912301837 = '884452262' THEN 'Data destroyed' +WHEN d_912301837 = '618686157' THEN 'Deceased' +END AS SMMet_PartStatus_v1r0, + +yes_no_flag(d_685002411_d_994064239) AS HdRef_Basesrv_v1r0, +yes_no_flag(d_685002411_d_194410742) AS HdRef_Baseblood_v1r0, +yes_no_flag(d_685002411_d_949501163) AS HdRef_Baseurine_v1r0, +yes_no_flag(d_685002411_d_277479354) AS HdRef_Basesaliva_v1r0, +yes_no_flag(d_685002411_d_867203506) AS HdRef_Allsrv_v1r0, +yes_no_flag(d_685002411_d_352996056) AS HdRef_Allsample_v1r0, +yes_no_flag(d_685002411_d_217367618) AS HdRef_BlSpecSrv_v1r0, +yes_no_flag(d_747006172) AS HdWd_WdConsent_v1r0, +yes_no_flag(d_906417725) AS HdWd_Activepart_v1r0, +yes_no_flag(d_773707518) AS HdWd_HIPAArevoked_v1r0, +yes_no_flag(d_831041022) AS HdWd_Destroydata_v1r0, +yes_no_flag(d_987563196) AS HdWd_Deceased_v1r0 FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants`; -- NOTE(review): FROM clause was missing; confirm table/environment diff --git a/queries/deidentified_site_race_sex_r_logic_reference.sql b/queries/deidentified_site_race_sex_r_logic_reference.sql new file mode 100644 index 0000000..12376bc --- /dev/null +++ b/queries/deidentified_site_race_sex_r_logic_reference.sql @@ -0,0 +1,25 @@ +-- Source sheet: De-identified site data +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +/* +This sheet contains reference logic that is not SQL; content preserved as a block comment. 
+ +Race and Sex Data from Sites + +race = case_when(state_d_684926335 == '635279662' |state_d_849518448 == '768826601' | + state_d_119643471 == '635279662' | state_d_253532712=='723775357' ~ "White, Non-Hispanic" , + state_d_684926335 %in% c('232334767', '401335456') | + state_d_849518448 == '181769837' | + state_d_253532712 %in% c('153444133','572474909','308427446', + '211228524','277568192','611398522','181769837') | + state_d_119643471 %in% c('232334767','211228524','308427446','432722256', + '232663805','785578696','200929978','490725843','965998904') ~ "Other", + state_d_684926335 == '178420302' | + state_d_849518448 == '178420302' | + state_d_253532712 == '178420302' | + state_d_119643471 %in% c( '986445321','746038746','178420302') | + (is.na(state_d_119643471) & d_827220437 == '657167265') ~ "Unknown"), +sex = case_when(state_d_706256705 == '536341288' | state_d_435027713 == '536341288' ~ "Female", + state_d_706256705 == '654207589' | state_d_435027713 == '654207589' ~ "Male", + #state_d_706256705 == '830573274' ~ "Intersex or Other", # too small of a count for now, need to combine with unknown +*/ diff --git a/queries/dhq3_reference_queries.sql b/queries/dhq3_reference_queries.sql new file mode 100644 index 0000000..2523085 --- /dev/null +++ b/queries/dhq3_reference_queries.sql @@ -0,0 +1,71 @@ +-- Source sheet: DHQ3 +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT *, +d_148184166 as DHQ3Username, +d_262294850 as DHQ3ProcessedTm, +d_196723965 as DHQ3StudyID, + +FROM `nih-nci-dceg-connect-dev.Connect.dhqRawAnswers` + + +SELECT DISTINCT *, +FROM `nih-nci-dceg-connect-dev.Connect.dhqRawAnswers` +WHERE d_148184166 IS NOT NULL + + +SELECT DISTINCT d_148184166 AS DHQ3Username +FROM `nih-nci-dceg-connect-dev.Connect.dhqRawAnswers` +WHERE d_148184166 IS NOT NULL +ORDER BY DHQ3Username + +SELECT DISTINCT d_262294850 AS DHQ3ProcessedTm +FROM `nih-nci-dceg-connect-dev.Connect.dhqRawAnswers` +WHERE d_262294850 IS NOT NULL + +SELECT DISTINCT 
d_196723965 AS DHQ3StudyID +FROM `nih-nci-dceg-connect-dev.Connect.dhqRawAnswers` +WHERE d_196723965 IS NOT NULL + + +SELECT *, +d_148184166 as DHQ3Username, +d_262294850 as DHQ3ProcessedTm, +d_196723965 as DHQ3StudyID, + +FROM `nih-nci-dceg-connect-dev.Connect.dhqDetailedAnalysis` + + +SELECT DISTINCT d_148184166 AS DHQ3Username +FROM `nih-nci-dceg-connect-dev.Connect.dhqDetailedAnalysis` +WHERE d_148184166 IS NOT NULL +ORDER BY DHQ3Username + +SELECT DISTINCT d_262294850 AS DHQ3ProcessedTm +FROM `nih-nci-dceg-connect-dev.Connect.dhqDetailedAnalysis` +WHERE d_262294850 IS NOT NULL + +SELECT DISTINCT d_196723965 AS DHQ3StudyID +FROM `nih-nci-dceg-connect-dev.Connect.dhqDetailedAnalysis` +WHERE d_196723965 IS NOT NULL + + +SELECT *, +d_148184166 as DHQ3Username, +d_262294850 as DHQ3ProcessedTm, +d_196723965 as DHQ3StudyID, + +FROM `nih-nci-dceg-connect-dev.Connect.dhqAnalysisResults` + +SELECT DISTINCT d_148184166 AS DHQ3Username +FROM `nih-nci-dceg-connect-dev.Connect.dhqAnalysisResults` +WHERE d_148184166 IS NOT NULL +ORDER BY DHQ3Username + +SELECT DISTINCT d_262294850 AS DHQ3ProcessedTm +FROM `nih-nci-dceg-connect-dev.Connect.dhqAnalysisResults` +WHERE d_262294850 IS NOT NULL + +SELECT DISTINCT d_196723965 AS DHQ3StudyID +FROM `nih-nci-dceg-connect-dev.Connect.dhqAnalysisResults` +WHERE d_196723965 IS NOT NULL diff --git a/queries/duplicate_tokens_by_site.sql b/queries/duplicate_tokens_by_site.sql new file mode 100644 index 0000000..4c71c18 --- /dev/null +++ b/queries/duplicate_tokens_by_site.sql @@ -0,0 +1,51 @@ +-- Source sheet: Duplicate Tokens +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT token, +state_studyId, +CASE d_827220437 +WHEN '125001209' THEN 'Kaiser Permanente Colorado' +WHEN '181769837' THEN 'Other' +WHEN '300267574' THEN 'Kaiser Permanente Hawaii' +WHEN '303349821' THEN 'Marshfield Clinic Health System' +WHEN '327912200' THEN 'Kaiser Permanente Georgia' +WHEN '452412599' THEN 'Kaiser Permanente Northwest' +WHEN 
'517700004' THEN 'National Cancer Institute' +WHEN '531629870' THEN 'HealthPartners' +WHEN '548392715' THEN 'Henry Ford Health System' +WHEN '657167265' THEN 'Sanford Health' +WHEN '809703864' THEN 'University of Chicago Medicine' +ELSE NULL +END AS Site, +COUNT(*) AS dup_count +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +GROUP BY token, state_studyId, Site +HAVING dup_count > 1 + +SELECT +d_827220437, +Connect_ID, +ARRAY_AGG(DISTINCT token) AS tokens, +COUNT(DISTINCT token) AS num_tokens +FROM +`nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +WHERE +Connect_ID IS NOT NULL AND d_827220437 IS NOT NULL +GROUP BY +d_827220437, Connect_ID +HAVING +COUNT(DISTINCT token) > 1 + + +SELECT +Connect_ID, +ARRAY_AGG(DISTINCT token) AS tokens, +COUNT(DISTINCT token) AS num_tokens +FROM +`nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +WHERE +Connect_ID IS NOT NULL +GROUP BY +Connect_ID +HAVING +COUNT(DISTINCT token) > 1 diff --git a/queries/ehr_birthdate_completeness_counts.sql b/queries/ehr_birthdate_completeness_counts.sql new file mode 100644 index 0000000..d52dfc1 --- /dev/null +++ b/queries/ehr_birthdate_completeness_counts.sql @@ -0,0 +1,147 @@ +-- Source sheet: EHR Age Counts +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +WITH combined AS ( +SELECT +'ehr_healthpartners' AS source, +year_of_birth, +month_of_birth, +day_of_birth, +birth_datetime +FROM `nih-nci-dceg-connect-prod-6d04.ehr_healthpartners.person` + +UNION ALL +SELECT +'ehr_henry_ford' AS source, +year_of_birth, +month_of_birth, +day_of_birth, +birth_datetime +FROM `nih-nci-dceg-connect-prod-6d04.ehr_henry_ford.person` + +UNION ALL +SELECT +'ehr_kp_colorado' AS source, +year_of_birth, +month_of_birth, +day_of_birth, +birth_datetime +FROM `nih-nci-dceg-connect-prod-6d04.ehr_kp_colorado.person` + +UNION ALL +SELECT +'ehr_kp_georgia' AS source, +year_of_birth, +month_of_birth, +day_of_birth, +birth_datetime +FROM 
`nih-nci-dceg-connect-prod-6d04.ehr_kp_georgia.person` + +UNION ALL +SELECT +'ehr_kp_hawaii' AS source, +year_of_birth, +month_of_birth, +day_of_birth, +birth_datetime +FROM `nih-nci-dceg-connect-prod-6d04.ehr_kp_hawaii.person` + +UNION ALL +SELECT +'ehr_kp_northwest' AS source, +year_of_birth, +month_of_birth, +day_of_birth, +birth_datetime +FROM `nih-nci-dceg-connect-prod-6d04.ehr_kp_northwest.person` + +UNION ALL +SELECT +'ehr_marshfield' AS source, +year_of_birth, +month_of_birth, +day_of_birth, +birth_datetime +FROM `nih-nci-dceg-connect-prod-6d04.ehr_marshfield.person` + +UNION ALL +SELECT +'ehr_uchicago' AS source, +year_of_birth, +month_of_birth, +day_of_birth, +birth_datetime +FROM `nih-nci-dceg-connect-prod-6d04.ehr_uchicago.person` +), + +with_age AS ( +SELECT +source, +year_of_birth, +month_of_birth, +day_of_birth, +birth_datetime, +SAFE.PARSE_DATE( +'%Y-%m-%d', +FORMAT('%04d-%02d-%02d', year_of_birth, COALESCE(month_of_birth, 1), COALESCE(day_of_birth, 1)) +) AS DOB, +DATE_DIFF( +CURRENT_DATE(), +SAFE.PARSE_DATE( +'%Y-%m-%d', +FORMAT('%04d-%02d-%02d', year_of_birth, COALESCE(month_of_birth, 1), COALESCE(day_of_birth, 1)) +), +YEAR +) AS age, +CASE +WHEN DATE_DIFF(CURRENT_DATE(), SAFE.PARSE_DATE('%Y-%m-%d', FORMAT('%04d-%02d-%02d', year_of_birth, COALESCE(month_of_birth, 1), COALESCE(day_of_birth, 1))), YEAR) < 30 THEN '<30' +WHEN DATE_DIFF(CURRENT_DATE(), SAFE.PARSE_DATE('%Y-%m-%d', FORMAT('%04d-%02d-%02d', year_of_birth, COALESCE(month_of_birth, 1), COALESCE(day_of_birth, 1))), YEAR) BETWEEN 30 AND 39 THEN '30-39' +WHEN DATE_DIFF(CURRENT_DATE(), SAFE.PARSE_DATE('%Y-%m-%d', FORMAT('%04d-%02d-%02d', year_of_birth, COALESCE(month_of_birth, 1), COALESCE(day_of_birth, 1))), YEAR) BETWEEN 40 AND 49 THEN '40-49' +WHEN DATE_DIFF(CURRENT_DATE(), SAFE.PARSE_DATE('%Y-%m-%d', FORMAT('%04d-%02d-%02d', year_of_birth, COALESCE(month_of_birth, 1), COALESCE(day_of_birth, 1))), YEAR) BETWEEN 50 AND 59 THEN '50-59' +WHEN DATE_DIFF(CURRENT_DATE(), 
SAFE.PARSE_DATE('%Y-%m-%d', FORMAT('%04d-%02d-%02d', year_of_birth, COALESCE(month_of_birth, 1), COALESCE(day_of_birth, 1))), YEAR) BETWEEN 60 AND 69 THEN '60-69' +WHEN DATE_DIFF(CURRENT_DATE(), SAFE.PARSE_DATE('%Y-%m-%d', FORMAT('%04d-%02d-%02d', year_of_birth, COALESCE(month_of_birth, 1), COALESCE(day_of_birth, 1))), YEAR) BETWEEN 70 AND 79 THEN '70-79' +WHEN DATE_DIFF(CURRENT_DATE(), SAFE.PARSE_DATE('%Y-%m-%d', FORMAT('%04d-%02d-%02d', year_of_birth, COALESCE(month_of_birth, 1), COALESCE(day_of_birth, 1))), YEAR) >= 80 THEN '80+' +ELSE NULL +END AS age_group +FROM combined +) + +-- 1️⃣ Count by source and age_group +, counts_by_source AS ( +SELECT +source, +age_group, +COUNT(*) AS person_count +FROM with_age +WHERE age_group IS NOT NULL +GROUP BY source, age_group +) + +-- 2️⃣ Count by age_group overall +, counts_overall AS ( +SELECT +age_group, +COUNT(*) AS total_persons +FROM with_age +WHERE age_group IS NOT NULL +GROUP BY age_group +) + +-- Final output: combine both summaries +SELECT +'By Source' AS summary_type, +source, +age_group, +person_count AS count +FROM counts_by_source + +UNION ALL + +SELECT +'Overall' AS summary_type, +NULL AS source, +age_group, +total_persons AS count +FROM counts_overall + +ORDER BY summary_type, source, age_group; diff --git a/queries/ehr_distinct_participant_counts_by_source.sql b/queries/ehr_distinct_participant_counts_by_source.sql new file mode 100644 index 0000000..55f15cc --- /dev/null +++ b/queries/ehr_distinct_participant_counts_by_source.sql @@ -0,0 +1,77 @@ +-- Source sheet: EHR Counts +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +WITH base AS ( +SELECT DISTINCT Connect_ID +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +WHERE d_821247024 = '197316935' +AND d_747006172 = '104430631' +AND d_773707518 = '104430631' +), +counts AS ( +SELECT 'ehr_healthpartners' AS ehr_source, +COUNT(DISTINCT b.Connect_ID) AS distinct_count +FROM base b +JOIN 
`nih-nci-dceg-connect-prod-6d04.ehr_healthpartners.person` p +ON CAST(p.person_id AS NUMERIC) = CAST(b.Connect_ID AS NUMERIC) + +UNION ALL +SELECT 'ehr_henry_ford', +COUNT(DISTINCT b.Connect_ID) +FROM base b +JOIN `nih-nci-dceg-connect-prod-6d04.ehr_henry_ford.person` p +ON CAST(p.person_id AS NUMERIC) = CAST(b.Connect_ID AS NUMERIC) + +UNION ALL +SELECT 'ehr_kp_colorado', +COUNT(DISTINCT b.Connect_ID) +FROM base b +JOIN `nih-nci-dceg-connect-prod-6d04.ehr_kp_colorado.person` p +ON CAST(p.person_id AS NUMERIC) = CAST(b.Connect_ID AS NUMERIC) + +UNION ALL +SELECT 'ehr_kp_georgia', +COUNT(DISTINCT b.Connect_ID) +FROM base b +JOIN `nih-nci-dceg-connect-prod-6d04.ehr_kp_georgia.person` p +ON CAST(p.person_id AS NUMERIC) = CAST(b.Connect_ID AS NUMERIC) + +UNION ALL +SELECT 'ehr_kp_hawaii', +COUNT(DISTINCT b.Connect_ID) +FROM base b +JOIN `nih-nci-dceg-connect-prod-6d04.ehr_kp_hawaii.person` p +ON CAST(p.person_id AS NUMERIC) = CAST(b.Connect_ID AS NUMERIC) + +UNION ALL +SELECT 'ehr_kp_northwest', +COUNT(DISTINCT b.Connect_ID) +FROM base b +JOIN `nih-nci-dceg-connect-prod-6d04.ehr_kp_northwest.person` p +ON CAST(p.person_id AS NUMERIC) = CAST(b.Connect_ID AS NUMERIC) + +UNION ALL +SELECT 'ehr_marshfield', +COUNT(DISTINCT b.Connect_ID) +FROM base b +JOIN `nih-nci-dceg-connect-prod-6d04.ehr_marshfield.person` p +ON CAST(p.person_id AS NUMERIC) = CAST(b.Connect_ID AS NUMERIC) + +UNION ALL +SELECT 'ehr_sanford', +COUNT(DISTINCT b.Connect_ID) +FROM base b +JOIN `nih-nci-dceg-connect-prod-6d04.ehr_sanford.person` p +ON CAST(p.person_id AS NUMERIC) = CAST(b.Connect_ID AS NUMERIC) + +UNION ALL +SELECT 'ehr_uchicago', +COUNT(DISTINCT b.Connect_ID) +FROM base b +JOIN `nih-nci-dceg-connect-prod-6d04.ehr_uchicago.person` p +ON CAST(p.person_id AS NUMERIC) = CAST(b.Connect_ID AS NUMERIC) +) + +SELECT * +FROM counts +ORDER BY ehr_source; diff --git a/queries/information_schema_column_search_queries.sql b/queries/information_schema_column_search_queries.sql new file mode 100644 index 
0000000..9e57626 --- /dev/null +++ b/queries/information_schema_column_search_queries.sql @@ -0,0 +1,5 @@ +-- Source sheet: Search SCHEMAS +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +select * from Connect.INFORMATION_SCHEMA.COLUMNS; +select * from FlatConnect.INFORMATION_SCHEMA.COLUMNS; diff --git a/queries/module1_sex_from_v1_v2.sql b/queries/module1_sex_from_v1_v2.sql new file mode 100644 index 0000000..99544d9 --- /dev/null +++ b/queries/module1_sex_from_v1_v2.sql @@ -0,0 +1,20 @@ +-- Source sheet: M1_M2_Sex +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT +CASE +WHEN m1v1.d_407056417 = '536341288' OR m1v2.d_407056417 = '536341288' THEN 'Female' +WHEN m1v1.d_407056417 = '654207589' OR m1v2.d_407056417 = '654207589' THEN 'Male' +WHEN m1v1.d_407056417 IN ('576796184','830573274') OR m1v2.d_407056417 IN ('576796184','830573274') THEN 'Intersex/Other' +WHEN m1v1.d_407056417 IS NULL OR m1v2.d_407056417 IS NULL THEN 'N/A' +END AS Sex_Reported, +COALESCE(m1v2.Connect_ID, m1v1.Connect_ID) AS Connect_ID, +COALESCE(m1v2.D_407056417, m1v1.D_407056417) AS sr_sex +FROM +`nih-nci-dceg-connect-prod-6d04.FlatConnect.module1_v2_JP` m1v2 +FULL OUTER JOIN +`nih-nci-dceg-connect-prod-6d04.FlatConnect.module1_v1_JP` m1v1 +ON +m1v1.Connect_ID = m1v2.Connect_ID +WHERE +COALESCE(m1v2.Connect_ID, m1v1.Connect_ID) IN ( diff --git a/queries/participant_age_groups_from_verification_and_dob.sql b/queries/participant_age_groups_from_verification_and_dob.sql new file mode 100644 index 0000000..a000e76 --- /dev/null +++ b/queries/participant_age_groups_from_verification_and_dob.sql @@ -0,0 +1,39 @@ +-- Source sheet: Age Groups from Participants +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +WITH typed AS ( +SELECT +-- Verification date: ISO 8601 format (e.g., 2020-07-17T15:21:26.763Z) +DATE(SAFE.PARSE_TIMESTAMP('%Y-%m-%dT%H:%M:%E*S%Ez', CAST(d_914594314 AS STRING))) AS verif_date, +-- DOB: yyyymmdd format (string or int) 
+SAFE.PARSE_DATE('%Y%m%d', CAST(d_371067537 AS STRING)) AS dob +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +WHERE d_821247024 = "197316935" +), +ages AS ( +SELECT +DATE_DIFF(verif_date, dob, YEAR) - IF(FORMAT_DATE('%m%d', verif_date) < FORMAT_DATE('%m%d', dob), 1, 0) AS age_years -- DATE_DIFF(YEAR) counts year boundaries; subtract 1 when the birthday had not yet occurred at verification +FROM typed +WHERE verif_date IS NOT NULL AND dob IS NOT NULL +) +SELECT +CASE +WHEN age_years BETWEEN 30 AND 39 THEN '30-39' +WHEN age_years BETWEEN 40 AND 49 THEN '40-49' +WHEN age_years BETWEEN 50 AND 59 THEN '50-59' +WHEN age_years BETWEEN 60 AND 69 THEN '60-69' +WHEN age_years BETWEEN 70 AND 79 THEN '70-79' +ELSE 'Other' +END AS age_group, +COUNT(*) AS count +FROM ages +GROUP BY age_group +ORDER BY +CASE age_group +WHEN '30-39' THEN 1 +WHEN '40-49' THEN 2 +WHEN '50-59' THEN 3 +WHEN '60-69' THEN 4 +WHEN '70-79' THEN 5 +ELSE 6 +END; diff --git a/queries/participant_physical_and_alternate_addresses.sql b/queries/participant_physical_and_alternate_addresses.sql new file mode 100644 index 0000000..07ec7e7 --- /dev/null +++ b/queries/participant_physical_and_alternate_addresses.sql @@ -0,0 +1,24 @@ +-- Source sheet: Physical Address +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT + +Connect_ID, + +d_284580415 as Alternative_address_line1, +d_728926441 as Alternative_address_line2, +d_907038282 as Alternative_address_city, +d_970839481 as Alternative_address_state, +d_379899229 as Alternative_address_zip, +d_810747471 as Alternate_address_PObox, + +d_207908218 as Phys_Address_line1, +d_224392018 as Phys_address_line2, +d_451993790 as Physical_address_city, +d_187799450 as Physical_address_state, +d_449168732 as Physical_address_zipcode, + + +FROM `nih-nci-dceg-connect-stg-5519.FlatConnect.participants` + +WHERE Connect_ID = diff --git a/queries/participant_twin_status_lookup.sql b/queries/participant_twin_status_lookup.sql new file mode 100644 index 0000000..5d47057 --- /dev/null +++ b/queries/participant_twin_status_lookup.sql @@ -0,0 +1,22 @@ +-- Source sheet: Twins +-- Generated from workbook: BigQuery_Data 
Queries_20260513.xlsx + +SELECT +p.Connect_ID, +m.D_992987417, +CASE +WHEN m.D_992987417 = "104430631" THEN 'No' +WHEN m.D_992987417 = "353358909" THEN 'Yes' +WHEN m.D_992987417 = "288105839" THEN 'Yes, fraternal twins (not identical)' +WHEN m.D_992987417 = "626558982" THEN 'Yes, triplets or higher multiple birth' +ELSE 'Unknown' +END AS D_992987417_label +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` p +LEFT JOIN `nih-nci-dceg-connect-prod-6d04.FlatConnect.module1_v2` m +ON p.Connect_ID = m.Connect_ID +WHERE p.Connect_ID IN ( +"", +"", +"", +"" +); diff --git a/queries/race_group_r_logic_reference.sql b/queries/race_group_r_logic_reference.sql new file mode 100644 index 0000000..b86f7e6 --- /dev/null +++ b/queries/race_group_r_logic_reference.sql @@ -0,0 +1,76 @@ +-- Source sheet: Race groups +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +/* +This sheet contains reference logic that is not SQL; content preserved as a block comment. + +RACE CATEGORIES (updated Multi-Racial: March 2025) + +race_columns <- c( + "D_384191091_D_384191091_D_583826374", + "D_384191091_D_384191091_D_636411467", + "D_384191091_D_384191091_D_458435048", + "D_384191091_D_384191091_D_706998638", + "D_384191091_D_384191091_D_973565052", + "D_384191091_D_384191091_D_586825330", + "D_384191091_D_384191091_D_412790539", + "D_384191091_D_384191091_D_807835037" +) + +# All data is currently string values; convert "1" and "0" to numeric +# values before summarizing. 
+module1[race_columns] <- lapply(module1[race_columns], as.numeric) +module1$multi_racial <- ifelse(rowSums(module1[race_columns], na.rm = TRUE) > 1, 1, 0) + +which_race <- module1 %>% + mutate( + race = case_when( + multi_racial == 1 ~ "Multi-Racial", + D_384191091_D_384191091_D_583826374 == 1 ~ "American Indian or Native American", + D_384191091_D_384191091_D_636411467 == 1 ~ "Asian/Asian American", + D_384191091_D_384191091_D_458435048 == 1 ~ "Black, African American, or African", + D_384191091_D_384191091_D_706998638 == 1 ~ "Hispanic, Latino, or Spanish", + D_384191091_D_384191091_D_973565052 == 1 ~ "Middle Eastern or North African", + D_384191091_D_384191091_D_586825330 == 1 ~ "Hawaiian or Pacific Islander", + D_384191091_D_384191091_D_412790539 == 1 ~ "White", + D_384191091_D_384191091_D_807835037 == 1 | + !is.na(D_384191091_D_747350323) ~ "Other", + D_384191091_D_384191091_D_746038746 == 1 ~ "Prefer Not to Answer", + D_384191091_D_384191091_D_178420302 == 1 ~ "Unknown", + TRUE ~ "Skipped this question " + ) + ) + +dt_all_races_summary <- which_race %>% + dplyr::group_by(race) %>% + dplyr::summarize(n = n(), percentage = 100 * n / nrow(.)) %>% + dplyr::ungroup() %>% + dplyr::select(race, n, percentage) + +dt_all_races_summary %>% + gt::gt(rowname_col = "row_lab") %>% + fmt_number(columns = "percentage", decimals = 2) %>% + tab_header( + title = md("Race/Ethnicity of Participants Who Completed BOH Section of First Survey") + ) %>% + cols_label( + n = md("**N**"), + race = md("**Answer**"), + percentage = md("**%**") + ) %>% + grand_summary_rows( + columns = c(n, percentage), + fns = ~sum(., na.rm = TRUE) + ) |> + tab_options( + stub.font.weight = "bold" + ) %>% + tab_style( + style = list( + cell_text(weight = "bold") + ), + locations = cells_body( + columns = race + ) + ) +*/ diff --git a/queries/reinvitation_campaign_type_lookup.sql b/queries/reinvitation_campaign_type_lookup.sql new file mode 100644 index 0000000..d6c34fa --- /dev/null +++ 
b/queries/reinvitation_campaign_type_lookup.sql @@ -0,0 +1,21 @@ +-- Source sheet: ReInvitation Campaign Type +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT token, d_280021666,d_439351436,d_471593703, +CASE +WHEN d_280021666 = '926338735' THEN "Random" +WHEN d_280021666 = '348281054' THEN "Screening appointment" +WHEN d_280021666 = '324692899' THEN "Non-screening appointment" +WHEN d_280021666 = '351257378' THEN "Demographic Group" +WHEN d_280021666 = '647148178' THEN "Aging out of study" +WHEN d_280021666 = '834544960' THEN "Geographic group" +WHEN d_280021666 = '682916147' THEN "Post-Screening Selection" +WHEN d_280021666 = '153365143' THEN "Technology adapters" +WHEN d_280021666 = '663706936' THEN "Low-income/health professional shortage areas" +WHEN d_280021666 = '208952854' THEN "Research Registry" +WHEN d_280021666 = '296312382' THEN "Pop up" +WHEN d_280021666 = '181769837' THEN "Other" +WHEN d_280021666 = '398561594' THEN "None of these apply" +END AS RcrtSI_RInvCampaignType_v1r0 +FROM `nih-nci-dceg-connect-dev.FlatConnect.participants` +where token IN () diff --git a/queries/site_code_to_label_case_snippet.sql b/queries/site_code_to_label_case_snippet.sql new file mode 100644 index 0000000..31cc86b --- /dev/null +++ b/queries/site_code_to_label_case_snippet.sql @@ -0,0 +1,17 @@ +-- Source sheet: Site +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +CASE +WHEN d_827220437 = '531629870' THEN 'HealthPartners' +WHEN d_827220437 = '548392715' THEN 'Henry Ford Health System' +WHEN d_827220437 = '125001209' THEN 'Kaiser Permanente Colorado' +WHEN d_827220437 = '327912200' THEN 'Kaiser Permanente Georgia' +WHEN d_827220437 = '300267574' THEN 'Kaiser Permanente Hawaii' +WHEN d_827220437 = '452412599' THEN 'Kaiser Permanente Northwest' +WHEN d_827220437 = '303349821' THEN 'Marshfield Clinic Health System' +WHEN d_827220437 = '657167265' THEN 'Sanford Health' +WHEN d_827220437 = '809703864' THEN 'University of Chicago 
Medicine' +WHEN d_827220437 = '517700004' THEN 'National Cancer Institute' +WHEN d_827220437 = '472940358' THEN 'Baylor Scott & White Health' +WHEN d_827220437 = '181769837' THEN 'Other' +END AS RcrtES_Site_v1r0, diff --git a/queries/survey_completion_counts_by_age_group.sql b/queries/survey_completion_counts_by_age_group.sql new file mode 100644 index 0000000..666fc39 --- /dev/null +++ b/queries/survey_completion_counts_by_age_group.sql @@ -0,0 +1,59 @@ +-- Source sheet: Survey Completion by Age +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +SELECT +IFNULL(age_group, 'Total') AS age_group, +COUNTIF(completion = 'Completed Survey and Sample(s)') AS completed_survey_and_samples, +COUNTIF(completion = 'Completed Survey, No Sample(s)') AS completed_survey_no_samples, +COUNTIF(completion = 'Completed Sample(s), No Survey') AS completed_samples_no_survey, +COUNTIF(completion = 'Completed Neither') AS completed_neither, +COUNT(Connect_ID) AS total +FROM ( +SELECT +Connect_ID, +CASE +WHEN EXTRACT(YEAR FROM TIMESTAMP(d_914594314)) = 2022 THEN '2022' +WHEN EXTRACT(YEAR FROM TIMESTAMP(d_914594314)) = 2023 THEN '2023' +WHEN EXTRACT(YEAR FROM TIMESTAMP(d_914594314)) = 2024 THEN '2024' +WHEN EXTRACT(YEAR FROM TIMESTAMP(d_914594314)) = 2025 AND EXTRACT(MONTH FROM TIMESTAMP(d_914594314)) BETWEEN 1 AND 3 THEN 'January - March 2025' +WHEN EXTRACT(YEAR FROM TIMESTAMP(d_914594314)) = 2025 AND EXTRACT(MONTH FROM TIMESTAMP(d_914594314)) BETWEEN 4 AND 7 THEN 'April - July 2025' +ELSE 'Other' +END AS time_category, +CASE +WHEN d_100767870 = "353358909" AND +(d_878865966 = "353358909" OR d_684635302 = "353358909" OR d_167958071 = "353358909") +THEN "Completed Survey and Sample(s)" +WHEN d_100767870 = "353358909" +THEN "Completed Survey, No Sample(s)" +WHEN d_878865966 = "353358909" OR d_684635302 = "353358909" OR d_167958071 = "353358909" +THEN "Completed Sample(s), No Survey" +ELSE "Completed Neither" +END AS completion, + +CASE +WHEN state_d_934298480 = '713781738' 
THEN '30-34' +WHEN state_d_934298480 = '631272782' THEN '35-39' +WHEN state_d_934298480 = '124276120' THEN '40-45' +WHEN state_d_934298480 = '450985724' THEN '46-50' +WHEN state_d_934298480 = '363147933' THEN '51-55' +WHEN state_d_934298480 = '636706443' THEN '56-60' +WHEN state_d_934298480 = '771230670' THEN '61-65' +WHEN state_d_934298480 = '722846087' THEN '66-70' +ELSE 'Unknown' +END AS age_group + +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants_JP` +WHERE +d_827220437 = '809703864' -- UChicago +AND d_821247024 = '197316935' -- Verified +AND EXTRACT(YEAR FROM TIMESTAMP(d_914594314)) = 2025 +AND EXTRACT(MONTH FROM TIMESTAMP(d_914594314)) BETWEEN 4 AND 7 +) + +GROUP BY ROLLUP(age_group) +ORDER BY +CASE +WHEN age_group IS NULL OR age_group = 'Total' THEN 999 -- Total row goes last (rollup NULL is relabelled 'Total' in the SELECT list) +WHEN age_group = 'Unknown' THEN 998 +ELSE CAST(SUBSTR(age_group, 1, 2) AS INT64) +END; diff --git a/queries/survey_status_case_mapping_snippets.sql b/queries/survey_status_case_mapping_snippets.sql new file mode 100644 index 0000000..077bfa6 --- /dev/null +++ b/queries/survey_status_case_mapping_snippets.sql @@ -0,0 +1,171 @@ +-- Source sheet: Survey Statuses +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +CASE +WHEN d_949302066 = '972455046' THEN 'Not Started' +WHEN d_949302066 = '615768760' THEN 'Started' +WHEN d_949302066 = '231311385' THEN 'Submitted' +END AS SrvBOH_BaseStatus_v1r0, + +D_205553981 AS SrvBOH_TmStart_v1r0, +D_517311251 AS SrvBOH_TmComplete_v1r0, + +-- MRE: Medications, Reproductive Health, Exercise, Sleep +CASE +WHEN d_536735468 = '972455046' THEN 'Not Started' +WHEN d_536735468 = '615768760' THEN 'Started' +WHEN d_536735468 = '231311385' THEN 'Submitted' +END AS SrvMRE_BaseStatus_v1r0, + +D_541836531 AS SrvMRE_TmStart_v1r0, +D_832139544 AS SrvMRE_TmComplete_v1r0, + +-- SAS: Smoking, Alcohol, Sun Exposure +CASE +WHEN d_976570371 = '972455046' THEN 'Not Started' +WHEN d_976570371 = '615768760' THEN 'Started' +WHEN d_976570371 = '231311385' THEN 'Submitted' +END AS SrvSAS_BaseStatus_v1r0, + +D_386488297 AS 
SrvSAS_TmStart_v1r0, +D_770257102 AS SrvSAS_TmComplete_v1r0, + +-- LAW: Where You Live and Work +CASE +WHEN d_663265240 = '972455046' THEN 'Not Started' +WHEN d_663265240 = '615768760' THEN 'Started' +WHEN d_663265240 = '231311385' THEN 'Submitted' +END AS SrvLAW_BaseStatus_v1r0, + +d_452942800 AS SrvLAW_TmStart_v1r0, +d_264644252 AS SrvLAW_TmComplete_v1r0, + +-- + +-- BIOSPECIMEN DATA: + +-- Blood/Urine/Mouthwash (BLM): +CASE +WHEN d_265193023 = '972455046' THEN 'Not Started' +WHEN d_265193023 = '615768760' THEN 'Started' +WHEN d_265193023 = '231311385' THEN 'Submitted' +END AS SrvBLM_ResSrvCompl_v1r0, + +d_822499427 as SrvBLM_TmStart_v1r0, +d_222161762 as SrvBLM_TmComplete_v1r0, + +-- Blood/Urine (BLU): +CASE +WHEN d_253883960 = '972455046' THEN 'Not Started' +WHEN d_253883960 = '615768760' THEN 'Started' +WHEN d_253883960 = '231311385' THEN 'Submitted' +END AS SrvBlU_BaseComplete_v1r0, + +d_534669573 as SrvBlU_TmStart_v1r0, +d_764863765 as SrvBlU_TmComplete_v1r0, + +-- Mouthwash (MW): +CASE +WHEN d_547363263 = '972455046' THEN 'Not Started' +WHEN d_547363263 = '615768760' THEN 'Started' +WHEN d_547363263 = '231311385' THEN 'Submitted' +END AS SrvMtW_BaseComplete_v1r0, + +d_286191859 as SrvMtW_TmStart_v1r0, +d_195145666 as SrvMtW_TmComplete_v1r0, + +-- + +-- MENSTRUAL CYCLE: +CASE +WHEN d_459098666 = '972455046' THEN 'Not Started' +WHEN d_459098666 = '615768760' THEN 'Started' +WHEN d_459098666 = '231311385' THEN 'Submitted' +END AS SrvMC_BaseComplete_v1r0, + +d_844088537 AS SrvMC_TmStart_v1r0, +d_217640691 AS SrvMC_TmComplete_v1r0, + +-- + +-- COVID: +CASE +WHEN d_220186468 = '972455046' THEN 'Not Started' +WHEN d_220186468 = '615768760' THEN 'Started' +WHEN d_220186468 = '231311385' THEN 'Submitted' +END AS SrvCOV_BaseComplete_v1r0, + +d_268176409 AS SrvCOV_TmStart_v1r0, +d_784810139 AS SrvCOV_TmComplete_v1r0, + +-- + +-- QOL/PROMIS: +CASE +WHEN d_320303124 = '972455046' THEN 'Not Started' +WHEN d_320303124 = '615768760' THEN 'Started' +WHEN d_320303124 = 
'231311385' THEN 'Submitted' +END AS SrvQOL_3moStatus_v1r0, + +d_870643066 AS SrvQOL_3moTmStart_v1r0, +d_843688458 AS SrvQOL_3moTmComplete_v1r0, + +-- + +-- CES: +CASE +WHEN d_956490759 = '972455046' THEN 'Not Started' +WHEN d_956490759 = '615768760' THEN 'Started' +WHEN d_956490759 = '231311385' THEN 'Submitted' +END AS SrvCoE_ConExpStatus_v1r0, + +d_263355177 as SrvCoE_ConExpTmStart_v1r0, +d_199471989 as SrvCoE_ConExpTmCompl_v1r0, + +-- + +-- CSH: Cancer Screening History +CASE +WHEN d_176068627 = '972455046' THEN 'Not Started' +WHEN d_176068627 = '615768760' THEN 'Started' +WHEN d_176068627 = '231311385' THEN 'Submitted' +END AS SrvScr_CancScrnStatus_v1r0, + +d_609630315 as SrvScr_CancScrnTmStart_v1r0, +d_389890053 as SrvScr_CancScrnTmCompl_v1r0, + +-- + +-- SSN: +CASE +WHEN d_126331570 = '972455046' THEN 'Not Started' +WHEN d_126331570 = '615768760' THEN 'Started' +WHEN d_126331570 = '231311385' THEN 'Submitted' +END AS SrvSS_SSNSurvey_v1r0, + +d_943232079 AS SrvSS_TmStart_v1r0, +d_315032037 AS SrvSS_TmComplete_v1r0, + +-- + +-- DHQ3 (internal): +CASE +WHEN d_692560814 = '972455046' THEN 'Not Started' +WHEN d_692560814 = '615768760' THEN 'Started' +WHEN d_692560814 = '231311385' THEN 'Submitted' +END AS SrvDHQ3_6moStatus_v1r0, + +d_109610692 as SrvDHQ3_6moTmStart_v1r0, +d_610227793 as SrvDHQ3_6moTmComplete_v1r0, + +-- + +-- 2026 ROI: +CASE +WHEN d_278023676 = '972455046' THEN 'Not Started' +WHEN d_278023676 = '615768760' THEN 'Started' +WHEN d_278023676 = '231311385' THEN 'Submitted' +END AS SrvROI_PrefStatus_v1r0, + +d_993557295 AS SrvROI_PrefTmStart_v1r0, +d_543379310 AS SrvROI_PrefTmComplete_v1r0, diff --git a/queries/system_time_as_of_timestamp_snippet.sql b/queries/system_time_as_of_timestamp_snippet.sql new file mode 100644 index 0000000..34d459d --- /dev/null +++ b/queries/system_time_as_of_timestamp_snippet.sql @@ -0,0 +1,6 @@ +-- Source sheet: TIMESTAMP +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +FROM 
`nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +FOR SYSTEM_TIME AS OF TIMESTAMP('2025-05-30 18:34:40.654 UTC') +WHERE diff --git a/queries/verification_and_duplicate_type_case_snippets.sql b/queries/verification_and_duplicate_type_case_snippets.sql new file mode 100644 index 0000000..ab608ad --- /dev/null +++ b/queries/verification_and_duplicate_type_case_snippets.sql @@ -0,0 +1,40 @@ +-- Source sheet: Verification Duplication +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +CASE +WHEN d_821247024 = '875007964' THEN 'Not yet verified' +WHEN d_821247024 = '197316935' THEN 'Verified' +WHEN d_821247024 = '219863910' THEN 'Cannot be verified' +WHEN d_821247024 = '922622075' THEN 'Duplicate' +WHEN d_821247024 = '160161595' THEN 'Outreach timed out' +WHEN d_821247024 = '290379732' THEN 'No Longer Enrolling' +END AS RcrtV_Verification_v1r0, + + + +CASE +WHEN state_d_148197146 = '638335430' THEN 'Active recruit signed in as Passive recruit' +WHEN state_d_148197146 = '283434980' THEN 'Not Active recruit signed in as Passive recruit' +WHEN state_d_148197146 = '866029623' THEN 'Not Active recruit signed in as an Active recruit' +WHEN state_d_148197146 = '654558118' THEN 'Participant already enrolled' +WHEN state_d_148197146 = '979256174' THEN 'Passive recruit signed in as Active recruit' +WHEN state_d_148197146 = '696650324' THEN 'Change in Eligibility Status' +END AS Duplicate_Type, + + +CASE +WHEN d_512820379 = '180583933' THEN 'Not Active' +WHEN d_512820379 = '486306141' THEN 'Active' +WHEN d_512820379 = '854703046' THEN 'Passive' +END AS Recruit_Type, + + +CASE +WHEN state_d_793822265 = '132080040' THEN 'No Change Needed' +WHEN state_d_793822265 = '604663208' THEN 'Not Active to Passive' +WHEN state_d_793822265 = '854903954' THEN 'Passive to Active' +WHEN state_d_793822265 = '965707001' THEN 'Active to Passive' +END AS Update_Recruit_Type, + + +d_471593703 as Recruitment_date diff --git a/queries/verification_by_outreach_status.sql
b/queries/verification_by_outreach_status.sql new file mode 100644 index 0000000..eb672b7 --- /dev/null +++ b/queries/verification_by_outreach_status.sql @@ -0,0 +1,224 @@ +-- Source sheet: Verification-by-Outreach status +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +WITH base AS ( -- Query 1: participants that cannot be verified, with concept IDs decoded to labels +SELECT +CASE +WHEN state_d_444699761 = '426360242' THEN 'Method Used' +WHEN state_d_444699761 = '734437214' THEN 'Method Not Used' +END AS Auto_Verif, + +CASE +WHEN state_d_953614051 = '426360242' THEN 'Method Used' +WHEN state_d_953614051 = '734437214' THEN 'Method Not Used' +END AS Manual_Verif, + +CASE +WHEN state_d_188797763 = '104430631' THEN 'No' +WHEN state_d_188797763 = '353358909' THEN 'Yes' +END AS Outreach_Required, + +CASE +WHEN d_827220437 = '531629870' THEN 'HealthPartners' +WHEN d_827220437 = '548392715' THEN 'Henry Ford Health System' +WHEN d_827220437 = '125001209' THEN 'Kaiser Permanente Colorado' +WHEN d_827220437 = '327912200' THEN 'Kaiser Permanente Georgia' +WHEN d_827220437 = '300267574' THEN 'Kaiser Permanente Hawaii' +WHEN d_827220437 = '452412599' THEN 'Kaiser Permanente Northwest' +WHEN d_827220437 = '303349821' THEN 'Marshfield Clinic Health System' +WHEN d_827220437 = '657167265' THEN 'Sanford Health' +WHEN d_827220437 = '809703864' THEN 'University of Chicago Medicine' +WHEN d_827220437 = '517700004' THEN 'National Cancer Institute' +WHEN d_827220437 = '472940358' THEN 'Baylor Scott & White Health' +END AS Site -- no ELSE: any other site code yields NULL and never matches a grid row in the final join + +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +WHERE d_821247024 = '219863910' -- Verification status = cannot be verified +AND d_831041022 = '104430631' -- Data destroy = no +AND state_d_444699761 IS NOT NULL +AND state_d_953614051 IS NOT NULL +AND state_d_188797763 IS NOT NULL +), + +counts AS ( -- participants per observed Site / Auto_Verif / Manual_Verif / Outreach_Required combination +SELECT +Site, +Auto_Verif, +Manual_Verif, +Outreach_Required, +COUNT(*) AS sample_counts +FROM base +GROUP BY Site, Auto_Verif, Manual_Verif, Outreach_Required +), + +sites AS ( -- fixed list of site labels; the join is on these strings, so they must equal the Site CASE labels +SELECT * FROM UNNEST([
+'HealthPartners', +'Henry Ford Health System', +'Kaiser Permanente Colorado', +'Kaiser Permanente Georgia', +'Kaiser Permanente Hawaii', +'Kaiser Permanente Northwest', +'Marshfield Clinic Health System', +'Sanford Health', +'University of Chicago Medicine', +'National Cancer Institute', +'Baylor Scott & White Health' +]) AS Site +), + +grid AS ( -- every Site x Auto_Verif x Manual_Verif x Outreach_Required combination, so empty cells still get a row +SELECT +s.Site, +Auto_Verif, +Manual_Verif, +Outreach_Required +FROM sites s +CROSS JOIN UNNEST(['Method Used','Method Not Used']) AS Auto_Verif +CROSS JOIN UNNEST(['Method Used','Method Not Used']) AS Manual_Verif +CROSS JOIN UNNEST(['Yes','No']) AS Outreach_Required +) + +SELECT +g.Site, +g.Auto_Verif, +g.Manual_Verif, +g.Outreach_Required, +COALESCE(c.sample_counts, 0) AS sample_counts -- combinations with no participants report 0 +FROM grid g +LEFT JOIN counts c +ON g.Site = c.Site +AND g.Auto_Verif = c.Auto_Verif +AND g.Manual_Verif = c.Manual_Verif +AND g.Outreach_Required = c.Outreach_Required +ORDER BY +Site, +Auto_Verif, +Manual_Verif, +Outreach_Required; + + + + + + + + + + +WITH base AS ( -- Query 2: verified participants; NULL state fields are kept and labeled as '... Missing' +SELECT +CASE +WHEN state_d_444699761 = '426360242' THEN 'Method Used' +WHEN state_d_444699761 = '734437214' THEN 'Method Not Used' +WHEN state_d_444699761 IS NULL then 'Auto Verif Missing' +END AS Auto_Verif, + +CASE +WHEN state_d_953614051 = '426360242' THEN 'Method Used' +WHEN state_d_953614051 = '734437214' THEN 'Method Not Used' +WHEN state_d_953614051 IS NULL then 'Manual Verif Missing' +END AS Manual_Verif, + +CASE +WHEN state_d_188797763 = '104430631' THEN 'No' +WHEN state_d_188797763 = '353358909' THEN 'Yes' +WHEN state_d_188797763 IS NULL THEN 'Outreach Required Missing' +END AS Outreach_Required, + +CASE +WHEN d_827220437 = '531629870' THEN 'HealthPartners' +WHEN d_827220437 = '548392715' THEN 'Henry Ford Health System' +WHEN d_827220437 = '125001209' THEN 'Kaiser Permanente Colorado' +WHEN d_827220437 = '327912200' THEN 'Kaiser Permanente Georgia' +WHEN d_827220437 = '300267574' THEN 'Kaiser Permanente Hawaii' +WHEN d_827220437 = '452412599' THEN 'Kaiser
Permanente Northwest' +WHEN d_827220437 = '303349821' THEN 'Marshfield Clinic Health System' +WHEN d_827220437 = '657167265' THEN 'Sanford Health' +WHEN d_827220437 = '809703864' THEN 'University of Chicago Medicine' +WHEN d_827220437 = '517700004' THEN 'National Cancer Institute' +WHEN d_827220437 = '472940358' THEN 'Baylor Scott & White Health' +END AS Site + +FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` +WHERE d_821247024 = '197316935' -- Verification status = verified +AND d_831041022 = '104430631' -- Data destroy = no +-- AND state_d_444699761 IS NOT NULL +-- AND state_d_953614051 IS NOT NULL +-- AND state_d_188797763 IS NOT NULL (NULL filters left disabled: missing values are counted under the '... Missing' labels) +), + +counts AS ( -- participants per observed combination, including the '... Missing' labels +SELECT +Site, +Auto_Verif, +Manual_Verif, +Outreach_Required, +COUNT(*) AS sample_counts +FROM base +GROUP BY Site, Auto_Verif, Manual_Verif, Outreach_Required +), + +sites AS ( -- fixed list of site labels; must equal the Site CASE labels +SELECT * FROM UNNEST([ +'HealthPartners', +'Henry Ford Health System', +'Kaiser Permanente Colorado', +'Kaiser Permanente Georgia', +'Kaiser Permanente Hawaii', +'Kaiser Permanente Northwest', +'Marshfield Clinic Health System', +'Sanford Health', +'University of Chicago Medicine', +'National Cancer Institute', +'Baylor Scott & White Health' +]) AS Site +), + +grid AS ( -- full combination grid; each dimension adds its own '... Missing' value +SELECT +s.Site, +Auto_Verif, +Manual_Verif, +Outreach_Required +FROM sites s +CROSS JOIN UNNEST(['Method Used','Method Not Used', 'Auto Verif Missing']) AS Auto_Verif +CROSS JOIN UNNEST(['Method Used','Method Not Used', 'Manual Verif Missing']) AS Manual_Verif +CROSS JOIN UNNEST(['Yes','No', 'Outreach Required Missing']) AS Outreach_Required +) + +SELECT +g.Site, +g.Auto_Verif, +g.Manual_Verif, +g.Outreach_Required, +COALESCE(c.sample_counts, 0) AS sample_counts -- combinations with no participants report 0 +FROM grid g +LEFT JOIN counts c +ON g.Site = c.Site +AND g.Auto_Verif = c.Auto_Verif +AND g.Manual_Verif = c.Manual_Verif +AND g.Outreach_Required = c.Outreach_Required +ORDER BY +Site, + +-- Auto_Verif order +CASE g.Auto_Verif +WHEN 'Method Not Used' THEN 1 +WHEN 'Method Used' THEN 2 +WHEN 'Auto
Verif Missing' THEN 3 +END, + +-- Manual_Verif order +CASE g.Manual_Verif +WHEN 'Method Not Used' THEN 1 +WHEN 'Method Used' THEN 2 +WHEN 'Manual Verif Missing' THEN 3 +END, + +-- Outreach_Required order +CASE g.Outreach_Required +WHEN 'No' THEN 1 +WHEN 'Yes' THEN 2 +WHEN 'Outreach Required Missing' THEN 3 +END; diff --git a/queries/yes_no_flag_temp_function_template.sql b/queries/yes_no_flag_temp_function_template.sql new file mode 100644 index 0000000..b34bf7a --- /dev/null +++ b/queries/yes_no_flag_temp_function_template.sql @@ -0,0 +1,28 @@ +-- Source sheet: Yes_No_SQL_function +-- Generated from workbook: BigQuery_Data Queries_20260513.xlsx + +CREATE TEMP FUNCTION yes_no_flag(x STRING) -- maps '104430631' to 'No' and '353358909' to 'Yes'; any other input (including NULL) returns NULL +AS ( +CASE +WHEN x = '104430631' THEN 'No' +WHEN x = '353358909' THEN 'Yes' +ELSE NULL +END +); + +SELECT -- NOTE(review): this template has no FROM clause; add the source table before running +Connect_ID, + + +yes_no_flag(d_685002411_d_994064239) AS HdRef_Basesrv_v1r0, +yes_no_flag(d_685002411_d_194410742) AS HdRef_Baseblood_v1r0, +yes_no_flag(d_685002411_d_949501163) AS HdRef_Baseurine_v1r0, +yes_no_flag(d_685002411_d_277479354) AS HdRef_Basesaliva_v1r0, +yes_no_flag(d_685002411_d_867203506) AS HdRef_Allsrv_v1r0, +yes_no_flag(d_685002411_d_352996056) AS HdRef_Allsample_v1r0, +yes_no_flag(d_685002411_d_217367618) AS HdRef_BlSpecSrv_v1r0, +yes_no_flag(d_747006172) AS HdWd_WdConsent_v1r0, +yes_no_flag(d_906417725) AS HdWd_Activepart_v1r0, +yes_no_flag(d_773707518) AS HdWd_HIPAArevoked_v1r0, +yes_no_flag(d_831041022) AS HdWd_Destroydata_v1r0, +yes_no_flag(d_987563196) AS HdWd_Deceased_v1r0 diff --git a/tutorials/ggplot_templates.qmd b/tutorials/ggplot_templates.qmd index a9fccf3..5e78ac8 100644 --- a/tutorials/ggplot_templates.qmd +++ b/tutorials/ggplot_templates.qmd @@ -11,10 +11,10 @@ editor: wrap: 72 --- - ## Purpose -In this session, we will review how to use ggplot themes and review a few templates that could be useful for future data visualization.
+In this session, we will review how to use ggplot themes and review a +few templates that could be useful for future data visualization. ## Colors @@ -109,7 +109,8 @@ Looks much better, but we want to wrap it into a function. ## Creating & Using Function -Below is a function that you can add to a plot after specifying titles & the number of categories (colors) you need for your graph. +Below is a function that you can add to a plot after specifying titles & +the number of categories (colors) you need for your graph. ```{r} theme_function = function(title, xlab, ylab, legend_lab, n) { From 64c21958205c631cf98314d85bf2daa96854136c Mon Sep 17 00:00:00 2001 From: Autumn Hullings Date: Thu, 14 May 2026 17:02:57 -0400 Subject: [PATCH 51/53] Add files via upload --- queries/Quadrennial Review Report.sql | 55 +++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 queries/Quadrennial Review Report.sql diff --git a/queries/Quadrennial Review Report.sql b/queries/Quadrennial Review Report.sql new file mode 100644 index 0000000..cb030ee --- /dev/null +++ b/queries/Quadrennial Review Report.sql @@ -0,0 +1,55 @@ +-- Quadrennial Review Report +-- Request (via Email from Amelia to: Autumn Hullings, Nicole Gerlanc, Michelle Brotzman +-- Re: Quadrennial Review Data Request +-- Connect is required to complete a Quadrennial Review every 4 years for the project and we are preparing for the first submission. This is separate from the annual CIER data submission that Kelsey helps us provide the data for. For the Quadrennial Review, we are being asked to provide just a couple of data points. +-- Date of first verified participant in Connect – we opened enrollment on 07/21/2021, so should be somewhere around that date but need the exact date. +-- Date we reached n=50,000 verified participants exactly – based on the weekly logs, this should be sometime near the week of 10/21/2024 but need the exact date. 
+-- For this data pull, I do not think it is necessary to exclude anyone who has withdrawn or requested data destruction. Let me know if you have any questions. Thank you! + + + +-- START QUERY: Date of first verified participant and date verified participant count reached 50,000 + +WITH verified_participants AS ( + SELECT + Connect_ID, + SAFE_CAST(d_914594314 AS TIMESTAMP) AS verification_ts, + DATE(SAFE_CAST(d_914594314 AS TIMESTAMP)) AS verification_date -- calendar date of verification_ts; DATE() uses UTC when no time zone is given + FROM `nih-nci-dceg-connect-prod-6d04.FlatConnect.participants` + WHERE Connect_ID IS NOT NULL + AND d_821247024 = '197316935' + AND SAFE_CAST(d_914594314 AS TIMESTAMP) IS NOT NULL -- drops NULL and unparseable values; a NULL verification_ts sorts first and would be ranked as participant #1 +), + +ranked_verified AS ( + SELECT + Connect_ID, + verification_ts, + verification_date, + ROW_NUMBER() OVER ( + ORDER BY verification_ts, Connect_ID -- Connect_ID breaks timestamp ties so the numbering is deterministic + ) AS verified_participant_number + FROM verified_participants +) + +SELECT + 'First verified participant' AS metric, + Connect_ID, + verification_ts, + verification_date, + verified_participant_number +FROM ranked_verified +WHERE verified_participant_number = 1 + +UNION ALL + +SELECT + '50,000th verified participant' AS metric, + Connect_ID, + verification_ts, + verification_date, + verified_participant_number +FROM ranked_verified +WHERE verified_participant_number = 50000 + +ORDER BY verified_participant_number; From 9c0c7ad98623a21f25a141829c809bf60979a246 Mon Sep 17 00:00:00 2001 From: Autumn Hullings Date: Thu, 14 May 2026 17:03:55 -0400 Subject: [PATCH 52/53] Rename Quadrennial Review Report.sql to quadrennial_review_report.sql --- ...uadrennial Review Report.sql => quadrennial_review_report.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename queries/{Quadrennial Review Report.sql => quadrennial_review_report.sql} (100%) diff --git a/queries/Quadrennial Review Report.sql b/queries/quadrennial_review_report.sql similarity index 100% rename from queries/Quadrennial Review Report.sql rename to queries/quadrennial_review_report.sql From ccf67f0c369fc20c16f0d2374f738f5943246ae8 Mon Sep 17
00:00:00 2001 From: Autumn Hullings Date: Thu, 14 May 2026 17:26:03 -0400 Subject: [PATCH 53/53] Fix comment formatting in quadrennial_review_report.sql Updated the request comment to reflect the correct name and improved clarity. --- queries/quadrennial_review_report.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/queries/quadrennial_review_report.sql b/queries/quadrennial_review_report.sql index cb030ee..21eedcf 100644 --- a/queries/quadrennial_review_report.sql +++ b/queries/quadrennial_review_report.sql @@ -1,5 +1,5 @@ -- Quadrennial Review Report --- Request (via Email from Amelia to: Autumn Hullings, Nicole Gerlanc, Michelle Brotzman +-- Request (via email from Amelia Sager to: Autumn Hullings, Nicole Gerlanc, Michelle Brotzman) -- Re: Quadrennial Review Data Request -- Connect is required to complete a Quadrennial Review every 4 years for the project and we are preparing for the first submission. This is separate from the annual CIER data submission that Kelsey helps us provide the data for. For the Quadrennial Review, we are being asked to provide just a couple of data points. -- Date of first verified participant in Connect – we opened enrollment on 07/21/2021, so should be somewhere around that date but need the exact date. @@ -9,6 +9,7 @@ -- START QUERY: Date of first verified participant and date verified participant count reached 50,000 +-- Data filters: WHERE Verification Status = Verified, Verification D/T (d_914594314) is not null and Connect ID is not null WITH verified_participants AS ( SELECT