-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSQL COVID Portofolio Project.sql
More file actions
114 lines (89 loc) · 4.13 KB
/
SQL COVID Portofolio Project.sql
File metadata and controls
114 lines (89 loc) · 4.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
SELECT * FROM DataAnalystPortofolio..CovidDeaths$
ORDER BY 3,4;
SELECT * FROM DataAnalystPortofolio..CovidVaccination$
ORDER BY 3,4;
--Select Data that we are going to be using
SELECT location, date, total_cases, new_cases, total_deaths, population
FROM DataAnalystPortofolio..CovidDeaths$
ORDER BY 1,2;
-- Looking at Total cases vs Total Deaths
-- Shows likelihood of dying if you contract covid in Romania
SELECT location, date, total_cases, total_deaths, CONCAT(CAST((total_deaths/total_cases)*100 AS VARCHAR(40)),' %') death_percentage
FROM DataAnalystPortofolio..CovidDeaths$
WHERE location LIKE 'Romania'
ORDER BY 2 DESC;
-- Looking at Total Cases vs Population
-- Shows what percentage of population got Covid
SELECT location, date, population,total_cases,CONCAT(CAST((total_cases/population)*100 AS VARCHAR(40)),' %') got_covid
FROM DataAnalystPortofolio..CovidDeaths$
WHERE location LIKE 'Romania'
ORDER BY 2;
-- Looking at Countries with highest infection rate compared to population
SELECT location, population, MAX(total_cases) highest_infection_count, CONCAT(CAST((MAX(total_cases/population))*100 AS VARCHAR(40)),' %') percent_population_infected
FROM DataAnalystPortofolio..CovidDeaths$
GROUP BY location, population
ORDER BY percent_population_infected DESC;
-- Showing Countries with highest death count per population
SELECT location, MAX(CAST(total_deaths AS INT)) AS total_death_count
FROM DataAnalystPortofolio..CovidDeaths$
WHERE _ IS NOT NULL
GROUP BY location
ORDER BY total_death_count DESC;
-- LET'S BREAK THINGS DOWN BY CONTINENT
-- Showing continents with the highest death count per population
SELECT "_" continent, MAX(CAST(total_deaths AS INT)) AS total_death_count
FROM DataAnalystPortofolio..CovidDeaths$
WHERE _ IS NOT NULL
GROUP BY _
ORDER BY total_death_count DESC;
-- GLOBAL NUMBERS
SELECT SUM(new_cases) total_cases, SUM(CAST(new_deaths AS INT)) total_deaths, SUM(CAST(new_deaths AS INT))/SUM(new_cases)*100 death_percentage
FROM DataAnalystPortofolio..CovidDeaths$
WHERE _ IS NOT NUL
ORDER BY 1,2;
-- Looking at total population VS vaccination
WITH pop_vs_vacc (_, location, date, population,new_vaccinations, rolling_people_vaccinated) -- USE CTE
AS
(
SELECT dea._, dea.location, dea.date, dea.population, vac.new_vaccinations, SUM(CAST(vac.new_vaccinations AS INT)) OVER (Partition by dea.location ORDER BY dea.location, dea.date) AS rolling_people_vaccinated
--(rolling_people_vaccinated/population)*100
FROM DataAnalystPortofolio..CovidDeaths$ dea
JOIN DataAnalystPortofolio..CovidVaccination$ vac
ON dea.location=vac.location
AND dea.date=vac.date
WHERE dea._ IS NOT NULL
--ORDER BY 1,2,3;
)
SELECT *, (rolling_people_vaccinated/population)*100 FROM pop_vs_vacc;
-- OR USING A TEMP TABLE
DROP TABLE IF EXISTS #PercentPopulationVaccinated
CREATE TABLE #PercentPopulationVaccinated
(
_ nvarchar(255),
location nvarchar(255),
date datetime,
population numeric,
new_vaccinations numeric,
rolling_people_vaccinated numeric
)
INSERT INTO #PercentPopulationVaccinated
SELECT dea._, dea.location, dea.date, dea.population, vac.new_vaccinations, SUM(CAST(vac.new_vaccinations AS INT)) OVER (Partition by dea.location ORDER BY dea.location, dea.date) AS rolling_people_vaccinated
--(rolling_people_vaccinated/population)*100
FROM DataAnalystPortofolio..CovidDeaths$ dea
JOIN DataAnalystPortofolio..CovidVaccination$ vac
ON dea.location=vac.location
AND dea.date=vac.date
WHERE dea._ IS NOT NULL
--ORDER BY 1,2,3;
SELECT * FROM #PercentPopulationVaccinated;
-- Creating View to store data for later visualizations
CREATE VIEW PercentPopulationVaccinated AS
SELECT dea._, dea.location, dea.date, dea.population, vac.new_vaccinations, SUM(CAST(vac.new_vaccinations AS INT)) OVER (Partition by dea.location ORDER BY dea.location, dea.date) AS rolling_people_vaccinated
--(rolling_people_vaccinated/population)*100
FROM DataAnalystPortofolio..CovidDeaths$ dea
JOIN DataAnalystPortofolio..CovidVaccination$ vac
ON dea.location=vac.location
AND dea.date=vac.date
WHERE dea._ IS NOT NULL
--ORDER BY 1,2,3;
SELECT * FROM PercentPopulationVaccinated;