-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCOVID Portfolio Project SQL.sql
176 lines (140 loc) · 4.3 KB
/
COVID Portfolio Project SQL.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
SELECT *
FROM portfolio_1.CovidDeaths
SELECT *
FROM portfolio_1.CovidVaccinations
-- Select Data to be used
SELECT location,
TO_DATE(date, 'DD MM YYYY') AS new_date,
total_cases,
new_cases,
total_deaths,
population
FROM portfolio_1.CovidDeaths
ORDER BY location, new_date
-- Looking at Total Cases vs Total Deaths
-- Show likelihood of dying in UK
SELECT location,
TO_DATE(date, 'DD MM YYYY') AS new_date,
total_cases,
total_deaths,
ROUND((total_deaths/total_cases) * 100, 2) AS DeathPercentage
FROM portfolio_1.CovidDeaths
WHERE location = 'United Kingdom'
ORDER BY location, new_date DESC
-- Looking at Total Cases vs Population
SELECT location,
TO_DATE(date, 'DD MM YYYY') AS new_date,
population,
total_cases,
ROUND((total_cases/population) * 100, 2) AS DeathPercentage
FROM portfolio_1.CovidDeaths
WHERE location = 'United Kingdom'
ORDER BY location, new_date DESC
-- Looking at Countries with Highest Infection Rate compared to Population
SELECT location,
population,
MAX(total_cases) as HighestInfectionCount,
MAX(ROUND((total_cases/population) * 100, 2)) AS PercentPopulationInfected
FROM portfolio_1.CovidDeaths
WHERE total_cases IS NOT null and population IS NOT null
GROUP BY location, population
ORDER BY PercentPopulationInfected DESC
-- Show countries with highest death count per population
SELECT location,
MAX(CAST(total_deaths AS int)) as HighestDeathCount
FROM portfolio_1.CovidDeaths
WHERE total_deaths IS NOT null AND continent IS NOT null
GROUP BY location
ORDER BY HighestDeathCount DESC
-- highest death count by continent
SELECT continent,
MAX(CAST(total_deaths AS int)) as HighestDeathCount
FROM portfolio_1.CovidDeaths
WHERE total_deaths IS NOT null AND continent IS NOT null
GROUP BY continent
ORDER BY HighestDeathCount DESC
-- Showing continents with highest death count per population
SELECT continent,
MAX(CAST(total_deaths AS int)) as HighestDeathCount
FROM portfolio_1.CovidDeaths
WHERE total_deaths IS NOT null AND continent IS NOT null
GROUP BY continent
ORDER BY HighestDeathCount DESC
-- Total Population vs Vaccinations
--Using CTE
WITH PopVsVac (continent,
location,
new_date,
population,
new_vaccination,
RollingPeopleVaccinated
)
AS
(
SELECT d.continent,
d.location,
TO_DATE(d.date, 'DD MM YYYY') AS new_date,
d.population,
v.new_vaccinations,
SUM(CAST(v.new_vaccinations AS INTEGER))
OVER (PARTITION BY d.location
ORDER BY d.location, TO_DATE(d.date, 'DD MM YYYY')
) AS RollingPeopleVaccinated
FROM portfolio_1.CovidDeaths AS d
JOIN portfolio_1.CovidVaccinations AS v
ON d.location = v.location
AND d.date = v.date
WHERE v.new_vaccinations IS NOT null AND d.continent IS NOT null
)
SELECT *,
ROUND((RollingPeopleVaccinated/population)*100, 2)
FROM PopVsVAc
ORDER BY location, new_date
-- TEMP TABLE
DROP TABLE IF EXISTS PercentPopulationVaccinated
CREATE TABLE PercentPopulationVaccinated
(
continent varchar,
location varchar,
date date,
population numeric,
new_vaccinations numeric,
RollingPeopleVaccinated numeric
)
INSERT INTO PercentPopulationVaccinated
SELECT d.continent,
d.location,
TO_DATE(d.date, 'DD MM YYYY') AS new_date,
d.population,
CAST(v.new_vaccinations AS INTEGER),
SUM(CAST(v.new_vaccinations AS INTEGER))
OVER (PARTITION BY d.location
ORDER BY d.location, TO_DATE(d.date, 'DD MM YYYY')
) AS RollingPeopleVaccinated
FROM portfolio_1.CovidDeaths AS d
JOIN portfolio_1.CovidVaccinations AS v
ON d.location = v.location
AND d.date = v.date
WHERE v.new_vaccinations IS NOT null AND d.continent IS NOT null
SELECT *,
ROUND((RollingPeopleVaccinated/population)*100, 2)
FROM PercentPopulationVaccinated
ORDER BY location, date
-- Creating view to store data for later visualisations
CREATE VIEW PercentPopulationVaccinatedView AS
SELECT d.continent,
d.location,
TO_DATE(d.date, 'DD MM YYYY') AS new_date,
d.population,
CAST(v.new_vaccinations AS INTEGER),
SUM(CAST(v.new_vaccinations AS INTEGER))
OVER (PARTITION BY d.location
ORDER BY d.location, TO_DATE(d.date, 'DD MM YYYY')
) AS RollingPeopleVaccinated
FROM portfolio_1.CovidDeaths AS d
JOIN portfolio_1.CovidVaccinations AS v
ON d.location = v.location
AND d.date = v.date
WHERE v.new_vaccinations IS NOT null AND d.continent IS NOT null
SELECT *
FROM PercentPopulationVaccinatedView