-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSQLQuery1.sql
More file actions
120 lines (100 loc) · 5.02 KB
/
SQLQuery1.sql
File metadata and controls
120 lines (100 loc) · 5.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
--SELECT * FROM covid19alex..CovidDeaths
--ORDER BY 3,4
--SELECT * FROM covid19alex..CovidVaccinations
--ORDER BY 3,4
------EXPLORING THE COLUMNS WE'LL WORK ON------
SELECT location, date, total_cases, new_cases, total_deaths, population
FROM covid19alex..CovidDeaths
ORDER BY 1,2
------FINDING HOW FATAL IT'S BEEN FOR DIFFERENT COUNTRIES AROUND THE WORLDN------
SELECT location, date, total_cases, total_deaths, ROUND((total_deaths/total_cases)*100, 2) AS mortality_rate
FROM covid19alex..CovidDeaths
ORDER BY mortality_rate DESC
-----LET'S SEE HOW IT WAS FOR CANADA------
SELECT location, date, total_cases, total_deaths, ROUND((total_deaths/total_cases)*100, 2) AS mortality_rate
FROM covid19alex..CovidDeaths
WHERE location = 'Canada'
ORDER BY mortality_rate DESC
-------What about the number of deaths? When was that the highest in Canada?------------
SELECT location, date, total_cases, total_deaths, ROUND((total_deaths/total_cases)*100, 2) AS mortality_rate
FROM covid19alex..CovidDeaths
WHERE location = 'Canada'
ORDER BY CAST(total_deaths AS INT) DESC
-----What about Bangladesh? When was its highest mortality rate??------------
SELECT location, date, total_cases, total_deaths, ROUND((total_deaths/total_cases)*100, 2) AS mortality_rate
FROM covid19alex..CovidDeaths
WHERE location = 'Bangladesh'
ORDER BY mortality_rate DESC
-----And What about the number of deaths? When was that the highest in Bangladesh?------------
SELECT location, date, total_cases, total_deaths, ROUND((total_deaths/total_cases)*100, 2) AS mortality_rate
FROM covid19alex..CovidDeaths
WHERE location = 'Bangladesh'
ORDER BY CAST(total_deaths AS INT) DESC
--------Let's now focus on the highest infection counts per country ---------
SELECT location, MAX(total_cases) AS MaxInfection
FROM covid19alex..CovidDeaths
GROUP BY location
ORDER BY MaxInfection DESC
-------Unfortunately, looks like the "location" column has non-country values like "World" and different contintent names
--let's skip those values by using NOT IN with WHERE for country-based counts only-----------
SELECT location, MAX(total_cases) AS MaxInfection
FROM covid19alex..CovidDeaths
WHERE location NOT IN ('World', 'Europe', 'Asia', 'European Union', 'North America', 'South America', 'Africa')
GROUP BY location
ORDER BY MaxInfection DESC
------Another way to avoid continents--------
SELECT location, MAX(total_cases) AS MaxInfection
FROM covid19alex..CovidDeaths
WHERE continent IS NOT null
GROUP BY location
ORDER BY MaxInfection DESC
---This works because the continent-named "location" values have "continent" as NULL in the sheet
------ How about maximum deaths per country? ------
SELECT location, MAX(CAST(total_deaths AS INT)) AS MaxDeaths
FROM covid19alex..CovidDeaths
WHERE continent IS NOT null
GROUP BY location
ORDER BY MaxDeaths DESC
------ Let's now focus on the continents only -----
SELECT continent, MAX(CAST(total_deaths AS INT)) AS MaxDeaths
FROM covid19alex..CovidDeaths
WHERE continent IS NOT null
GROUP BY continent
ORDER BY MaxDeaths DESC
---- Now, what is we use "location" named under a continent to find the same thing?
SELECT location, MAX(CAST(total_deaths AS INT)) AS MaxDeaths
FROM covid19alex..CovidDeaths
WHERE continent IS null
GROUP BY location
ORDER BY MaxDeaths DESC
----Total death per continent, based on its population-----
SELECT continent, MAX(CAST(population AS INT)) AS max_population, MAX(CAST(total_deaths AS INT)) AS max_deaths
FROM covid19alex..CovidDeaths
WHERE continent IS NOT NULL
GROUP BY continent
ORDER BY max_deaths DESC
--WHERE continent IS NOT NULL
--GROUP BY continent
----Highest Total death per continent, based on its population-----
SELECT continent, CAST(population AS INT) AS population_, CAST(total_deaths AS INT) AS total_deaths_
FROM covid19alex..CovidDeaths
WHERE continent IS NOT NULL AND total_deaths IS NOT NULL
ORDER BY total_deaths DESC
----Let's see the same thing as a percentage----
SELECT continent, CAST(population AS INT) AS population_, CAST(total_deaths AS INT) AS total_deaths_, ROUND((total_deaths/population)*100, 2) AS death_percentage
FROM covid19alex..CovidDeaths
WHERE continent IS NOT NULL AND total_deaths IS NOt NULL
ORDER BY death_percentage DESC
--this is turning out to be quite time-consuming, and we can't use the GROUP BY clause this way due to population and total_deaths not being aggregates
--so, let's try something different, to find similar statistics, but grouped per continent
SELECT
continent,
MAX(CAST(population AS INT)) AS total_population,
MAX(CAST(total_deaths AS INT)) AS total_deaths_,
(MAX(CAST(total_deaths AS FLOAT))/MAX(CAST(population AS FLOAT)))*100 AS death_percentage
--ROUND((CAST(total_deaths AS INT)/CAST(population AS INT))*100,2) AS death_percentage
FROM covid19alex..CovidDeaths
WHERE continent IS NOT NULL AND total_deaths IS NOT NULL
GROUP BY continent
ORDER BY death_percentage DESC
--here we changed the numbers to FLOAT for the percentage calculation, because they're very minuscule (less than 0.001)