-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLayoffs_Data_Cleaning.sql
More file actions
107 lines (78 loc) · 2.17 KB
/
Layoffs_Data_Cleaning.sql
File metadata and controls
107 lines (78 loc) · 2.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
-- Data cleaning
select *
from layoffs;
create table layoffs_staging like layoffs;
insert layoffs_staging
select *
from layoffs;
-- 1. Remove Duplicates
CREATE TABLE `layoffs_staging2` (
`company` text,
`location` text,
`industry` text,
`total_laid_off` int DEFAULT NULL,
`percentage_laid_off` text,
`date` text,
`stage` text,
`country` text,
`funds_raised_millions` int DEFAULT NULL,
`row_num` int
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
insert into layoffs_staging2
select *,
row_number() over(partition by company, location, stage, country, funds_raised_millions, industry, total_laid_off, percentage_laid_off, `date`) as row_num
from layoffs_staging;
select *
from layoffs_staging2
where row_num > 1;
SET SQL_SAFE_UPDATES = 0;
#disables safe update
delete
from layoffs_staging2
where row_num > 1;
SET SQL_SAFE_UPDATES = 1;
#It's good practice to re-enable it afterward
-- 2. Standardize Data
SET SQL_SAFE_UPDATES = 0;
UPDATE layoffs_staging2
SET
company = TRIM(company),
location = TRIM(location),
stage = TRIM(stage),
country = TRIM(country),
industry = TRIM(industry),
percentage_laid_off = TRIM(percentage_laid_off),
`date` = TRIM(`date`);
update layoffs_staging2
set industry = 'Crypto'
where industry like 'Crypto%';
update layoffs_staging2
set country = 'United States'
where country like 'United States%';
update layoffs_staging2
set `date` = str_to_date(`date`, '%m/%d/%Y');
alter table layoffs_staging2
modify column `date` Date;
SET SQL_SAFE_UPDATES = 1;
-- 3. Null Values or blank values
SET SQL_SAFE_UPDATES = 0;
update layoffs_staging2
set industry = null
where industry = '';
update layoffs_staging2 t1
join layoffs_staging2 t2
on t1.company = t2.company
set t1.industry = t2.industry #populates null t1's with non null t2 data
where t1.industry is null and t2.industry is not null;
delete
from layoffs_staging2
where total_laid_off is null
and percentage_laid_off is null;
SET SQL_SAFE_UPDATES = 1;
-- 4. Remove any columns
SET SQL_SAFE_UPDATES = 0;
alter table layoffs_staging2
drop column row_num;
SET SQL_SAFE_UPDATES = 1;
select *
from layoffs_staging2; #presents the cleaned table