library(tidyverse)
library(readxl)
# Load the four raw inputs from the data/ folder, one reader per file format.
# File 1 -- Member 2 (pair 1): comma-separated values.
data1 <- read_csv(file = "data/mismanaged-plastic-waste-per-capita.csv")
# File 2 -- Member 4 (pair 2): read_csv2() parses ";" as the delimiter and
# "," as the decimal mark.
data2 <- read_csv2(file = "data/per-capita-ocean-plastic-waste.txt")
# File 3 -- Member 6 (pair 3): tab-separated values.
data3 <- read_tsv(file = "data/UN_country_population.tsv")
# File 4 -- Member 1 (pair 1): Excel workbook; no sheet is given, so the
# reader's default sheet is used.
data4 <- read_excel(path = "data/UN_country_region.xlsx")
# Build 'plastic_data_all' by joining the four inputs on the 'code' column.
# Member 2 (pair 1): start from File 1.
plastic_data_all <- data1
# Member 4 (pair 2): File 2 is the left-hand table, so the result keeps the
# codes found in File 2. Its 'name' column is removed first so the join does
# not produce two name columns.
plastic_data_all <- left_join(
  x = select(data2, -name),
  y = plastic_data_all,
  by = "code"
)
# Member 6 (pair 3): the right join keeps every row already in
# 'plastic_data_all' and attaches the remaining File 3 columns to it.
plastic_data_all <- right_join(
  x = select(data3, -name),
  y = plastic_data_all,
  by = "code"
)
# Member 1 (pair 1): same pattern for File 4.
plastic_data_all <- right_join(
  x = select(data4, -name),
  y = plastic_data_all,
  by = "code"
)
# Member 2 (pair 1): preview the first 10 rows of 'plastic_data_all'.
head(plastic_data_all, n = 10)
## # A tibble: 10 × 6
## code region population mismanaged_plastic_o…¹ name mismanaged_plastic
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 AGO Africa 31808. 0.0270 Ango… 7.45
## 2 ALB Europe 2876. 0.543 Alba… 24.2
## 3 ARE Asia 9176. 0.00143 Unit… 0.526
## 4 ARG Latin Ameri… 44583. 0.0924 Arge… 10.4
## 5 ATG Latin Ameri… 91.9 0.0206 Anti… 6.46
## 6 AUS Oceania 25170. 0.00127 Aust… 0.209
## 7 BEL Europe 11480. 0.00295 Belg… 0.198
## 8 BEN Africa 12114. 0.139 Benin 11.3
## 9 BFA Africa 20671. 0 Burk… 15.6
## 10 BGD Asia 164605. 0.151 Bang… 6.27
## # ℹ abbreviated name: ¹mismanaged_plastic_ocean
# Exercise 1: count the rows in each region, largest count first.
count(plastic_data_all, region, sort = TRUE)
## # A tibble: 6 × 2
## region n
## <chr> <int>
## 1 Africa 43
## 2 Asia 39
## 3 Latin America and The Caribbean 34
## 4 Europe 31
## 5 Oceania 10
## 6 North America 2
# Exercise 2: add each country's total mismanaged plastic (per-capita value
# times population), then average that total within each region.
# NOTE(review): 'population' looks like it is recorded in thousands, which
# would scale the totals accordingly -- confirm the units.
plastic_data_all <- mutate(
  plastic_data_all,
  total_mismanaged_plastic = mismanaged_plastic * population
)
plastic_data_all %>%
  group_by(region) %>%
  summarise(avg_total = mean(total_mismanaged_plastic))
## # A tibble: 6 × 2
## region avg_total
## <chr> <dbl>
## 1 Africa 320729.
## 2 Asia 1026059.
## 3 Europe 38180.
## 4 Latin America and The Caribbean 193450.
## 5 North America 147190.
## 6 Oceania 14577.
# Exercise 3: ratio of ocean-bound mismanaged plastic to all mismanaged
# plastic for each country, then the median ratio per region.
plastic_data_all <- mutate(
  plastic_data_all,
  pct_mismanaged_plastic_ocean = mismanaged_plastic_ocean / mismanaged_plastic
)
# Missing ratios are deliberately left in at this stage, so any region that
# contains one reports NA for its median.
plastic_data_all %>%
  group_by(region) %>%
  summarise(med_pct = median(pct_mismanaged_plastic_ocean))
## # A tibble: 6 × 2
## region med_pct
## <chr> <dbl>
## 1 Africa NA
## 2 Asia 0.00974
## 3 Europe 0.00846
## 4 Latin America and The Caribbean 0.0307
## 5 North America 0.00959
## 6 Oceania 0.0158
# Inspect the row for country code "SOM" (Somalia).
filter(plastic_data_all, code == "SOM")
## # A tibble: 1 × 8
## code region population mismanaged_plastic_ocean name mismanaged_plastic
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 SOM Africa 15690. 0 Somalia 0
## # ℹ 2 more variables: total_mismanaged_plastic <dbl>,
## # pct_mismanaged_plastic_ocean <dbl>
`NaN` stands for Not a Number. This has happened because both mismanaged
plastic waste per capita estimates for Somalia are `0`; therefore the
percentage calculation is attempting to compute `0/0`! As the percentage
value for Somalia is missing, it is not possible to compute the median
statistic for Africa, which is why the summary table contains `NA`.
# Median ocean ratio per region, skipping countries whose ratio is missing,
# sorted from the smallest median to the largest.
# Only rows missing 'pct_mismanaged_plastic_ocean' are removed: a bare
# drop_na() would also discard countries with a gap in any other column,
# even though that column plays no part in this summary.
plastic_data_all %>%
  drop_na(pct_mismanaged_plastic_ocean) %>%
  group_by(region) %>%
  summarise(med_pct = median(pct_mismanaged_plastic_ocean)) %>%
  arrange(med_pct)
## # A tibble: 6 × 2
## region med_pct
## <chr> <dbl>
## 1 Africa 0.00526
## 2 Europe 0.00846
## 3 North America 0.00959
## 4 Asia 0.00974
## 5 Oceania 0.0158
## 6 Latin America and The Caribbean 0.0307
# Exercise 4: shorten the column names and abbreviate the longest region
# label, then preview the first 10 rows of the edited data frame.
plastic_data_all <- plastic_data_all %>%
  rename(
    pop = population,
    ocean_plastic = mismanaged_plastic_ocean,
    mismanaged = mismanaged_plastic,
    tot_plastic = total_mismanaged_plastic,
    pct_ocean = pct_mismanaged_plastic_ocean
  ) %>%
  mutate(
    region = str_replace_all(
      region,
      "Latin America and The Caribbean",
      "LAC"
    )
  )
head(plastic_data_all, n = 10)
## # A tibble: 10 × 8
## code region pop ocean_plastic name mismanaged tot_plastic pct_ocean
## <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 AGO Africa 31808. 0.0270 Angola 7.45 236821. 0.00363
## 2 ALB Europe 2876. 0.543 Albania 24.2 69703. 0.0224
## 3 ARE Asia 9176. 0.00143 United… 0.526 4822. 0.00273
## 4 ARG LAC 44583. 0.0924 Argent… 10.4 463744. 0.00888
## 5 ATG LAC 91.9 0.0206 Antigu… 6.46 594. 0.00319
## 6 AUS Oceania 25170. 0.00127 Austra… 0.209 5259. 0.00608
## 7 BEL Europe 11480. 0.00295 Belgium 0.198 2272. 0.0149
## 8 BEN Africa 12114. 0.139 Benin 11.3 136872. 0.0123
## 9 BFA Africa 20671. 0 Burkin… 15.6 322769. 0
## 10 BGD Asia 164605. 0.151 Bangla… 6.27 1031765. 0.0241
Create your own table/summary using the edited `plastic_data_all` data frame.