約翰·霍普金斯大學的 GitHub 分享了全球 COVID-19 疫情的統計資料,點選開啟連結:
time_series_covid19_confirmed_global.csv
time_series_covid19_deaths_global.csv
time_series_covid19_recovered_global.csv
這3個檔案分別是全球確診數、死亡人數、復原人數的統計。
將這3個csv檔爬下來後用R的資料篩選,發現其中 5/22/2021台灣的確診數是723例,
並不是指揮中心公布的321例。
R的程式在此,有興趣的人可以參考:
如果不會寫程式,該網頁也提供了各個國家與地區的疫情統計查詢。
library(tidyverse)
library(ggplot2)
library(cowplot)
library(patchwork)
# Base URL of the folder that holds the three global time-series CSV files.
path <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/"

# File names of the confirmed, deaths and recovered series.
confirm_file <- "time_series_covid19_confirmed_global.csv"
deaths_file <- "time_series_covid19_deaths_global.csv"
recovered_file <- "time_series_covid19_recovered_global.csv"

# Download each CSV straight from its URL.
confirmed_df <- read_csv(paste0(path, confirm_file))
deaths_df <- read_csv(paste0(path, deaths_file))
recovered_df <- read_csv(paste0(path, recovered_file))

# Column positions of the reporting window: the last 22 columns of the table.
ed <- ncol(confirmed_df) # end date
sd <- ed - 21 # start date

# The headers of the window columns are parsed as m/d/yy dates and then
# converted to POSIXct.
dates <- names(confirmed_df)[sd:ed] %>%
  as.Date(format = "%m/%d/%y") %>%
  as.POSIXct(tz = "GMT")
# Sum the confirmed / death / recovered counts over a window of columns,
# either for every row or only for the rows of one country, and derive the
# mortality and recovery rates.
#
# Arguments:
#   Country      Value compared with the `Country/Region` column, or "all"
#                to sum over every row.
#   dates        Vector placed in the `dates` column of the result; it needs
#                one entry per selected column.
#   confirmed_df, deaths_df, recovered_df
#                Data frames with a `Country/Region` column and numeric count
#                columns at positions sd:ed.
#   sd, ed       First and last column position (inclusive) of the window.
#
# All arguments are required. The former defaults (`dates = dates`, ...)
# referred to themselves and raised "promise already under evaluation"
# whenever a caller relied on them.
#
# Returns a tibble with columns dates, cases, death, recovery,
# mortality_rate (death / cases) and recovery_rate (recovery / cases).
getCountrydata <- function(Country,
                           dates,
                           confirmed_df,
                           deaths_df,
                           recovered_df,
                           sd, ed) {
  window_cols <- sd:ed
  whole_table <- Country == "all"

  # A name that matches no row would silently produce all-zero totals, so
  # make that visible to the caller.
  if (!whole_table &&
      !any(confirmed_df[["Country/Region"]] == Country, na.rm = TRUE)) {
    warning("No rows with Country/Region == \"", Country, "\"; totals are zero.",
            call. = FALSE)
  }

  # Column totals of the window, restricted to the requested country unless
  # the whole table was asked for.
  window_totals <- function(df) {
    if (!whole_table) {
      df <- filter(df, `Country/Region` == !!Country)
    }
    df %>%
      # all_of() makes it explicit that these are positions taken from the
      # calling environment, not column names.
      select(all_of(window_cols)) %>%
      colSums()
  }

  cases <- window_totals(confirmed_df)
  death <- window_totals(deaths_df)
  recovered <- window_totals(recovered_df)

  tibble(dates = dates,
         cases = cases,
         death = death,
         recovery = recovered,
         mortality_rate = death / cases,
         recovery_rate = recovery / cases)
}
# Totals over every row of the three tables for the reporting window.
world.df <- getCountrydata(
  "all", dates, confirmed_df, deaths_df, recovered_df, sd, ed
)

# Same window, restricted to the rows whose Country/Region is "Taiwan*".
taiwan.df <- getCountrydata(
  "Taiwan*", dates, confirmed_df, deaths_df, recovered_df, sd, ed
)
# Line-and-point chart of the `cases` column against `dates`.
#
# Arguments:
#   df.plot  Data frame with a date-time `dates` column and a `cases` column.
#   Country  Label prepended to the plot title.
#
# Returns the ggplot object.
tmp.cases.plot <- function(df.plot, Country) {
  ggplot(df.plot, aes(x = dates, y = cases)) +
    geom_line(color = "#69b3a2") +
    geom_point(color = "#69b3a2", size = 1) +
    # One tick per observation, taken from the data being plotted rather
    # than from a global data frame.
    scale_x_datetime(breaks = df.plot$dates,
                     date_labels = "%m/%d") +
    ggtitle(paste0(Country, " Evolution of COVID-19 cases")) +
    ylab("cases") +
    theme_cowplot() +
    # Rotate the date labels so that they do not overlap.
    theme(axis.text.x = element_text(size = 10,
                                     vjust = 0.5,
                                     hjust = 0.5,
                                     angle = 90))
}
# Line-and-point chart of the `mortality_rate` column against `dates`.
#
# Arguments:
#   df.plot  Data frame with a date-time `dates` column and a
#            `mortality_rate` column.
#   Country  Label prepended to the plot title.
#
# Returns the ggplot object.
tmp.deaths.plot <- function(df.plot, Country) {
  ggplot(df.plot, aes(x = dates, y = mortality_rate)) +
    geom_line(color = "#69b3a2") +
    geom_point(color = "#69b3a2", size = 1) +
    # One tick per observation, taken from the data being plotted rather
    # than from a global data frame.
    scale_x_datetime(breaks = df.plot$dates,
                     date_labels = "%m/%d") +
    ggtitle(paste0(Country, " Evolution of COVID-19 death rates")) +
    ylab("Mortality rates(Death/Cases)") +
    theme_cowplot() +
    # Rotate the date labels so that they do not overlap.
    theme(axis.text.x = element_text(size = 10,
                                     vjust = 0.5,
                                     hjust = 0.5,
                                     angle = 90))
}
# Line-and-point chart of the `recovery_rate` column against `dates`, with
# the y axis fixed to the range 0 to 1.
#
# Arguments:
#   df.plot  Data frame with a date-time `dates` column and a
#            `recovery_rate` column.
#   Country  Label prepended to the plot title.
#
# Returns the ggplot object.
tmp.recover.plot <- function(df.plot, Country) {
  ggplot(df.plot, aes(x = dates, y = recovery_rate)) +
    geom_line(color = "#69b3a2") +
    geom_point(color = "#69b3a2", size = 1) +
    # One tick per observation, taken from the data being plotted rather
    # than from a global data frame.
    scale_x_datetime(breaks = df.plot$dates,
                     date_labels = "%m/%d") +
    scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
    ggtitle(paste0(Country, " Evolution of COVID-19 recovery rates")) +
    ylab("Recovery rates(Recovery/Cases)") +
    theme_cowplot() +
    # Rotate the date labels so that they do not overlap.
    theme(axis.text.x = element_text(size = 10,
                                     vjust = 0.5,
                                     hjust = 0.5,
                                     angle = 90))
}
#----------
# Daily increase of Taiwan's cumulative case count.
#
# The column just before the reporting window supplies the baseline for the
# first row. NOTE: this overwrites the global sd, ed and dates that were set
# for the reporting window.
sd <- length(confirmed_df) - 22
sd
ed <- sd
ed
dates <- colnames(confirmed_df[, sd:ed])
df <- getCountrydata(Country = "Taiwan*",
                     dates = dates,
                     confirmed_df = confirmed_df,
                     deaths_df = deaths_df,
                     recovered_df = recovered_df, sd, ed)
first_data <- df$cases[[1]]
first_data

# Vectorised difference between consecutive cumulative counts. Unlike a
# 1:nrow() loop this also behaves when taiwan.df has no rows. unname() keeps
# the column free of the names carried by the cases vector.
taiwan.df$daily <- unname(diff(c(first_data, taiwan.df$cases)))
tail(taiwan.df, 10)
# Line chart with red points of the `daily` column against `dates`.
#
# Arguments:
#   df.plot  Data frame with a date-time `dates` column and a `daily` column.
#   Country  Label prepended to the plot title.
#
# Returns the ggplot object.
tmp.daily.plot <- function(df.plot, Country) {
  ggplot(df.plot, aes(x = dates, y = daily)) +
    geom_line(color = "#69b3a2") +
    geom_point(color = "red", size = 1) +
    # One tick per observation, taken from the data being plotted rather
    # than from a global data frame.
    scale_x_datetime(breaks = df.plot$dates, date_labels = "%m/%d") +
    ggtitle(paste0(Country, " Evolution of COVID-19 daily")) +
    ylab("daily") +
    theme_cowplot() +
    # Rotate the date labels so that they do not overlap.
    theme(axis.text.x = element_text(size = 10,
                                     vjust = 0.5,
                                     hjust = 0.5,
                                     angle = 90))
}
# Build the four Taiwan charts and show each one as it is created.
pic1 <- tmp.daily.plot(taiwan.df, "Taiwan")
pic1
pic2 <- tmp.cases.plot(taiwan.df, "Taiwan")
pic2
pic3 <- tmp.deaths.plot(taiwan.df, "Taiwan")
pic3
pic4 <- tmp.recover.plot(taiwan.df, "Taiwan")
pic4

# Combine the four charts into a two-column layout.
pic1 + pic2 + pic3 + pic4 + plot_layout(ncol = 2)
輸出:
沒有留言:
張貼留言