網頁

2021年5月23日 星期日

R:COVID-19 疫情統計

約翰·霍普金斯大學在 GitHub 上分享了全球 COVID-19 疫情的統計資料,點選即可開啟連結:

time_series_covid19_confirmed_global.csv
time_series_covid19_deaths_global.csv
time_series_covid19_recovered_global.csv
這3個檔案分別是全球確診數,死亡人數,復原人數統計

 

將這3個 CSV 檔下載後,用 R 進行資料篩選,發現其中 5/22/2021 台灣的確診數是723例,
而不是指揮中心公布的321例。

R的程式在此,有興趣的人可以參考: 

https://ideone.com/kw4Heg

如果不會寫程式,該網頁也提供了各個國家與地區的疫情統計查詢。

COVID-19 Dashboard


library(tidyverse)
library(ggplot2)
library(cowplot)
library(patchwork)
# Base URL of the folder holding the three global time-series CSV files.
path <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/"

# File names of the confirmed / deaths / recovered series.
confirm_file <- "time_series_covid19_confirmed_global.csv"
deaths_file <- "time_series_covid19_deaths_global.csv"
recovered_file <- "time_series_covid19_recovered_global.csv"

# Read each CSV directly from its URL.
confirmed_df <- read_csv(paste0(path, confirm_file))
deaths_df <- read_csv(paste0(path, deaths_file))
recovered_df <- read_csv(paste0(path, recovered_file))

# Column positions of the reporting window: the last 22 columns.
ed <- ncol(confirmed_df)  # end date
sd <- ed - 21             # start date

# Parse the selected column headers with the m/d/y format, then convert
# to date-times so they can be used on a datetime axis.
dates <- names(confirmed_df)[sd:ed] %>%
  as.Date(format = "%m/%d/%y") %>%
  as.POSIXct(tz = "GMT")
#' Summarise confirmed, death and recovered counts for a country or the world
#'
#' The former defaults (`dates = dates`, `sd = sd`, ...) referred to
#' themselves, so relying on any of them failed with "promise already under
#' evaluation". All arguments are therefore required; calls that supplied
#' every argument behave as before.
#'
#' @param Country A single country name, matched exactly against the
#'   `Country/Region` column, or "all" to sum over every row.
#' @param dates Vector with one entry per selected column; it becomes the
#'   `dates` column of the result.
#' @param confirmed_df,deaths_df,recovered_df Data frames with a
#'   `Country/Region` column and numeric count columns at positions `sd:ed`.
#' @param sd,ed Positions of the first and last column to include.
#' @return A tibble with the columns `dates`, `cases`, `death`, `recovery`,
#'   `mortality_rate` (death / cases) and `recovery_rate` (recovery / cases).
getCountrydata <- function(Country,
                           dates,
                           confirmed_df,
                           deaths_df,
                           recovered_df,
                           sd, ed) {
  stopifnot(length(Country) == 1L)
  all_countries <- Country == "all"

  # An unknown name would otherwise silently produce all-zero counts.
  if (!all_countries && !(Country %in% confirmed_df[["Country/Region"]])) {
    warning("No rows with Country/Region == \"", Country, "\"; ",
            "all counts will be zero.", call. = FALSE)
  }

  date_cols <- sd:ed

  # Column-wise totals over the selected window, restricted to the
  # requested country unless every row is wanted.
  sum_window <- function(counts_df) {
    if (!all_countries) {
      counts_df <- filter(counts_df, `Country/Region` == !!Country)
    }
    counts_df %>%
      select(all_of(date_cols)) %>%
      colSums()
  }

  # Later columns may refer to earlier ones, so the two rates are computed
  # from the `death`, `recovery` and `cases` columns defined just above.
  tibble(dates = dates,
         cases = sum_window(confirmed_df),
         death = sum_window(deaths_df),
         recovery = sum_window(recovered_df),
         mortality_rate = death / cases,
         recovery_rate = recovery / cases)
}
# World-wide totals over the reporting window.
world.df <- getCountrydata(
  Country = "all",
  dates = dates,
  confirmed_df = confirmed_df,
  deaths_df = deaths_df,
  recovered_df = recovered_df,
  sd = sd,
  ed = ed
)

# Taiwan totals over the same window ("Taiwan*" is the value matched
# against the Country/Region column).
taiwan.df <- getCountrydata(
  Country = "Taiwan*",
  dates = dates,
  confirmed_df = confirmed_df,
  deaths_df = deaths_df,
  recovered_df = recovered_df,
  sd = sd,
  ed = ed
)

#' Line chart of cumulative confirmed cases over time
#'
#' @param df.plot Data frame with a date-time `dates` column and a numeric
#'   `cases` column.
#' @param Country Label prepended to the plot title.
#' @return A ggplot object.
tmp.cases.plot <- function(df.plot, Country) {
  df.plot %>%
    ggplot(aes(x = dates, y = cases)) +
    geom_line(color = "#69b3a2") +
    geom_point(color = "#69b3a2", size = 1) +
    # One tick per date in the plotted data itself; previously the breaks
    # came from the global `world.df`, a hidden dependency.
    scale_x_datetime(breaks = df.plot$dates,
                     date_labels = "%m/%d") +
    ggtitle(paste0(Country, " Evolution of COVID-19 cases")) +
    ylab("cases") +
    theme_cowplot() +
    # Rotate the date labels so that they do not overlap.
    theme(axis.text.x = element_text(size = 10,
                                     vjust = 0.5,
                                     hjust = 0.5,
                                     angle = 90))
}

#' Line chart of the mortality rate (deaths / cases) over time
#'
#' @param df.plot Data frame with a date-time `dates` column and a numeric
#'   `mortality_rate` column.
#' @param Country Label prepended to the plot title.
#' @return A ggplot object.
tmp.deaths.plot <- function(df.plot, Country) {
  df.plot %>%
    ggplot(aes(x = dates, y = mortality_rate)) +
    geom_line(color = "#69b3a2") +
    geom_point(color = "#69b3a2", size = 1) +
    # One tick per date in the plotted data itself; previously the breaks
    # came from the global `world.df`, a hidden dependency.
    scale_x_datetime(breaks = df.plot$dates,
                     date_labels = "%m/%d") +
    ggtitle(paste0(Country, " Evolution of COVID-19 death rates")) +
    ylab("Mortality rates(Death/Cases)") +
    theme_cowplot() +
    # Rotate the date labels so that they do not overlap.
    theme(axis.text.x = element_text(size = 10,
                                     vjust = 0.5,
                                     hjust = 0.5,
                                     angle = 90))
}
#' Line chart of the recovery rate (recovered / cases) over time
#'
#' @param df.plot Data frame with a date-time `dates` column and a numeric
#'   `recovery_rate` column.
#' @param Country Label prepended to the plot title.
#' @return A ggplot object.
tmp.recover.plot <- function(df.plot, Country) {
  df.plot %>%
    ggplot(aes(x = dates, y = recovery_rate)) +
    geom_line(color = "#69b3a2") +
    geom_point(color = "#69b3a2", size = 1) +
    # One tick per date in the plotted data itself; previously the breaks
    # came from the global `world.df`, a hidden dependency.
    scale_x_datetime(breaks = df.plot$dates,
                     date_labels = "%m/%d") +
    # Fix the y axis to the 0-1 range with a tick every 0.2.
    scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
    ggtitle(paste0(Country, " Evolution of COVID-19 recovery rates")) +
    ylab("Recovery rates(Recovery/Cases)") +
    theme_cowplot() +
    # Rotate the date labels so that they do not overlap.
    theme(axis.text.x = element_text(size = 10,
                                     vjust = 0.5,
                                     hjust = 0.5,
                                     angle = 90))
}
#----------
# Daily new cases for Taiwan.
#
# The cumulative count of the day just before the reporting window is needed
# so that the first day of the window also gets a daily increase. That day
# sits one column before the window start (ncol - 21), i.e. at ncol - 22.
# Dedicated names are used so the globals `sd`, `ed` and `dates` that define
# the reporting window are not overwritten.
prev_col <- ncol(confirmed_df) - 22
df <- getCountrydata(Country = "Taiwan*",
                     dates = names(confirmed_df)[prev_col],
                     confirmed_df = confirmed_df,
                     deaths_df = deaths_df,
                     recovered_df = recovered_df,
                     sd = prev_col, ed = prev_col)
first_data <- df$cases[[1]]
first_data

# Day-over-day difference of the cumulative counts, seeded with the previous
# day's total; unname() drops the column-header names carried by the sums.
taiwan.df$daily <- unname(diff(c(first_data, taiwan.df$cases)))
tail(taiwan.df, 10)

#' Line chart of daily new cases over time
#'
#' @param df.plot Data frame with a date-time `dates` column and a numeric
#'   `daily` column.
#' @param Country Label prepended to the plot title.
#' @return A ggplot object.
tmp.daily.plot <- function(df.plot, Country) {
  df.plot %>%
    ggplot(aes(x = dates, y = daily)) +
    geom_line(color = "#69b3a2") +
    geom_point(color = "red", size = 1) +
    # One tick per date in the plotted data itself; previously the breaks
    # came from the global `world.df`, a hidden dependency.
    scale_x_datetime(breaks = df.plot$dates, date_labels = "%m/%d") +
    ggtitle(paste0(Country, " Evolution of COVID-19 daily")) +
    ylab("daily") +
    theme_cowplot() +
    # Rotate the date labels so that they do not overlap.
    theme(axis.text.x = element_text(size = 10,
                                     vjust = 0.5,
                                     hjust = 0.5,
                                     angle = 90))
}
# Build the four Taiwan charts. Each bare name on its own line displays the
# plot when the script is run at top level.
pic1 <- tmp.daily.plot(taiwan.df, "Taiwan")
pic1

pic2 <- tmp.cases.plot(taiwan.df, "Taiwan")
pic2

pic3 <- tmp.deaths.plot(taiwan.df, "Taiwan")
pic3

pic4 <- tmp.recover.plot(taiwan.df, "Taiwan")
pic4

# Combine the four charts into a single two-column figure.
(pic1 + pic2 + pic3 + pic4) + plot_layout(ncol = 2)

輸出:



沒有留言:

張貼留言