井戸端統計情報

if (!require("pacman")) install.packages("pacman")
Loading required package: pacman
pacman::p_load(
  tidyverse,
  jsonlite,
  lubridate,
  ggpubr,
  patchwork
)
# Sanity check: convert a numeric timestamp to a date-time shown in the
# Asia/Tokyo time zone (lubridate treats the number as seconds since the
# Unix epoch).
as_datetime(1500000000, tz = "Asia/Tokyo")
[1] "2017-07-14 11:40:00 JST"
# Fetch page statistics ----
# Download the stats JSON, expand the nested `pages` entries into columns,
# number the rows in order as `page_num`, and reduce the `created` /
# `updated` values to calendar dates.
# NOTE(review): as_datetime() is called without `tz`, so the dates are taken
# in its default zone (UTC), not Asia/Tokyo -- confirm this is intended.
# Presumably `created` / `updated` arrive as Unix epoch seconds; verify
# against the JSON.
df <-
  "https://raw.githubusercontent.com/meganii/sandbox-github-actions-scheduler/main/villagepump/stats/pages.json" %>%
  fromJSON() %>%
  as_tibble() %>%
  tibble::rownames_to_column(var = "page_num") %>%
  unnest_wider(col = pages) %>%
  mutate(
    page_num = as.numeric(page_num),
    across(c(created, updated), ~ as.Date(as_datetime(.x)))
  )


# Pages created per week ----
# Bucket each page by the week containing its creation date, count pages per
# week, then add rows with n = 0 for weeks in which nothing was created so
# the series has no gaps between the first and last week.
weekly_counts <- df %>%
  mutate(week = floor_date(created, unit = "week")) %>%
  count(week) %>%
  complete(
    week = seq.Date(
      # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
      from = min(week, na.rm = TRUE),
      to = max(week, na.rm = TRUE),
      by = "week"
    ),
    fill = list(n = 0)
  )
weekly_counts
# A tibble: 204 × 2
   week           n
   <date>     <int>
 1 2020-01-19     5
 2 2020-01-26     0
 3 2020-02-02     0
 4 2020-02-09     0
 5 2020-02-16     0
 6 2020-02-23     0
 7 2020-03-01     0
 8 2020-03-08     0
 9 2020-03-15     0
10 2020-03-22     0
# ℹ 194 more rows
# Pages created per month ----
# Bucket each page by the first day of its creation month, count pages per
# month, then add rows with n = 0 for months in which nothing was created so
# the series has no gaps between the first and last month.
month_counts <- df %>%
  mutate(month = floor_date(created, unit = "month")) %>%
  count(month) %>%
  complete(
    month = seq.Date(
      # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
      from = min(month, na.rm = TRUE),
      to = max(month, na.rm = TRUE),
      by = "month"
    ),
    fill = list(n = 0)
  )
month_counts
# A tibble: 48 × 2
   month          n
   <date>     <int>
 1 2020-01-01     5
 2 2020-02-01     0
 3 2020-03-01     0
 4 2020-04-01     5
 5 2020-05-01     0
 6 2020-06-01     0
 7 2020-07-01     1
 8 2020-08-01    71
 9 2020-09-01   144
10 2020-10-01   307
# ℹ 38 more rows
# Milestone pace ----
# For every `page_unit`-th page, compute the number of days since the
# previous milestone and the resulting average pages created per day.
page_unit <- 1000
df2 <- df %>%
  filter(page_num %% page_unit == 0) %>%
  mutate(
    # The first milestone has no predecessor; measure it from 2020-01-19.
    prev_val = dplyr::coalesce(lag(created), as.Date("2020-01-19")),
    # Both columns are already Dates, so no re-parsing is needed.
    days = as.numeric(difftime(created, prev_val, units = "days")),
    "Δpages/days" = round(page_unit / days, 1)
  ) %>%
  # Select by name. The previous positional select(1, 6, 9:10) returned
  # `descriptions` and `prev_val` and dropped the two computed columns,
  # because column positions depend on the fields present in the JSON.
  select(page_num, created, days, `Δpages/days`)
df2
# A tibble: 23 × 4
   page_num created    descriptions prev_val  
      <dbl> <date>     <list<list>> <date>    
 1     1000 2020-11-30          [1] 2020-01-19
 2     2000 2021-01-28          [1] 2020-11-30
 3     3000 2021-04-09          [1] 2021-01-28
 4     4000 2021-05-07          [1] 2021-04-09
 5     5000 2021-08-19          [1] 2021-05-07
 6     6000 2022-01-09          [1] 2021-08-19
 7     7000 2022-03-12          [1] 2022-01-09
 8     8000 2022-04-29          [1] 2022-03-12
 9     9000 2022-06-03          [1] 2022-04-29
10    10000 2022-08-04          [1] 2022-06-03
# ℹ 13 more rows
# Pages created per month (bar chart) ----
Grid_Fig2 <- ggplot(data = month_counts, mapping = aes(x = month, y = n)) +
  geom_col() +
  scale_x_date(
    date_breaks = "1 months",  # one axis label per month
    date_labels = "%y-%m",     # label as two-digit year and month
    expand = c(0, 0)           # remove the extra space at both ends
  ) +
  labs(title = "Pages per month", x = "Created Month", y = "page_num") +
  # Tilt the dense monthly labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Page number against creation date ----
# Line: every page in `df`. Red points: the milestone pages held in `df2`.
Grid_Fig1 <- ggplot(df, aes(created, page_num)) +
  geom_line() +
  geom_point(data = df2, aes(x = created, y = page_num), colour = "red3") +
  scale_y_continuous(
    # Step by `page_unit` rather than a literal 1000 so the axis breaks stay
    # aligned with the milestone points if the unit is changed.
    breaks = seq(from = 0, to = max(df2$page_num) + page_unit, by = page_unit)
  ) +
  scale_x_date(
    expand = c(0, 0),         # remove the extra space at both ends
    date_breaks = "1 month",  # one axis label per month
    date_labels = "%m"        # label with the month number only
  ) +
  labs(title = "Total pages", x = "")

# Combine both figures on a shared x-range ----
# The range runs from the first counted month to one month past the last.
min_month <- min(month_counts$month)
max_month <- max(month_counts$month) + months(1)
# Apply the range through coord_cartesian(); the original note says xlim()
# does not work here.
xlimits <- coord_cartesian(xlim = c(min_month, max_month))
p <- wrap_plots(
  list(Grid_Fig1 + xlimits, Grid_Fig2 + xlimits),
  nrow = 2
)

## Print the combined figure (this heading previously read "Save to png",
## but no file is written in this section)
p