我有一个数据集，我想要整体可视化，并通过一些不同的变量分解 . 我创建了一个带有玩具闪亮应用程序的flexdashboard来选择分解类型，并使用工作代码来绘制正确的子集 .

我的方法是重复的，这对我来说是一个暗示，我错过了一个更好的方法来做到这一点 . 让我沮丧的是需要按日期计算并扩展矩阵 . 我不确定如何在一个管道中按周计算组数 . 我分几步完成并组合 .

思考？

（ps . 我在RStudio Community上问过这个问题，但我认为它可能更像是一个“SO question” . 我没有权限将其从RSC中删除，所以对于交叉帖子道歉 . ）

---
title: "test"
output: 
  flexdashboard::flex_dashboard:
    theme: bootstrap
runtime: shiny
---

```{r setup, include=FALSE}
  library(flexdashboard)
  library(tidyverse)
  library(tibbletime)
  library(dygraphs)
  library(magrittr)
  library(xts)

  set.seed(1)
  dat <- data.frame(date = seq(as.Date("2018-01-01"), 
                               as.Date("2018-06-30"), 
                               "days"),
                    sex = sample(c("male", "female"), 181, replace=TRUE),
                    lang = sample(c("english", "spanish"), 181, replace=TRUE),
                    age = sample(20:35, 181, replace=TRUE))
  dat <- sample_n(dat, 80)

  radioButtons("diss", label = "Disaggregation",
    choices = list("All" = 1, "By Sex" = 2, "By Language" = 3), 
    selected = 1)

Page 1

# all
  all <- reactive(
  dat %>%  
    mutate(new = 1) %>%
    arrange(date) %>%
  # time series analysis
    as_tbl_time(index = date) %>% # convert to tibble time object
    select(date, new) %>%
    collapse_by('1 week', side="start", clean=TRUE) %>%
    group_by(date) %>%
    mutate(total = sum(new, na.rm=TRUE)) %>% 
    distinct(date, .keep_all = TRUE) %>% 
    ungroup() %>%
  # expand matrix to include weeks without data
    complete(date = seq(date[1],
                        date[length(date)],
                        by = "1 week"),
             fill = list(total = 0)) 
  )

# males only
  males <- reactive(
  dat %>%  
    filter(sex=="male") %>%
    mutate(new = 1) %>%
    arrange(date) %>%
  # time series analysis
    as_tbl_time(index = date) %>%
    select(date, new) %>%
    collapse_by('1 week', side="start", clean=TRUE) %>%
    group_by(date) %>%
    mutate(total_m = sum(new, na.rm=TRUE)) %>% 
    distinct(date, .keep_all = TRUE) %>% 
    ungroup() %>%
  # expand matrix to include weeks without data
    complete(date = seq(date[1],
                        date[length(date)],
                        by = "1 week"),
             fill = list(total_m = 0)) 
  )

# females only
  females <- reactive(
  dat %>%  
    filter(sex=="female") %>%
    mutate(new = 1) %>%
    arrange(date) %>%
  # time series analysis
    as_tbl_time(index = date) %>%
    select(date, new) %>%
    collapse_by('1 week', side="start", clean=TRUE) %>%
    group_by(date) %>%
    mutate(total_f = sum(new, na.rm=TRUE)) %>% 
    distinct(date, .keep_all = TRUE) %>% 
    ungroup() %>%
  # expand matrix to include weeks without data
    complete(date = seq(date[1],
                        date[length(date)],
                        by = "1 week"),
             fill = list(total_f = 0)) 
  )

# english only
  english <- reactive(
  dat %>%  
    filter(lang=="english") %>%
    mutate(new = 1) %>%
    arrange(date) %>%
  # time series analysis
    as_tbl_time(index = date) %>%
    select(date, new) %>%
    collapse_by('1 week', side="start", clean=TRUE) %>%
    group_by(date) %>%
    mutate(total_e = sum(new, na.rm=TRUE)) %>% 
    distinct(date, .keep_all = TRUE) %>% 
    ungroup() %>%
  # expand matrix to include weeks without data
    complete(date = seq(date[1],
                        date[length(date)],
                        by = "1 week"),
             fill = list(total_e = 0)) 
  )

# spanish only
  spanish <- reactive(
  dat %>%  
    filter(lang=="spanish") %>%
    mutate(new = 1) %>%
    arrange(date) %>%
  # time series analysis
    as_tbl_time(index = date) %>%
    select(date, new) %>%
    collapse_by('1 week', side="start", clean=TRUE) %>%
    group_by(date) %>%
    mutate(total_s = sum(new, na.rm=TRUE)) %>% 
    distinct(date, .keep_all = TRUE) %>% 
    ungroup() %>%
  # expand matrix to include weeks without data
    complete(date = seq(date[1],
                        date[length(date)],
                        by = "1 week"),
             fill = list(total_s = 0)) 
  )

# combine

  totals <- reactive({

  all <- all()
  females <- females()
  males <- males()
  english <- english()
  spanish <- spanish()

  all %>%
    select(date, total) %>%
    full_join(select(females, date, total_f), by = "date") %>%
    full_join(select(males, date, total_m), by = "date") %>%
    full_join(select(english, date, total_e), by = "date") %>%
    full_join(select(spanish, date, total_s), by = "date") 
  })

# convert to xts
  totals_ <- reactive({
    totals <- totals()
    xts(totals, order.by = totals$date)
  })

# plot
  renderDygraph({

  totals_ <- totals_()

  if (input$diss == 1) {
  dygraph(totals_[, "total"],
          main= "All") %>%
    dySeries("total", label = "All") %>%
    dyRangeSelector() %>%
    dyOptions(useDataTimezone = FALSE,
              stepPlot = TRUE,
              drawGrid = FALSE,
              fillGraph = TRUE) 
  } else if (input$diss == 2) {
    dygraph(totals_[, c("total_f", "total_m")],
            main = "By sex") %>%
    dyRangeSelector() %>%
    dySeries("total_f", label = "Female") %>%
    dySeries("total_m", label = "Male") %>%
    dyOptions(useDataTimezone = FALSE,
              stepPlot = TRUE,
              drawGrid = FALSE,
              fillGraph = TRUE) 
  } else {
    dygraph(totals_[, c("total_e", "total_s")],
            main = "By language") %>%
    dyRangeSelector() %>%
    dySeries("total_e", label = "English") %>%
    dySeries("total_s", label = "Spanish") %>%
    dyOptions(useDataTimezone = FALSE,
              stepPlot = TRUE,
              drawGrid = FALSE,
              fillGraph = TRUE)
  }
  })


 **Update:** 

@Jon Spring建议编写一个函数来减少一些重复（在下面应用），这是一个很好的改进 . 然而，基本方法是相同的 . 细分，计算，组合，绘图 . 有没有办法在不分裂和重新组合的情况下做到这一点？

```java
---
title: "test"
output: 
  flexdashboard::flex_dashboard:
    theme: bootstrap
runtime: shiny
---

```{r setup, include=FALSE}
  library(flexdashboard)
  library(tidyverse)
  library(tibbletime)
  library(dygraphs)
  library(magrittr)
  library(xts)

# generate data
  set.seed(1)
  dat <- data.frame(date = seq(as.Date("2018-01-01"), 
                               as.Date("2018-06-30"), 
                               "days"),
                    sex = sample(c("male", "female"), 181, replace=TRUE),
                    lang = sample(c("english", "spanish"), 181, replace=TRUE),
                    age = sample(20:35, 181, replace=TRUE))
  dat <- sample_n(dat, 80)

# Jon Spring's function
  prep_dat <- function(filtered_dat, col_name = "total") {
  filtered_dat %>%
    mutate(new = 1) %>%
    arrange(date) %>%
  # time series analysis
    tibbletime::as_tbl_time(index = date) %>% # convert to tibble time object
    select(date, new) %>%
    tibbletime::collapse_by("1 week", side = "start", clean = TRUE) %>%
    group_by(date) %>%
    mutate(total = sum(new, na.rm = TRUE)) %>%
    distinct(date, .keep_all = TRUE) %>%
    ungroup() %>%
    # expand matrix to include weeks without data
    complete(
      date = seq(date[1], date[length(date)], by = "1 week"),
      fill = list(total = 0)
    )
  }

  radioButtons("diss", label = "Disaggregation",
    choices = list("All" = 1, "By Sex" = 2, "By Language" = 3), 
    selected = 1)

Page 1

# all
  all <- reactive(
  prep_dat(dat) 
  )

# males only
  males <- reactive(
  prep_dat(
    dat %>% 
    filter(sex == "male")
  ) %>% 
    rename("total_m" = "total")
  )

# females only
  females <- reactive(
  prep_dat(
    dat %>% 
    filter(sex == "female")
  ) %>% 
    rename("total_f" = "total")
  )

# english only
  english <- reactive(
  prep_dat(
    dat %>% 
    filter(lang == "english")
  ) %>% 
    rename("total_e" = "total")
  )

# spanish only
  spanish <- reactive(
  prep_dat(
    dat %>% 
    filter(lang == "spanish")
  ) %>% 
    rename("total_s" = "total")
  )

# combine

  totals <- reactive({

  all <- all()
  females <- females()
  males <- males()
  english <- english()
  spanish <- spanish()

  all %>%
    select(date, total) %>%
    full_join(select(females, date, total_f), by = "date") %>%
    full_join(select(males, date, total_m), by = "date") %>%
    full_join(select(english, date, total_e), by = "date") %>%
    full_join(select(spanish, date, total_s), by = "date") 
  })

# convert to xts
  totals_ <- reactive({
    totals <- totals()
    xts(totals, order.by = totals$date)
  })

# plot
  renderDygraph({

  totals_ <- totals_()

  if (input$diss == 1) {
  dygraph(totals_[, "total"],
          main= "All") %>%
    dySeries("total", label = "All") %>%
    dyRangeSelector() %>%
    dyOptions(useDataTimezone = FALSE,
              stepPlot = TRUE,
              drawGrid = FALSE,
              fillGraph = TRUE) 
  } else if (input$diss == 2) {
    dygraph(totals_[, c("total_f", "total_m")],
            main = "By sex") %>%
    dyRangeSelector() %>%
    dySeries("total_f", label = "Female") %>%
    dySeries("total_m", label = "Male") %>%
    dyOptions(useDataTimezone = FALSE,
              stepPlot = TRUE,
              drawGrid = FALSE,
              fillGraph = TRUE) 
  } else {
    dygraph(totals_[, c("total_e", "total_s")],
            main = "By language") %>%
    dyRangeSelector() %>%
    dySeries("total_e", label = "English") %>%
    dySeries("total_s", label = "Spanish") %>%
    dyOptions(useDataTimezone = FALSE,
              stepPlot = TRUE,
              drawGrid = FALSE,
              fillGraph = TRUE)
  }
  })

3 回答

感谢您解释有关您目标的更多信息 . 我认为@ simon-s-a建议的方法会简化事情 . 如果我们可以动态运行分组并对其进行结构化，以便我们不需要事先知道这些组中可能的组件，那么维护起来会容易得多 .

这是一个最小的可行产品，它可以重建绘图功能，在其中包含分组逻辑 .

按日期分组后，无论我们的分组变量是什么，它都会计算每个组的行数，然后展开这些行，以便每个组获得一列 .
然后我使用 padr::pad 来填充中间任何缺少的时间行，并用零替换所有NA .
最后，该数据帧被转换为 xts 对象并送入dygraph，它似乎自动处理多个列 .

这里：

---
title: "test"
output: 
  flexdashboard::flex_dashboard:
    theme: bootstrap
runtime: shiny
---

```{r setup, include=FALSE}
library(flexdashboard)
library(tidyverse)
library(tibbletime)
library(dygraphs)
library(magrittr)
library(xts)

# generate data
set.seed(1)
dat <- data.frame(date = seq(as.Date("2018-01-01"), 
                             as.Date("2018-06-30"), 
                             "days"),
                  sex = sample(c("male", "female"), 181, replace=TRUE),
                  lang = sample(c("english", "spanish"), 181, replace=TRUE),
                  age = sample(20:35, 181, replace=TRUE))
dat <- dplyr::sample_n(dat, 80)


radioButtons("diss", label = "Disaggregation",
             choices = list("All" = "Total",
                            "By Sex" = "sex",
                            "By Language" = "lang"), 
             selected = "Total")

Page 1


renderDygraph({
  grp_col <- rlang::sym(input$diss) # This converts the input selection to a symbol

  dat %>%
    mutate(Total = 1) %>% # This is a hack to let us "group" by Total -- all one group

    # Here's where we unquote the symbol so that dplyr can use it 
    #   to refer to a column. In this case I make a dummy column 
    #   that's a copy of whatever column we want to group
    mutate(my_group = !!grp_col) %>%

    # Now we make a group for every existing combination of week 
    #   (using lubridate::floor_date) and level of our grouping column,
    #   count how many rows in each group, and spread that to wide format.
    group_by(date = lubridate::floor_date(date, "1 week"), my_group) %>%
    count() %>% spread(my_group, n) %>% ungroup() %>%

    # padr:pad() fills in any missing weeks in the sequence with new rows
    #   Then we replace all the NA's with zeroes.
    padr::pad() %>% replace(is.na(.), 0) %>%

    # Finally we can convert to xts and feed the wide table into digraph.
    xts::xts(order.by = .$date) %>%
    dygraph() %>%
    dyRangeSelector() %>%
    dyOptions(
      useDataTimezone = FALSE, stepPlot = TRUE,
      drawGrid = FALSE, fillGraph = TRUE
    )
})

回复于 2024-05-06T09:50:48+08:00

1
这是一个创建函数，缩短代码并使其不易出错的好地方 .

http://r4ds.had.co.nz/functions.html

一个复杂的问题是使用 dplyr 进行编程通常需要涉及一个名为tidyeval的框架，这个框架非常强大但可能令人生畏 . https://dplyr.tidyverse.org/articles/programming.html

（这是一种回避tidyeval的替代方法：https://cran.r-project.org/web/packages/seplyr/vignettes/using_seplyr.html）

在您的场景中，可以通过在函数之前和之后进行一些操作来完全避免这些挑战 . 它并不优雅，但有效 .

顺便说一下，我不能保证它会工作，因为你没有共享一个可验证的代表（例如包括与你的形式相同的数据样本），但它与我编写的假数据一起工作 . （见下文 . ）抱歉，我错过了提供样本数据的块 .
```
prep_dat <- function(filtered_dat, col_name = "total") {
  filtered_dat %>%
    mutate(new = 1) %>%
    arrange(date) %>%
  # time series analysis
  tibbletime::as_tbl_time(index = date) %>% # convert to tibble time object
    select(date, new) %>%
    tibbletime::collapse_by("1 week", side = "start", clean = TRUE) %>%
    group_by(date) %>%
    mutate(total = sum(new, na.rm = TRUE)) %>%
    distinct(date, .keep_all = TRUE) %>%
    ungroup() %>%
    # expand matrix to include weeks without data
    complete(
      date = seq(date[1], date[length(date)], by = "1 week"),
      fill = list(total = 0)
    )
}
```
然后，您可以使用过滤后的数据和总列的名称来调用它 . 这个片段应该能够替换你当前使用的~20行：
```
males <- prep_dat(dat_fake %>% 
  filter(sex == "male")) %>% 
  rename("total_m" = "total")
```
我测试的虚假数据：
```
dat_fake <- tibble(
  date = as.Date("2018-01-01") + runif(500, 0, 100),
  new  = runif(500, 0, 100),
  sex  = sample(c("male", "female"), 
                500, replace = TRUE),
  lang = sample(c("english", "french", "spanish", "portuguese", "tagalog"), 
                500, replace = TRUE)
)
```
回复于 2024-05-06T09:50:48+08:00
1
我认为你可以通过改变准备的顺序来获得一些收益 . 现在，您的应用流程大约是：

数据=>准备所有组合=>选择所需的可视化=>制作绘图

请考虑一下：

数据=>选择所需的可视化=>准备所需的组合=>制作绘图

这将利用Shiny的反应性来（重新）准备所请求的绘图所需的数据以响应用户选择的变化 .

通过代码片段（抱歉，我对 flexdashboard 和 tibbletime 没有足够的熟悉以确保此代码运行，但我希望它足以突出显示该方法）：

你的控件选择你想要关注的列（注意我们使用 "All" = "'1'" 所以这会在group-by中计算为常量，否则必须单独处理）：
```
radioButtons("diss", label = "Disaggregation",
             choices = list("All" = "'1'",
                            "By Sex" = "sex",
                            "By Language" = "lang",
                            "By other" = "column_name_of_'other'"), 
             selected = 1)
```
然后在您的组中使用它来仅准备当前可视化所需的数据（您需要调整@Jon_Spring建议的功能以响应此前面的分组）：
```
preped_dat = reactive({
  dat %>%
    group_by_(input$diss) %>%
    # etc
})
```
在绘图之前（您需要调整绘图功能以响应数据格式的可能变化）：
```
renderDygraph({
  totals = preped_data()
  dygraph(totals) %>%
      dySeries("total", label = ) %>%
      dyRangeSelector()
})
```
关于 group_by ，如果所有参数都是文本字符串，则可以使用 group_by_ ;如果要将控件中的文本字符串输入与其他列名混合，则可以使用 group_by(!! sym(input$diss), other_column_name) .

如果您的数据集很大，这种方法变化的一个可能的缺点是交互期间的响应性降低 . 本方法预先进行所有计算，然后对每个选择进行最小化计算 - 如果您进行大量处理，这可能更为可取 . 我建议的方法将具有最小的前期处理和适度的计算每个选择 .
回复于 2024-05-06T09:50:48+08:00

在时间序列的背景下分解

Sidebar

Page 1

Sidebar

Page 1

3 回答

Sidebar

Page 1

在时间序列的背景下分解

Sidebar

Page 1

Sidebar

Page 1

3 回答

Sidebar

Page 1

相关问题