首页 文章

无法使用dplyr mutate添加列

提问于
浏览
1

我目前有这个数据框

# Example data: one row per quarter, one column per measure.
dat <- data.frame(
  time = c("Q1", "Q2", "Q3"),
  measure1 = c(1, 2, 9),
  measure2 = c(4, 5, 6)
)

# Reshape to long format (Metric / Value columns), group the rows, then
# convert the result back to a plain data.frame.
dat2 <- dat %>%
  gather(key = Metric, value = Value, c(measure1, measure2)) %>%
  group_by(Metric, time) %>%
  as.data.frame()

time   Metric Value
1   Q1 measure1     1
2   Q2 measure1     2
3   Q3 measure1     9
4   Q1 measure2     4
5   Q2 measure2     5
6   Q3 measure2     6

我可以像这样手动添加 test 列

# Flag a row with 1 when its Value exceeds the Value taken from the rows of
# the other measure, and with -1 otherwise. The subset Value[Metric == ...]
# is shorter than the full column, so the comparison relies on R's vector
# recycling and on the current row order of dat2.
dat2$test <- with(
  dat2,
  ifelse(
    Metric == "measure1" & Value > Value[Metric == "measure2"],
    1,
    ifelse(
      Metric == "measure2" & Value > Value[Metric == "measure1"],
      1,
      -1
    )
  )
)

 time   Metric Value test
1   Q1 measure1     1   -1
2   Q2 measure1     2   -1
3   Q3 measure1     9    1
4   Q1 measure2     4    1
5   Q2 measure2     5    1
6   Q3 measure2     6   -1

但我想像这样使用dplyr和mutate

# Example data: one row per quarter, one column per measure.
dat <- data.frame(
  time = c("Q1", "Q2", "Q3"),
  measure1 = c(1, 2, 9),
  measure2 = c(4, 5, 6)
)

# Store the pipeline result in dat2. Without this assignment the mutate()
# output is only printed, and the dat2 converted and shown below would be a
# stale object rather than the result of this pipeline.
# NOTE: grouping by both Metric and time leaves a single Metric in every
# group, so Value[Metric == "measure2"] is empty inside the measure1 groups
# (and vice versa) -- this grouping is the behavior the question asks about.
dat2 <- dat %>%
  gather(key = Metric, value = Value, c(measure1, measure2)) %>%
  group_by(Metric, time) %>%
  mutate(
    test = ifelse(
      Metric == "measure1" & Value > Value[Metric == "measure2"],
      1,
      ifelse(
        Metric == "measure2" & Value > Value[Metric == "measure1"],
        1,
        -1
      )
    )
  )
dat2 <- as.data.frame(dat2)
dat2

但 test 列全都是 NA

time   Metric Value test
1   Q1 measure1     1   NA
2   Q2 measure1     2   NA
3   Q3 measure1     9   NA
4   Q1 measure2     4   NA
5   Q2 measure2     5   NA
6   Q3 measure2     6   NA

为什么不能使用 mutate 添加列?这是否与使用 as.data.frame 并手动添加列有关……还是 mutate 无法识别列名?

谢谢 .

1 回答

  • 3

    问题在于,当您按 Metric 分组时,对于 Metric 为 measure1 的组, Metric == "measure2" 始终返回 FALSE,因此 Value[FALSE] 的结果是 numeric(0);在 mutate 中,这个零长度对象会被转换为 NA:

    # Group by time only, so that each group holds the measure1 and the
    # measure2 row of the same quarter and the cross-measure lookup
    # Value[Metric == ...] is not empty.
    dat %>%
      gather(key = Metric, value = Value, c(measure1, measure2)) %>%
      group_by(time) %>%
      mutate(
        test = ifelse(
          Metric == "measure1" & Value > Value[Metric == "measure2"],
          1,
          ifelse(
            Metric == "measure2" & Value > Value[Metric == "measure1"],
            1,
            -1
          )
        )
      )
    
    # Source: local data frame [6 x 4]
    # Groups: time [3]
    
    #     time   Metric Value  test
    #   <fctr>    <chr> <dbl> <dbl>
    # 1     Q1 measure1     1    -1
    # 2     Q2 measure1     2    -1
    # 3     Q3 measure1     9     1
    # 4     Q1 measure2     4     1
    # 5     Q2 measure2     5     1
    # 6     Q3 measure2     6    -1
    

    通过此示例,您可以更清楚地看到按Metric分组时发生的情况:

    # Demonstration: with Metric among the grouping columns, the lookup
    # Value[Metric == "measure2"] is evaluated separately inside each
    # Metric/time group.
    dat %>%
      gather(key = Metric, value = Value, c(measure1, measure2)) %>%
      group_by(Metric, time) %>%
      mutate(
        test = Value[Metric == "measure2"]
      )
    
    # Source: local data frame [6 x 4]
    # Groups: Metric, time [6]
    
    #     time   Metric Value  test
    #   <fctr>    <chr> <dbl> <dbl>
    # 1     Q1 measure1     1    NA
    # 2     Q2 measure1     2    NA
    # 3     Q3 measure1     9    NA
    # 4     Q1 measure2     4     4
    # 5     Q2 measure2     5     5
    # 6     Q3 measure2     6     6
    

相关问题