首页 文章

如何在R中将两个表与'holes'合并

提问于
浏览
1

我有来自三种不同实验条件的数据,有两种不同的模式,我已经计算并存储在第一个data.frame中,每个模式的每个条件的均值和sd . 看起来像

t1 <- data.frame(condition = rep(c("A","B","C"),times=2),
             mode = rep(c(1,2),each=3),
             mean = rnorm(6),
             sd = rnorm(6))

然后我计算了每个成对比较条件的mann-whitney秩和检验 . 生成的data.frame看起来像这样:

t2 <- data.frame(mode = rep(c(1,2),each=3),
             test = rep(c("AvB","AvC","BvC"),2),
             pvalue = runif(6,0,0.2)
             )

我想将单个data.frame中的两个data.frames与'holes'结合起来,每个测试都显示模式,实际测试的conidtions的平均值和p值 . 我记得这样的事情:

mode    mean A  mean B  mean C  test    pvalue
1       1.34    1.12            A v B   0.067
1       1.34            0.98    A v C   0.021
1               1.12    0.98    B v C   0.345

我知道结果表中有漏洞 - 不太适合在其上运行统计数据 - 但我的目标是创建一个.tex表(使用xtable())来显示所有可能的成对测试,并且同时,在每种模式下显示每种条件下变量的均值 . 这是在纸张附录中显示的 .

谢谢!

3 回答

  • 1

    没有使用reshape2的plyr或dplyr的解决方案 . 它可以处理更多条件和测试,不需要硬编码比较 .

    library(reshape2)
    #make column with variables that are tested (so can be used if many pairs/changing
    #names as long as there is a clear separation
    t2$var1 <- gsub("v.+","",t2$test)
    t2$var2 <- gsub(".+v","",t2$test)
    
    #combine data in long format
    t3 <- merge(t1,t2,by="mode")
    
    #create mean_na vector with removed means when test is not performed
    t3$mean_na <- with(t3, ifelse(condition==var1|condition==var2,mean,NA))
    
    #cast to table
    res_temp <- dcast(mode+test~condition,value.var="mean_na",data=t3)
    res_temp
    
    #re-add pvalue
    result_final <- merge(res_temp,t2[,c("mode","test","pvalue")],by=c("mode","test"))
    
    > result_final
      mode test           A          B        C     pvalue
    1    1  AvB -0.56047565 -0.2301775       NA 0.13114116
    2    1  AvC -0.56047565         NA 1.558708 0.14170609
    3    1  BvC          NA -0.2301775 1.558708 0.10881320
    4    2  AvB  0.07050839  0.1292877       NA 0.11882840
    5    2  AvC  0.07050839         NA 1.715065 0.05783195
    6    2  BvC          NA  0.1292877 1.715065 0.02942273
    
  • 0

    这是一个想法:

    library(tidyr)
    library(dplyr)
    
    t2 %>% 
      separate(test, into = c("cond1", "cond2"), sep = "v", remove = FALSE) %>%
      gather(key, condition, -mode, -pvalue, -test) %>%
      select(-key) %>%
      left_join(t1 %>% select(-sd)) %>% 
      spread(condition, mean) %>%
      setNames(c(names(.)[1:3], paste("mean", names(.)[4:6])))
    

    这使:

    #  mode test      pvalue     mean A    mean B     mean C
    #1    1  AvB 0.053444134 -0.6264538 0.1836433         NA
    #2    1  AvC 0.077222819 -0.6264538        NA -0.8356286
    #3    1  BvC 0.002678067         NA 0.1836433 -0.8356286
    #4    2  AvB 0.076477591  1.5952808 0.3295078         NA
    #5    2  AvC 0.173938169  1.5952808        NA -0.8204684
    #6    2  BvC 0.068069799         NA 0.3295078 -0.8204684
    

    如果要将结果传递给xtable,可以添加:

    ... %>% xtable() %>% print(type = "html")

    <!-- html table generated in R 3.2.1 by xtable 1.7-4 package -->
    <!-- Fri Jun 19 18:45:36 2015 -->
    <table border=1>
    <tr> <th>  </th> <th> mode </th> <th> test </th> <th> pvalue </th> <th> mean A </th> <th> mean B </th> <th> mean C </th>  </tr>
    <tr> <td align="right"> 1 </td> <td align="right"> 1.00 </td> <td> AvB </td> <td align="right"> 0.05 </td> <td align="right"> -0.63 </td> <td align="right"> 0.18 </td> <td align="right">  </td> </tr>
    <tr> <td align="right"> 2 </td> <td align="right"> 1.00 </td> <td> AvC </td> <td align="right"> 0.08 </td> <td align="right"> -0.63 </td> <td align="right">  </td> <td align="right"> -0.84 </td> </tr>
    <tr> <td align="right"> 3 </td> <td align="right"> 1.00 </td> <td> BvC </td> <td align="right"> 0.00 </td> <td align="right">  </td> <td align="right"> 0.18 </td> <td align="right"> -0.84 </td> </tr>
    <tr> <td align="right"> 4 </td> <td align="right"> 2.00 </td> <td> AvB </td> <td align="right"> 0.08 </td> <td align="right"> 1.60 </td> <td align="right"> 0.33 </td> <td align="right">  </td> </tr>
    <tr> <td align="right"> 5 </td> <td align="right"> 2.00 </td> <td> AvC </td> <td align="right"> 0.17 </td> <td align="right"> 1.60 </td> <td align="right">  </td> <td align="right"> -0.82 </td> </tr>
    <tr> <td align="right"> 6 </td> <td align="right"> 2.00 </td> <td> BvC </td> <td align="right"> 0.07 </td> <td align="right">  </td> <td align="right"> 0.33 </td> <td align="right"> -0.82 </td> </tr>
    </table>
    

    Data

    set.seed(1)
    
    t1 <- data.frame(condition = rep(c("A","B","C"),times=2),
                     mode = rep(c(1,2),each=3),
                     mean = rnorm(6),
                     sd = rnorm(6))
    
    t2 <- data.frame(mode = rep(c(1,2),each=3),
                     test = rep(c("AvB","AvC","BvC"),2),
                     pvalue = runif(6,0,0.2))
    
  • 1

    这适用于相当小的数据;如果你真的有更多的条件,这种方法将不会很有趣:

    library(reshape2)
    library(dplyr)
    
    means <- dcast(t1, mode~condition, value.var='mean')
    
    results <- merge(t2, means) %>%
        # extract conditions of each test row
        mutate(first=substr(test,1,1), second=substr(test,3,3), 
               # replace untested condition means with ""
                  meanA=ifelse(first=='A'|second=='A', A, ""),
                  meanB=ifelse(first=='B'|second=='B', B, ""),
                  meanC=ifelse(first=='C'|second=='C', C, "")) %>%
        # discard unwanted columns
        select(mode, meanA, meanB, meanC, test, pvalue)
    
    > print(results)
      mode  meanA  meanB meanC test     pvalue
    1    1 -0.646 -0.778        AvB 0.03597074
    2    1 -0.646        0.014  AvC 0.11394383
    3    1        -0.778 0.014  BvC 0.14393726
    4    2  0.083  0.275        AvB 0.16493966
    5    2  0.083        0.619  AvC 0.06548713
    6    2         0.275 0.619  BvC 0.08474118
    

相关问题