首页 文章

在 R 中按分组变量分别选取某些观测之前的组

提问于
浏览
0

作为主题“select group before certain observations in R”的延续,我现在多了一个分组变量 add(取值为 x 或 y)

# Example data (dput output): a 52-row data.frame with three columns --
#   add   : factor with levels "x" and "y" (the additional grouping variable)
#   x1    : integer values
#   group : factor with levels "female" and "male"
data=structure(list(add = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("x", 
"y"), class = "factor"), x1 = c(14L, 15L, 36L, 53L, 95L, 56L, 
53L, 10L, 39L, 27L, 67L, 25L, 19L, 49L, 53L, 64L, 61L, 12L, 75L, 
34L, 88L, 43L, 85L, 93L, 44L, 31L, 37L, 90L, 66L, 39L, 59L, 96L, 
41L, 23L, 20L, 26L, 69L, 28L, 35L, 96L, 87L, 82L, 70L, 68L, 26L, 
12L, 58L, 18L, 76L, 93L, 3L, 31L), group = structure(c(2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L), .Label = c("female", "male"), class = "factor")), .Names = c("add", 
"x1", "group"), class = "data.frame", row.names = c(NA, -52L))

这个分析如何按群体划分?

AntoniosK的解决方案非常好

library(tidyverse)
library(data.table)

# Replace x1 in the third run of `group` (when male) by the mean of the first
# male run, but only where x1 exceeds that first run's 25% quantile.
data %>%
  # Number consecutive runs of identical `group` values (1, 2, 3, ...).
  mutate(run_id = rleid(group)) %>%
  group_by(group, run_id) %>%
  # Only the first run, when male, gets real values; the others get NaN / NA.
  mutate(
    MEAN = mean(x1[group == "male" & run_id == 1]),
    Q25 = quantile(x1[group == "male" & run_id == 1], probs = 0.25)
  ) %>%
  ungroup() %>%
  # On the ungrouped table, pick up the single non-missing MEAN / Q25 value.
  mutate(
    x1 = ifelse(
      group == "male" & run_id == 3 & x1 > unique(Q25[!is.na(Q25)]),
      unique(MEAN[!is.na(MEAN)]),
      x1
    )
  ) %>%
  select(-run_id) %>%
  data.frame()

但如果我想对 x 组和 y 组分别执行它,该怎么办?我是这样做的:

# NOTE: this is the attempt that produces the wrong statistics printed below.
# The second group_by() replaces the grouping set up by group_by(add) (dplyr
# overrides existing groups unless it is told to add to them), so MEAN and Q25
# are computed over the male rows with group2 == 1 of x and y pooled together.
# NOTE(review): whether rleid() restarts for each `add` here depends on how the
# installed dplyr evaluates expressions inside group_by() -- verify.
data %>% group_by(add) %>% 
  group_by(group, group2 = rleid(group)) %>%                       
  mutate(MEAN = mean(x1[group=="male" & group2==1]),               
         Q25 = quantile(x1[group=="male" & group2==1], 0.25)) %>%
  ungroup() %>%
  # After ungroup() this update runs over the whole table, not once per `add`.
  mutate(x1 = ifelse(group=="male" & group2==3 & x1 > unique(Q25[!is.na(Q25)]), unique(MEAN[!is.na(MEAN)]), x1)) %>%
  ungroup() %>%
  select(-group2) %>%
  data.frame()

结果中的统计量不正确:

add       x1  group     MEAN   Q25
1    x 14.00000   male 46.86364 26.25
2    x 15.00000   male 46.86364 26.25
3    x 36.00000   male 46.86364 26.25
4    x 53.00000   male 46.86364 26.25
5    x 95.00000   male 46.86364 26.25
6    x 56.00000   male 46.86364 26.25
7    x 53.00000   male 46.86364 26.25
8    x 10.00000   male 46.86364 26.25
9    x 39.00000   male 46.86364 26.25
10   x 27.00000   male 46.86364 26.25
11   x 67.00000   male 46.86364 26.25
12   x 25.00000 female      NaN    NA
13   x 19.00000 female      NaN    NA
14   x 49.00000 female      NaN    NA
15   x 53.00000 female      NaN    NA
16   x 64.00000 female      NaN    NA
17   x 61.00000 female      NaN    NA
18   x 12.00000 female      NaN    NA
19   x 46.86364   male      NaN    NA
20   x 46.86364   male      NaN    NA
21   x 46.86364   male      NaN    NA
22   x 46.86364   male      NaN    NA
23   x 46.86364   male      NaN    NA
24   x 46.86364   male      NaN    NA
25   x 46.86364   male      NaN    NA
26   x 46.86364   male      NaN    NA
27   y 37.00000   male 46.86364 26.25
28   y 90.00000   male 46.86364 26.25
29   y 66.00000   male 46.86364 26.25
30   y 39.00000   male 46.86364 26.25
31   y 59.00000   male 46.86364 26.25
32   y 96.00000   male 46.86364 26.25
33   y 41.00000   male 46.86364 26.25
34   y 23.00000   male 46.86364 26.25
35   y 20.00000   male 46.86364 26.25
36   y 26.00000   male 46.86364 26.25
37   y 69.00000   male 46.86364 26.25
38   y 28.00000 female      NaN    NA
39   y 35.00000 female      NaN    NA
40   y 96.00000 female      NaN    NA
41   y 87.00000 female      NaN    NA
42   y 82.00000 female      NaN    NA
43   y 70.00000 female      NaN    NA
44   y 68.00000 female      NaN    NA
45   y 26.00000   male      NaN    NA
46   y 12.00000   male      NaN    NA
47   y 46.86364   male      NaN    NA
48   y 18.00000   male      NaN    NA
49   y 46.86364   male      NaN    NA
50   y 46.86364   male      NaN    NA
51   y  3.00000   male      NaN    NA
52   y 46.86364   male      NaN    NA

对于 x,女性之前的男性的平均值 = 42;对于 y,女性之前的男性的平均值 = 51

2 回答

  • 1

    这样应该可行:

    # Run the whole procedure separately inside each level of `add`.
    data %>%
      group_by(add) %>%
      # Number consecutive runs of `group`, restarting for every `add`.
      mutate(run_id = rleid(group)) %>%
      group_by(add, group, run_id) %>%
      # Only the first run, when male, gets real values; the others get NaN / NA.
      mutate(
        MEAN = mean(x1[group == "male" & run_id == 1]),
        Q25 = quantile(x1[group == "male" & run_id == 1], probs = 0.25)
      ) %>%
      # Back to one group per `add`, so each add has one non-missing MEAN / Q25.
      group_by(add) %>%
      mutate(
        x1 = ifelse(
          group == "male" & run_id == 3 & x1 > unique(Q25[!is.na(Q25)]),
          unique(MEAN[!is.na(MEAN)]),
          x1
        )
      ) %>%
      ungroup() %>%
      select(-run_id) %>%
      data.frame()
    
  • 2

    由于已接受的答案已经使用了 data.table 包中的 rleid() 函数,我建议同时利用 data.table 的按组引用更新(update by reference)功能:

    library(data.table)
    # Convert `data` to a data.table in place (by reference, no copy).
    setDT(data)
    # Temporary run counter for `group`, restarting within each `add`.
    data[, run_id := rleid(group), by = add]
    # Fill mean / Q25 only on the rows of the first run of each `add`;
    # all other rows keep NA in both new columns.
    data[
      run_id == 1L,
      c("mean", "Q25") := list(mean(x1), quantile(x1, 0.25)),
      by = add
    ]
    # Drop the helper column and print the updated table.
    data[, run_id := NULL]
    data[]
    

        add x1  group     mean  Q25
     1:   x 14   male 42.27273 21.0
     2:   x 15   male 42.27273 21.0
     3:   x 36   male 42.27273 21.0
     4:   x 53   male 42.27273 21.0
     5:   x 95   male 42.27273 21.0
     6:   x 56   male 42.27273 21.0
     7:   x 53   male 42.27273 21.0
     8:   x 10   male 42.27273 21.0
     9:   x 39   male 42.27273 21.0
    10:   x 27   male 42.27273 21.0
    11:   x 67   male 42.27273 21.0
    12:   x 25 female       NA   NA
    13:   x 19 female       NA   NA
    14:   x 49 female       NA   NA
    15:   x 53 female       NA   NA
    16:   x 64 female       NA   NA
    17:   x 61 female       NA   NA
    18:   x 12 female       NA   NA
    19:   x 75   male       NA   NA
    20:   x 34   male       NA   NA
    21:   x 88   male       NA   NA
    22:   x 43   male       NA   NA
    23:   x 85   male       NA   NA
    24:   x 93   male       NA   NA
    25:   x 44   male       NA   NA
    26:   x 31   male       NA   NA
    27:   y 37   male 51.45455 31.5
    28:   y 90   male 51.45455 31.5
    29:   y 66   male 51.45455 31.5
    30:   y 39   male 51.45455 31.5
    31:   y 59   male 51.45455 31.5
    32:   y 96   male 51.45455 31.5
    33:   y 41   male 51.45455 31.5
    34:   y 23   male 51.45455 31.5
    35:   y 20   male 51.45455 31.5
    36:   y 26   male 51.45455 31.5
    37:   y 69   male 51.45455 31.5
    38:   y 28 female       NA   NA
    39:   y 35 female       NA   NA
    40:   y 96 female       NA   NA
    41:   y 87 female       NA   NA
    42:   y 82 female       NA   NA
    43:   y 70 female       NA   NA
    44:   y 68 female       NA   NA
    45:   y 26   male       NA   NA
    46:   y 12   male       NA   NA
    47:   y 58   male       NA   NA
    48:   y 18   male       NA   NA
    49:   y 76   male       NA   NA
    50:   y 93   male       NA   NA
    51:   y  3   male       NA   NA
    52:   y 31   male       NA   NA
        add x1  group     mean  Q25

相关问题