在本主题的延续select group before certain observations in R我有分组var - add
(x或y)
data=structure(list(add = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("x",
"y"), class = "factor"), x1 = c(14L, 15L, 36L, 53L, 95L, 56L,
53L, 10L, 39L, 27L, 67L, 25L, 19L, 49L, 53L, 64L, 61L, 12L, 75L,
34L, 88L, 43L, 85L, 93L, 44L, 31L, 37L, 90L, 66L, 39L, 59L, 96L,
41L, 23L, 20L, 26L, 69L, 28L, 35L, 96L, 87L, 82L, 70L, 68L, 26L,
12L, 58L, 18L, 76L, 93L, 3L, 31L), group = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("female", "male"), class = "factor")), .Names = c("add",
"x1", "group"), class = "data.frame", row.names = c(NA, -52L))
这个分析如何按群体划分?
AntoniosK的解决方案非常好
library(tidyverse)
library(data.table)
data %>%
group_by(group, group2 = rleid(group)) %>%
mutate(MEAN = mean(x1[group=="male" & group2==1]),
Q25 = quantile(x1[group=="male" & group2==1], 0.25)) %>%
ungroup() %>%
mutate(x1 = ifelse(group=="male" & group2==3 & x1 > unique(Q25[!is.na(Q25)]), unique(MEAN[!is.na(MEAN)]), x1)) %>%
ungroup() %>%
select(-group2) %>%
data.frame()
但如果我想要x和y组分别执行它 . 我这样做了
data %>% group_by(add) %>%
group_by(group, group2 = rleid(group)) %>%
mutate(MEAN = mean(x1[group=="male" & group2==1]),
Q25 = quantile(x1[group=="male" & group2==1], 0.25)) %>%
ungroup() %>%
mutate(x1 = ifelse(group=="male" & group2==3 & x1 > unique(Q25[!is.na(Q25)]), unique(MEAN[!is.na(MEAN)]), x1)) %>%
ungroup() %>%
select(-group2) %>%
data.frame()
结果不正确的统计数据
add x1 group MEAN Q25
1 x 14.00000 male 46.86364 26.25
2 x 15.00000 male 46.86364 26.25
3 x 36.00000 male 46.86364 26.25
4 x 53.00000 male 46.86364 26.25
5 x 95.00000 male 46.86364 26.25
6 x 56.00000 male 46.86364 26.25
7 x 53.00000 male 46.86364 26.25
8 x 10.00000 male 46.86364 26.25
9 x 39.00000 male 46.86364 26.25
10 x 27.00000 male 46.86364 26.25
11 x 67.00000 male 46.86364 26.25
12 x 25.00000 female NaN NA
13 x 19.00000 female NaN NA
14 x 49.00000 female NaN NA
15 x 53.00000 female NaN NA
16 x 64.00000 female NaN NA
17 x 61.00000 female NaN NA
18 x 12.00000 female NaN NA
19 x 46.86364 male NaN NA
20 x 46.86364 male NaN NA
21 x 46.86364 male NaN NA
22 x 46.86364 male NaN NA
23 x 46.86364 male NaN NA
24 x 46.86364 male NaN NA
25 x 46.86364 male NaN NA
26 x 46.86364 male NaN NA
27 y 37.00000 male 46.86364 26.25
28 y 90.00000 male 46.86364 26.25
29 y 66.00000 male 46.86364 26.25
30 y 39.00000 male 46.86364 26.25
31 y 59.00000 male 46.86364 26.25
32 y 96.00000 male 46.86364 26.25
33 y 41.00000 male 46.86364 26.25
34 y 23.00000 male 46.86364 26.25
35 y 20.00000 male 46.86364 26.25
36 y 26.00000 male 46.86364 26.25
37 y 69.00000 male 46.86364 26.25
38 y 28.00000 female NaN NA
39 y 35.00000 female NaN NA
40 y 96.00000 female NaN NA
41 y 87.00000 female NaN NA
42 y 82.00000 female NaN NA
43 y 70.00000 female NaN NA
44 y 68.00000 female NaN NA
45 y 26.00000 male NaN NA
46 y 12.00000 male NaN NA
47 y 46.86364 male NaN NA
48 y 18.00000 male NaN NA
49 y 46.86364 male NaN NA
50 y 46.86364 male NaN NA
51 y 3.00000 male NaN NA
52 y 46.86364 male NaN NA
女性之前的男性为 x
=男性之前的男性为 y
的平均值为_8596601 = 51
2 回答
这应该工作:
由于已接受的答案已经使用
data.table
包中的rleid()
函数,我建议也可以通过组引用更新获益