我的样本 .

data=structure(list(add = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("x", 
"y"), class = "factor"), x1 = c(14L, 15L, 36L, 0L, 0L, 0L, 53L, 
10L, 39L, 27L, 67L, 25L, 19L, 49L, 53L, 64L, 61L, 12L, 75L, 34L, 
88L, 43L, 85L, 93L, 44L, 31L, 37L, 90L, 66L, 39L, 59L, 96L, 41L, 
23L, 20L, 26L, 69L, 28L, 35L, 96L, 87L, 82L, 70L, 68L, 26L, 12L, 
58L, 18L, 76L, 93L, 3L, 31L), group = structure(c(2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L), .Label = c("female", "male"), class = "factor")), .Names = c("add", 
"x1", "group"), class = "data.frame", row.names = c(NA, -52L))

在这个数据中有组变量（性别（男性和女性）我需要获得统计学平均值和25％的所有男性在女性之前 . 男性在女性之后，我不会触摸 . 这是分析 x 和 y 分析来自添加栏 . 如果对于男性来说，女性 Value 超过25％，这是我们在女性之前为男性计算的，那么这个值必须用男性的平均值代替女性“女性类别我们不接触” .

AntoniosK的解决方案非常好

library(tidyverse)
library(data.table)

data %>%  
  group_by(add) %>%                                           # for each add do the below...
  mutate(group2 = rleid(group)) %>% 
  group_by(add, group, group2) %>%
  mutate(MEAN = mean(x1[group=="male" & group2==1]),               
         Q25 = quantile(x1[group=="male" & group2==1], 0.25)) %>%
  group_by(add) %>%                                            # for each add update x1 values....
  mutate(x1 = ifelse(group=="male" & group2==3 & x1 > unique(Q25[!is.na(Q25)]), unique(MEAN[!is.na(MEAN)]), x1)) %>%
  ungroup() %>%
  select(-group2) %>%
  data.frame()

但现在我想将x1替换为0到Na .

data$x1[data$x1 == 0] <- NA

在它之后，当我取消脚本时，我得到 error

mutate_impl（.data，dots）中的错误：评估错误：缺少值，如果'na.rm'为FALSE则不允许NaN .

该怎么做，该脚本通过NA并仅使用int值？

编辑

data=structure(list(add = c(11202L, 11202L, 11202L, 11202L, 11202L, 
                       11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 
                       11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 
                       11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 
                       11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 
                       11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 
                       11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L), x1 = c(NA, 
                                                                                       2L, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, 1L, 1L, NA, NA, NA, 
                                                                                       NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, NA, NA, 
                                                                                       NA, NA, NA, NA, NA, NA, NA, 3L, NA, NA, NA, NA, 1L, 1L, NA, NA, 
                                                                                       NA, NA, NA), group = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                                                                                                        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                                                                                                        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 
                                                                                                                        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("female", 
                                                                                                                                                                                        "male"), class = "factor")), .Names = c("add", "x1", "group"), class = "data.frame", row.names = c(NA, 
                                                                                                                                                                                                                                                                                           -52L))

library(tidyverse)
library(data.table)

data %>%  
  group_by(add) %>%                                          
  mutate(group2 = rleid(group)) %>% 
  group_by(add, group, group2) %>%
  mutate(MEAN = mean(x1[group=="male" & group2==1]),               
         Q25 = quantile(x1[group=="male" & group2==1], 0.25)) %>%
  group_by(add) %>%                                           
  mutate(x1 = ifelse(group=="male" & group2==3 & x1 > unique(Q25[!is.na(Q25)]), unique(MEAN[!is.na(MEAN)]), x1),
         x1 = ifelse(x1==0, NA, x1)) %>%  # new code added
  ungroup() %>%
  select(-group2) %>%
  data.frame()

Edit2

代码的结果

add x1  group   MEAN    Q25
x   14.00000    male    23.72727    5.0
x   15.00000    male    23.72727    5.0
x   36.00000    male    23.72727    5.0
x   0.00000 male    23.72727    5.0
x   0.00000 male    23.72727    5.0
x   0.00000 male    23.72727    5.0
x   53.00000    male    23.72727    5.0
x   10.00000    male    23.72727    5.0
x   39.00000    male    23.72727    5.0
x   27.00000    male    23.72727    5.0
x   67.00000    male    23.72727    5.0
x   25.00000    female  NaN NA
x   19.00000    female  NaN NA
x   49.00000    female  NaN NA
x   53.00000    female  NaN NA
x   64.00000    female  NaN NA
x   61.00000    female  NaN NA
x   12.00000    female  NaN NA
x   23.72727    male    NaN NA
x   23.72727    male    NaN NA
x   23.72727    male    NaN NA
x   23.72727    male    NaN NA
x   23.72727    male    NaN NA
x   23.72727    male    NaN NA
x   23.72727    male    NaN NA
x   23.72727    male    NaN NA

后

add x1     group
x   94.90   male

女性的前4名男性总和= 94.90

1 回答

我添加了一段代码来解决您的问题并简要解释错误 .

Updated code

data=structure(list(add = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("x", 
"y"), class = "factor"), x1 = c(14L, 15L, 36L, 0L, 0L, 0L, 53L, 
10L, 39L, 27L, 67L, 25L, 19L, 49L, 53L, 64L, 61L, 12L, 75L, 34L, 
88L, 43L, 85L, 93L, 44L, 31L, 37L, 90L, 66L, 39L, 59L, 96L, 41L, 
23L, 20L, 26L, 69L, 28L, 35L, 96L, 87L, 82L, 70L, 68L, 26L, 12L, 
58L, 18L, 76L, 93L, 3L, 31L), group = structure(c(2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L), .Label = c("female", "male"), class = "factor")), .Names = c("add", 
"x1", "group"), class = "data.frame", row.names = c(NA, -52L))

library(tidyverse)
library(data.table)

data %>%  
  group_by(add) %>%                                          
  mutate(group2 = rleid(group)) %>% 
  group_by(add, group, group2) %>%
  mutate(MEAN = mean(x1[group=="male" & group2==1]),               
         Q25 = quantile(x1[group=="male" & group2==1], 0.25)) %>%
  group_by(add) %>%                                           
  mutate(x1 = ifelse(group=="male" & group2==3 & x1 > unique(Q25[!is.na(Q25)]), unique(MEAN[!is.na(MEAN)]), x1),
         x1 = ifelse(x1==0, NA, x1)) %>%  # new code added
  ungroup() %>%
  select(-group2) %>%
  data.frame()

Error explanation

您必须运行代码的前一部分，最后只需更新 x1 列 . 您收到该错误，因为 NA 值会破坏您需要执行的 mean 和 quantile 计算 .

另一种方法是在开头更新 x1 然后使用 na.rm=T 进行计算 .

对于你提到的 new case ，你从 x1 的 NA 值开始，试试这个：

data %>%  
  group_by(add) %>%                                          
  mutate(group2 = rleid(group)) %>% 
  group_by(add, group, group2) %>%
  mutate(MEAN = mean(x1[group=="male" & group2==1], na.rm = T),      ## extra code here ##    
         Q25 = quantile(x1[group=="male" & group2==1], 0.25, na.rm = T)) %>%  ## extra code here ##
  group_by(add) %>%                                           
  mutate(x1 = ifelse(group=="male" & group2==3 & x1 > unique(Q25[!is.na(Q25)]), unique(MEAN[!is.na(MEAN)]), x1))%>%
  ungroup() %>%
  select(-group2) %>%
  data.frame()

对于您提到的 new case （编辑2），首先将前一代码的输出保存为 data2 ：

data2 = data %>% ...

然后运行这个：

data2 %>%
  group_by(add) %>%                           # for each add value                      
  mutate(group2 = rleid(group)) %>%           # created group2
  filter(group=="male" & group2==3) %>%       # keep only male after female
  summarise(SUM = sum(x1[row_number() <= 4])) # get sum of x1 for first 4 rows

# # A tibble: 2 x 2
#   add     SUM
#   <fct> <dbl>
# 1 x      94.9
# 2 y     107.

回复于 2024-05-17T11:49:00+08:00

在通过将R中的var与NA对照分组来分离某些观察之前选择组

编辑

Edit2

1 回答

相关问题