好的，这是我想用dplyr实现的全部视图：

Example

使用dplyr我正在进行计算以形成新列 .

initial.capital - 
x.long.shares - 
x.end.value - 
x.net.profit - 
new.initial.capital

执行此操作的代码：

# Calculate Share Prices For Each ETF 
# Initialize Start Capital Column 
library(dplyr)
library(data.table)
df$inital.capital <- 10000
output <- df %>%
  dplyr::mutate(RunID = data.table::rleid(x.long)) %>%
  group_by(RunID) %>%
  dplyr::mutate(x.long.shares = ifelse(x.long == 0,0, 
                                       ifelse(row_number() == n(),
                                      first(inital.capital) / first(close.x),0))) %>%
  dplyr::mutate(x.end.value = ifelse(x.long == 0,0, 
                                       ifelse(row_number() == n(),
                                              last(x.long.shares) * last(close.x),0))) %>%
  dplyr::mutate(x.net.profit = ifelse(x.long == 0,0, 
                                     ifelse(row_number() == n(),
                                            last(initial.capital) - last(x.end.value),0))) %>%
  dplyr::mutate(new.initial.capital = ifelse(x.long == 0,0, 
                                      ifelse(row_number() == n(),
                                             last(x.net.profit) + last(inital.capital),0))) %>%

  ungroup() %>%
  select(-RunID)

我按x.long列分组 . 并在分组时 . 使用组内的第一个/最后一个位置从不同列进行计算我的基本问题是：

在照片中，请参阅new.initial.capital列下的红色突出显示 . 如何“保存”此值（10185.33）...并将其插入NEXT组，将其保存在initial.capital列下，再次以红色突出显示（它将替换10,000或将其存储在组的第一行）？

编辑

我真正需要做的是将new.initial.capital列中的最终值保存到变量中 . 然后这个变量可以在下一组中使用（参见下面的代码）这里的值将用作下一组计算的一部分...然后当更新结束new.initial.capital时，这个值进入变量，然后它转到下一组的开头（见下面的代码）..然后所有的值将再次更新....变量将放在这里：

output <- df %>%
  dplyr::mutate(RunID = data.table::rleid(x.long)) %>%
  group_by(RunID) %>%
  dplyr::mutate(x.long.shares = ifelse(x.long == 0,0, 
                                       ifelse(row_number() == n(),
                                      first(end_of_new.initial.capital_variable_from_previous_group) / first(close.x),0))) %>%

我基本上想在dplyr组之间传递值 . 这可能吗？或者我每次都可以将它存储在变量中？

下面是照片中的一些示例数据：保存到.txt

df <- read.table("your_dir\df.txt",header=TRUE, sep="", stringsAsFactors=FALSE)

    close.x x.long  y.short x.short y.long  inital.capital  x.long.shares   x.end.value x.net.profit    new.initial.capital
37.96   NA  NA  NA  NA  10000   NA  NA  NA  NA
36.52   0   0   0   0   10000   0   0   0   0
38.32   0   0   0   0   10000   0   0   0   0
38.5504 0   0   0   0   10000   0   0   0   0
38.17   0   0   0   0   10000   0   0   0   0
38.85   1   1   0   0   10000   0   0   0   0
38.53   1   1   0   0   10000   0   0   0   0
39.13   1   1   0   0   10000   0   0   0   0
38.13   1   1   0   0   10000   257.4002574 9814.671815 185.3281853 10185.32819
37.01   0   0   1   1   10000   0   0   0   0
36.14   0   0   1   1   10000   0   0   0   0
35.27   0   0   1   1   10000   0   0   0   0
35.13   0   0   1   1   10000   0   0   0   0
32.2    0   0   1   1   10000   0   0   0   0
33.03   1   1   0   0   10000   0   0   0   0
34.94   1   1   0   0   10000   0   0   0   0
34.57   1   1   0   0   10000   0   0   0   0
33.6    1   1   0   0   10000   0   0   0   0
34.34   1   1   0   0   10000   302.7550711 10396.60914 -396.6091432    9603.390857
35.86   0   0   1   1   10000   0   0   0   0

我试过了什么

我试着做一个变量：

inital.capital <- 10000

并在代码中插入...

output <- df %>%
  dplyr::mutate(RunID = data.table::rleid(x.long)) %>%
  group_by(RunID) %>%
  dplyr::mutate(x.long.shares = ifelse(x.long == 0,0, 
                                       ifelse(row_number() == n(),
                                              initial.capital / first(close.x),0))) %>%   # place initial.capital variable.. initialized with 10000
  dplyr::mutate(x.end.value = ifelse(x.long == 0,0, 
                                       ifelse(row_number() == n(),
                                              last(x.long.shares) * last(close.x),0))) %>%
  dplyr::mutate(x.net.profit = ifelse(x.long == 0,0, 
                                     ifelse(row_number() == n(),
                                            last(initial.capital) - last(x.end.value),0))) %>%
  dplyr::mutate(new.initial.capital = ifelse(x.long == 0,0, 
                                      ifelse(row_number() == n(),
                                             last(x.net.profit) + last(inital.capital),0))) %>%
  dplyr::mutate(new.initial.capitals = ifelse(x.long == 0,0, 
                                             ifelse(row_number() == n(),
                                                    inital.capital < - last(new.initial.capital),0))) %>%  # update variable with the final balance of new.inital.capital column

  ungroup() %>%
  select(-RunID)

如果我每次都可以更新initial.capital变量 . 然后，这将作为组之间的“链接” . 但是，这个想法目前还没有在dplyr设置中工作 .

任何协助赞赏 .

6 回答

你在问题中使用data.table并标记了data.table这个问题，所以这里是一个data.table答案 . 当 j 计算时，它处于静态范围内，其中局部变量保留其来自前一组的值 .

使用虚拟数据来演示：

require(data.table)
set.seed(1)
DT = data.table( long = rep(c(0,1,0,1),each=3),
                 val = sample(5,12,replace=TRUE))
DT
    long val
 1:    0   2
 2:    0   2
 3:    0   3
 4:    1   5
 5:    1   2
 6:    1   5
 7:    0   5
 8:    0   4
 9:    0   4
10:    1   1
11:    1   2
12:    1   1

DT[, v1:=sum(val), by=rleid(long)][]
    long val v1
 1:    0   2  7
 2:    0   2  7
 3:    0   3  7
 4:    1   5 12
 5:    1   2 12
 6:    1   5 12
 7:    0   5 13
 8:    0   4 13
 9:    0   4 13
10:    1   1  4
11:    1   2  4
12:    1   1  4

到目前为止，足够简单 .

prev = NA  # initialize previous group value
DT[, v2:={ans<-last(val)/prev; prev<-sum(val); ans}, by=rleid(long)][]
    long val v1         v2
 1:    0   2  7         NA
 2:    0   2  7         NA
 3:    0   3  7         NA
 4:    1   5 12 0.71428571
 5:    1   2 12 0.71428571
 6:    1   5 12 0.71428571
 7:    0   5 13 0.33333333
 8:    0   4 13 0.33333333
 9:    0   4 13 0.33333333
10:    1   1  4 0.07692308
11:    1   2  4 0.07692308
12:    1   1  4 0.07692308

> 3/NA
[1] NA
> 5/7
[1] 0.7142857
> 4/12
[1] 0.3333333
> 1/13
[1] 0.07692308
> prev
[1] NA

请注意， prev 值未更新，因为 prev 和 ans 是 j 范围内的局部变量，这些变量在每个组运行时都会更新 . 为了说明，可以使用R的 <<- 运算符从每个组内更新全局 prev ：

DT[, v2:={ans<-last(val)/prev; prev<<-sum(val); ans}, by=rleid(long)]
prev
[1] 4

但是没有必要在data.table中使用 <<- ，因为局部变量是静态的（保留它们之前组的值） . 除非您在查询完成后需要使用最终组的值 .

回复于 2024-04-28T06:34:08+08:00

1
你_1125112_优雅' pure-dplyr solution, because dplyr isn' t真的是为了做到这一点 . dplyr喜欢做的是分别使用窗口和汇总函数的map / reduce类型操作（ mutate 和 summarize ） . 你究竟是什么，因为你希望每个组都依赖于最后一个，所以你真的在描述一个带有副作用的循环操作 - 两个非R-philosophy操作 .

如果你想破解你所描述的方式，你可以尝试这样的方法：
```
new.initial.capital <- 0
for (z in split(df, df$x.long)) {
    z$initial.capital[[1]] <- new.initial.capital
    # some other calculations here
    # maybe you want to modify df as well
    new.initial.capital <- foo
}
```
但是，这实际上不是一个非常友好的R代码，因为它取决于副作用和循环 . 如果你想与dplyr集成，我建议你看看你是否可以根据摘要和/或窗口函数重新计算你的计算 .

更多：
https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf
https://danieljhocking.wordpress.com/2014/12/03/lags-and-moving-means-in-dplyr/
回复于 2024-04-28T06:34:08+08:00

这种使用第一个和最后一个是非常不整洁的，所以我们将保持最新的一步 .

首先，我们按照您的代码构建中间数据，但添加一些列以便稍后在正确的位置加入 . 我不确定你是否需要保留所有列，否则你不需要第二次加入 .

library(dplyr)
library(tidyr)

df1 <- df0 %>%
  dplyr::mutate(RunID = data.table::rleid(x.long)) %>%
  group_by(RunID) %>%
  mutate(RunID_f = ifelse(row_number()==1,RunID,NA)) %>%  #  for later merge
  mutate(RunID_l = ifelse(row_number()==n(),RunID,NA))    #  possibly unneeded

然后我们构建汇总数据，我按照你的意思稍微重构了你的代码，因为这些操作“应该”是行的 .

summarized_data <- df1 %>%
  filter(x.long !=0) %>%
  summarize_at(vars(close.x,inital.capital),c("first","last")) %>%
  mutate(x.long.share        = inital.capital_first / close.x_first,
         x.end.value         = x.long.share         * close.x_last,
         x.net.profit        = inital.capital_last - x.end.value,
         new.initial.capital = x.net.profit         + inital.capital_last,
         lagged.new.initial.capital = lag(new.initial.capital,1))

# A tibble: 2 x 10
#   RunID close.x_first inital.capital_first close.x_last inital.capital_last x.long.share x.end.value x.net.profit new.initial.capital lagged.new.initial.capital
#   <int>         <dbl>                <int>        <dbl>               <int>        <dbl>       <dbl>        <dbl>               <dbl>                      <dbl>
# 1     3         38.85                10000        38.13               10000     257.4003    9814.672     185.3282           10185.328                         NA
# 2     5         33.03                10000        34.34               10000     302.7551   10396.609    -396.6091            9603.391                   10185.33

然后我们将我们的汇总表加入到原始表中，充分利用了第一步的技巧 . 如果您不需要所有列，则可以跳过第一个连接 .

df2 <- df1 %>% ungroup %>%
  left_join(summarized_data %>% select(-lagged.new.initial.capital) ,by=c("RunID_l"="RunID")) %>%      # if you want the other variables, if not, skip the line
  left_join(summarized_data %>% select(RunID,lagged.new.initial.capital) ,by=c("RunID_f"="RunID")) %>%
  mutate(inital.capital = ifelse(is.na(lagged.new.initial.capital),inital.capital,lagged.new.initial.capital)) %>%
  select(close.x:inital.capital) # for readability here

# # A tibble: 20 x 6
# close.x x.long y.short x.short y.long inital.capital
# <dbl>  <int>   <int>   <int>  <int>          <dbl>
#  1 37.9600     NA      NA      NA     NA       10000.00
#  2 36.5200      0       0       0      0       10000.00
#  3 38.3200      0       0       0      0       10000.00
#  4 38.5504      0       0       0      0       10000.00
#  5 38.1700      0       0       0      0       10000.00
#  6 38.8500      1       1       0      0       10000.00
#  7 38.5300      1       1       0      0       10000.00
#  8 39.1300      1       1       0      0       10000.00
#  9 38.1300      1       1       0      0       10000.00
# 10 37.0100      0       0       1      1       10000.00
# 11 36.1400      0       0       1      1       10000.00
# 12 35.2700      0       0       1      1       10000.00
# 13 35.1300      0       0       1      1       10000.00
# 14 32.2000      0       0       1      1       10000.00
# 15 33.0300      1       1       0      0       10185.33
# 16 34.9400      1       1       0      0       10000.00
# 17 34.5700      1       1       0      0       10000.00
# 18 33.6000      1       1       0      0       10000.00
# 19 34.3400      1       1       0      0       10000.00
# 20 35.8600      0       0       1      1       10000.00

data

df<- read.table(text="close.x x.long  y.short x.short y.long  inital.capital  x.long.shares   x.end.value x.net.profit    new.initial.capital
37.96   NA  NA  NA  NA  10000   NA  NA  NA  NA
36.52   0   0   0   0   10000   0   0   0   0
38.32   0   0   0   0   10000   0   0   0   0
38.5504 0   0   0   0   10000   0   0   0   0
38.17   0   0   0   0   10000   0   0   0   0
38.85   1   1   0   0   10000   0   0   0   0
38.53   1   1   0   0   10000   0   0   0   0
39.13   1   1   0   0   10000   0   0   0   0
38.13   1   1   0   0   10000   257.4002574 9814.671815 185.3281853 10185.32819
37.01   0   0   1   1   10000   0   0   0   0
36.14   0   0   1   1   10000   0   0   0   0
35.27   0   0   1   1   10000   0   0   0   0
35.13   0   0   1   1   10000   0   0   0   0
32.2    0   0   1   1   10000   0   0   0   0
33.03   1   1   0   0   10000   0   0   0   0
34.94   1   1   0   0   10000   0   0   0   0
34.57   1   1   0   0   10000   0   0   0   0
33.6    1   1   0   0   10000   0   0   0   0
34.34   1   1   0   0   10000   302.7550711 10396.60914 -396.6091432    9603.390857
35.86   0   0   1   1   10000   0   0   0   0",stringsAsFactors=FALSE,header=TRUE)

df0 <- df %>% select(close.x:inital.capital)

回复于 2024-04-28T06:34:08+08:00

1
我花了很长时间才明白你的目标：单一的“更新”，这有用吗？
```
library(tidyverse)
library(magrittr)
temp <- df %>% 
  dplyr::mutate(RunID = data.table::rleid(x.long)) %>%
  group_by(RunID) %>% # Don't delete the RunID 
  dplyr::mutate(max.new = max(new.initial.capital)) %>% 
  slice(1) %>%
  arrange(x.long) %>% 
  dplyr::mutate(pass.value = lag(max.new))

df <- left_join(df, temp %>% dplyr::select(x.long, RunID, pass.value)
```
在此之后，根据上面的分组 row_number ，使用 pass.value 列替换 initial.capital 的值 .

我不太确定如何在没有循环这个更新程序的情况下解决这个问题，我想如果你想做这样的10,000次更新，那肯定会是一个无赖 . 但它将使您能够将值“传递”到第二个红色单元格中图片 .
回复于 2024-04-28T06:34:08+08:00
0

滚动这样的值可能非常困难 . 我认为最好在顶部加上一条作为交易的线，其净效应是为您的基本资本增加10k . 然后，您可以使用偏移量的累积总和来相对轻松地实现您正在寻找的内容：

pdf = df %>% group_by(group) %>% arrange(dates) %>% mutate(cs = cumsum(sales))

代码复制自r cumsum per group in dplyr

回复于 2024-04-28T06:34:08+08:00

我决定重新审视这个问题，这是一个解决方案，按行业分组 signal ，制作交易组ID的开始和结束 . 之后，使用普通 for loop 对 ifelse 语句进行计算并更新组之间的运行变量： shares ， total_start_capital 和 total_end_capital . 这些允许将变量从交易转移到下一个交易，并用于每个连续的交易计算 . 如果只允许 dplyr 更新组之间的变量 . 如果有人想要使用PnL $与％rets创建自己的后台测试脚本，这就有 Value .

# Dollar PnL Back Test Script Example 
# Andrew Bannerman 1.7.2017

df<- read.table(text="37.96   NA  NA  
36.52   0   0   
38.32   0   0   
38.55  0   0  
38.17   0   0   
38.85   1   1   
38.53   1   1  
39.13   1   1   
38.13   1   1    
37.01   0   0   
36.14   0   0   
35.27   0   0   
35.13   0   0   
32.2    0   0 
33.03   1   1   
34.94   1   1   
34.57   1   1  
33.6    1   1 
34.34   1   1  
35.86 0     0   ",stringsAsFactors=FALSE,header=TRUE)

colnames(df)[1] <- "close"
colnames(df)[2] <- "signal"
colnames(df)[3] <- "signal_short"

# Place group id at start/end of each group 
df <- df %>%
  dplyr::mutate(ID = data.table::rleid(signal)) %>%
  group_by(ID) %>%
  dplyr::mutate(TradeID = ifelse(signal ==1,as.numeric(row_number()),0))%>% # Run id per group month
  dplyr::mutate(group_id_last = ifelse(signal == 0,0, 
                                         ifelse(row_number() == n(), 3,0))) %>%
  dplyr::mutate(group_id_first = ifelse(TradeID == 1 & signal == 1,2,0))

############################################## 
# Custom loop 
################################################
run_start_equity <- 10000  # Enter starting equity
run_end_equity <- 0        # variable for updating end equity in loop
run.shares <- 0
df$start.balance <- 0
df$net.proceeds <- 0
df$end.balance <-0
df$shares <- 0
i=1
for (i in 1:nrow(df)) { 
  df$start.balance[i] <- ifelse(df$group_id_first[i] == 2, run_start_equity, 0)
  df$shares[i] <- ifelse(df$group_id_first[i] == 2, run_start_equity / df$close[i],0)
  run.shares <- ifelse(df$group_id_first[i] == 2, df$shares[i], run.shares)
  df$end.balance[i] <- ifelse(df$group_id_last[i] == 3, run.shares * df$close[i],0)
  run_end_equity <- ifelse(df$group_id_last[i] == 3, df$end.balance[i],run_end_equity)
  df$net.proceeds[i] <- ifelse(df$group_id_last[i] == 3, run_end_equity - run_start_equity,0)
  run_start_equity <- ifelse(df$group_id_last[i] == 3, df$end.balance[i] ,run_start_equity)
   }

使用所需的输出：

> df
# A tibble: 19 x 11
# Groups:   ID [5]
   close signal signal_short    ID TradeID group_id_last group_id_first start.balance net.proceeds end.balance   shares
   <dbl>  <int>        <int> <int>   <dbl>         <dbl>          <dbl>         <dbl>        <dbl>       <dbl>    <dbl>
 1 36.52      0            0     1       0             0              0         0.000       0.0000       0.000   0.0000
 2 38.32      0            0     1       0             0              0         0.000       0.0000       0.000   0.0000
 3 38.55      0            0     1       0             0              0         0.000       0.0000       0.000   0.0000
 4 38.17      0            0     1       0             0              0         0.000       0.0000       0.000   0.0000
 5 38.85      1            1     2       1             0              2     10000.000       0.0000       0.000 257.4003
 6 38.53      1            1     2       2             0              0         0.000       0.0000       0.000   0.0000
 7 39.13      1            1     2       3             0              0         0.000       0.0000       0.000   0.0000
 8 38.13      1            1     2       4             3              0         0.000    -185.3282    9814.672   0.0000
 9 37.01      0            0     3       0             0              0         0.000       0.0000       0.000   0.0000
10 36.14      0            0     3       0             0              0         0.000       0.0000       0.000   0.0000
11 35.27      0            0     3       0             0              0         0.000       0.0000       0.000   0.0000
12 35.13      0            0     3       0             0              0         0.000       0.0000       0.000   0.0000
13 32.20      0            0     3       0             0              0         0.000       0.0000       0.000   0.0000
14 33.03      1            1     4       1             0              2      9814.672       0.0000       0.000 297.1442
15 34.94      1            1     4       2             0              0         0.000       0.0000       0.000   0.0000
16 34.57      1            1     4       3             0              0         0.000       0.0000       0.000   0.0000
17 33.60      1            1     4       4             0              0         0.000       0.0000       0.000   0.0000
18 34.34      1            1     4       5             3              0         0.000     389.2589   10203.931   0.0000
19 35.86      0            0     5       0             0              0         0.000       0.0000       0.000   0.0000

回复于 2024-04-28T06:34:08+08:00

dplyr group by，将前一组的值转发到下一组

编辑

我试过了什么

6 回答

相关问题