自从我加入这个社区以来,我正在处理同一组用户定义的函数 . 所以我将提供相同的伪数据以及其他必要的代码 . 我的问题是关于将plyr或apply函数应用于包含数据帧作为元素的列表的正确语法,其中我想将“用户定义函数”应用于该数据帧列表的每个元素 . (这篇文章很长因为我提供了所有的代码)

为用户定义的函数创建数据

set.seed(31)
foo <- function(myHour, myDate){
  rlnorm(1, meanlog=0,sdlog=1)*(myHour) + (150*myDate) 
}
Hour <- 1:24
Day <-1:1080
dates <-seq(as.Date("2010-01-01"), by = "day", length.out= 1080)
myData <- expand.grid( Day, Hour)
names(myData) <- c("Date","Hour")

myData$Adspend <- apply(myData, 1, function(x) foo(x[2], x[1]))
myData$Date <-dates

myData$Demand <-(rnorm(1,mean = 0, sd=1)+.75*myData$Adspend)

myData$Hour<-as.factor(myData$Hour)

使用此数据,我将其传递给我创建的函数,该函数将数据集拆分为24个列表元素,并应用我创建的函数,该函数将早期小时广告支出列添加到每小时需求的每个数据集中 . 每个数据集将具有不同的小时数 . 这非常有效 .

library(timeDate)
library(lubridate)
library(xlsx)
library(rJava)
library(xlsxjars)
library(zoo)
library(xts)
library(reshape)
library(reshape2)

######################################################################################
AddLag <- function(DF,Date,Time,RESP,AD,LAG=9,BestPred=4, Split=TRUE) {
  SplitDat<-split(DF,list(DF[[Time]]))
  DFE<-vector("list",length(SplitDat))
  DATASETS<-vector("list",length(SplitDat))
  LAGDAT<-vector("list",length(SplitDat))
  CORS<-vector("list",length(SplitDat))
  BESTHR<-vector("list",length(SplitDat))
  BEST<-vector("list",length(SplitDat))
  Lags<-24-LAG
  ADDATE<-DF[,c(1,2,3)]
  RESPDATE<-DF[,c(1,2,4)]
  HOURAD<-melt(ADDATE, id=c(Date,Time), measured =c(AD))
  HOURAD<- cast(HOURAD,as.formula(paste0("... ~",Time)))
  ADHR<-zoo(HOURAD,HOURAD[,1])
  HOURRSP<-melt(RESPDATE, id=c(Date,Time), measured =c(RESP))
  HOURRSP<- cast(HOURRSP,as.formula(paste0("... ~",Time)))
  DEMHR<-zoo(HOURRSP,HOURRSP[,1])
  for(i in 1:ncol(ADHR)) {
    DFE[[i]]<-setNames(merge(DEMHR[,i],ADHR[,i]), c(dimnames(DEMHR)[[2]][i],dimnames(ADHR)[[2]][i]))    
    ifelse(i==1,DATASETS[[i]]<-setNames(merge(DFE[[i]],lag(ADHR[,24:Lags],-1)),c(paste0(dimnames(DEMHR)[[2]][i],".Rsp"),dimnames(ADHR)[[2]][i],dimnames(ADHR)[[2]][24:Lags])),DATASETS[[i]]<-setNames(merge(DFE[[i]],DATASETS[[i-1]][,2:ncol(DATASETS[[i-1]])]),c(paste(dimnames(DEMHR)[[2]][i],".Rsp"),dimnames(ADHR)[[2]][i],dimnames(DATASETS[[i-1]])[[2]][2:ncol(DATASETS[[i-1]])])))   
    LAGDAT[[i]]<-na.omit(DATASETS[[i]])
    CORS<-sapply(LAGDAT[[i]],cor,y=LAGDAT[[i]][,1])
    mask<-(rank(-abs(CORS))<=BestPred)
    BESTHR[[i]]<-LAGDAT[[i]][,mask]
    BEST[[i]]<-data.frame(Date=time(BESTHR[[i]]),BESTHR[[i]],check.names=FALSE,row.names=NULL)


  }
  return(BEST)
}

请尝试以下方法查看输出:

fdata<-AddLag(myData,"Date","Hour","Demand","Adspend")

Fdata是由数据帧组成的列表 . 我想将我创建的另一个函数应用于此数据框列表,该函数将相同的季节变量集附加到Fdata的每个元素 .

这是函数和代码:

AddCalEffect <-function(DF,Date,Time,Seasonal=TRUE, Holiday=TRUE, Hourly = FALSE){
#Create variables of calendar effects from Date field
DF$Date<-as.Date(DF[[Date]], format="%m/%d/%Y")
DF[[Time]]<-factor(DF[[Time]], levels = c(1:24))
monthly <- months(DF[[Date]])
dow <-weekdays(DF[[Date]])
year1<-year(DF[[Date]])
quarter<-quarters(DF[[Date]])
######################################################################################################################
#Create matrices of seasonal indicators for covariates
hmatx <- model.matrix(~as.factor(DF[[Time]]))[,2:24] # Matrix of hours
mmatx <- model.matrix(~as.factor(monthly))[,2:12] #Matrix of months
dmatx <- model.matrix(~as.factor(dow))[,2:7] #matrix of days of week
qmatx<-model.matrix(~as.factor(quarter))[,1:3] #matrix of Quarters of the year


#######################################################################################################################
# Create Holiday indicator variables with both holiday and weekend flagged if within 2 days of holiday
HolidayEnds<-function(h){
  h<-as.Date(h)
  w<-as.POSIXlt(h)$wday
  sort(unique(c(h,h[w%in%0:2]-1,
                h[w%in%1:2]-2,
                h[w==2]-3,
                h[w==4]+3,
                h[w%in% 4:5]+2,
                h[w%in%4:6]+1)))
}

Labor<-(DF[[Date]]%in% HolidayEnds(USLaborDay(year1)))
LaborDay<-ifelse(Labor=="TRUE",1,0)
Mem<-(DF[[Date]]%in% HolidayEnds(USMemorialDay(year1)))
MemDay<-ifelse(Mem=="TRUE",1,0)
Eas<-(DF[[Date]]%in% HolidayEnds(Easter(year1)))
Easter<-ifelse(Eas=="TRUE",1,0)
Thanks<-(DF[[Date]]%in% HolidayEnds(USThanksgivingDay(year1)))
ThanksDay<-ifelse(Thanks=="TRUE",1,0)
July<-(DF[[Date]]%in% HolidayEnds(USIndependenceDay(year1)))
July4th<-ifelse(July=="TRUE",1,0)
Christ<-(DF[[Date]]%in% HolidayEnds(USChristmasDay(year1)))
Christmas<-ifelse(Christ=="TRUE",1,0)
New<-(DF[[Date]]%in% HolidayEnds(USNewYearsDay(year1)))
NewYears<-ifelse(New=="TRUE",1,0)
colnames(hmatx)<-c("1AM-2AM","2AM-3AM","3AM-4AM","4AM-5AM","5AM-6AM","6AM-7AM","7AM-8AM","8AM-9AM","9AM-10AM","10AM-11AM","11AM-12PM","12PM-1PM","1PM-2PM","2PM-3PM","3PM-4PM","4PM-5PM","5PM-6PM","6PM-7PM","7PM-8PM","8PM-9PM","9PM-10PM","10PM-11PM","11PM-12AM" )
colnames(dmatx)<-c("Monday","Tuesday","Wednesday","Thursday","Friday","Saturday")
colnames(mmatx)<-c("February","March","April","May","June","July","August","September","October","November","December")
colnames(qmatx)<-c("Quarter1","Quarter2","Quarter3")
Holly<-cbind(Easter,MemDay,July4th, LaborDay,ThanksDay,Christmas,NewYears)
Season<-cbind(qmatx,mmatx)
Hourz<-as.data.frame(hmatx)

ifelse(Hourly=="TRUE", DataObject<-cbind(DF,Hourz),            
if(!Seasonal&!Holiday){DataObject<-cbind(DF,dmatx)}
else if (!Seasonal&Holiday){DataObject<-cbind(DF,dmatx,Holly)}
else if (Seasonal&!Holiday){DataObject<-cbind(DF,Season)}
else if (Seasonal&Holiday){DataObject<-cbind(DF,Season,Holly)})
return(DataObject)
}

尝试在fdata上使用ldply来应用函数AddCalEffect

这是我尝试过的方法之一的简单变化 . 我很难将循环从plyr转移到plyr并应用 . 任何帮助,将不胜感激 .

OptData<-ldply(fdata,AddCalEffect)

这个错误产生了

Error in (function(x, i, exact) if (is.matrix(i)) as.matrix(x)[[i]] else .subset2(x,  : 
  argument "Date" is missing, with no default