Hello folks!

我使用 brglm::brglm 在R中拟合了多变量(偏差减少)逻辑回归 . 我一直试图制作一个好看的情节

  • 沿着因子x1(线)的(y = 1)的可行性

  • 0.95置信区间

  • y = 0和y = 1的单独直方图

我尝试使用 popbio::log.hist.plot ,但在分组多个图时,我没有't manage to add a confidence interval and didn'使用 par(mar, oma, mgp, mai) 实现移动 .
然后我尝试了 visreg::visreg ,它创建了带有分隔地毯的漂亮地块,但没有添加直方图的选项 .
我找到ggplot2: How to combine histogram, rug plot, and logistic regression prediction in a single graph,但我再次无法在 ggplot -graph中添加一个不确定区间 .

最后,我使用上面链接中的一些行以及我的一些非常粗略的代码组合了 visregggplot

library(brglm)
library(plyr)
library(dplyr)
library(visreg)
library(ggplot2)

# creating random data
set.seed(60)
df <- cbind(y <- c(floor(runif(30, min=0, max=1.2)), 
                   floor(runif(50, min=0, max=2))),
           x1 <- c(runif(30, min=0, max=50), runif(50, min=20, max=100)),
           x2 <- runif(80, min=0, max=100),
           x3 <- runif(80, min=0, max=100))
colnames(df) <- c("y", "x1", "x2", "x3")
df <- data.frame(df)

# fitting the model
mod <- brglm(y ~ x1 + x2 + x3, binomial, data=df)

# setting breaks for histograms
h = df %>% group_by(y) %>%
    mutate(breaks = cut(x1, breaks=seq(from= 0, to= 105, by=5), 
                        labels=seq(from= 0, to= 100, by= 5), 
                        include.lowest=TRUE),
           breaks = as.numeric(as.character(breaks))) %>%
           group_by(y, breaks) %>% 
           summarise(n = n()) %>%
           mutate(ext = ifelse(y==0, n/sum(n), 1 - n/sum(n)))

# splitting h for y = 0 or 1, setting mins and maxs
h.split <- split(h, h$y)
min1_p <- min((h.split$"1")$ext)
max1_n <- max((h.split$"1")$n)
max0_p <- max((h.split$"0")$ext)
max0_n <- max((h.split$"0")$n)

# plottin visreg with hist
visreg (mod, "x1",  ylab=NULL, xlab= "x1", 
        line=list(col="black", size=2), gg=T, scale="response") + 
geom_segment(data=h, size=5, show.legend=FALSE, 
aes(x=breaks+2.5, xend=breaks+2.5, y=y, yend=ext))+
scale_y_continuous(name = expression("p(y=1)"), limits = c(0, 1), 
                   sec.axis = sec_axis(~ . +0 , name = "n", breaks = c(
                                       seq(from=(max0_p/max0_n), 
                                           to=max0_p, 
                                           by=(max0_p/max0_n)),
                                       seq(from= min1_p, 
                                           to=1-(1-min1_p)/max1_n, 
                                           by=(1-min1_p)/max1_n)), 
                                       labels = c(
                                           seq(from=1, to=max0_n, by=1), 
                                           rev(seq(from=1, to=max1_n, 
                                           by=1)))
                  )) +
scale_x_continuous(limits=c(min(x1)-1,max(x1))) + 
theme(panel.background = element_rect(fill = "white", colour = "grey"))

结果情节看起来像这样visreg + confedence interval + histograms with ggplot2

我的问题是:

  • 如何将断点和标签设置为宽度为x = 10的直方图柱?

  • 为什么辅助y轴在y = 0和1时缩放差异,我该如何改变?

  • 有没有更好的方法来做这一切?

感谢您的关注,
兰妮