连体网络输出-Java 学习之路

我正在尝试在caffe中实现一个暹罗网络，它由两个不共享权重的图像网组成 . 所以我基本上要做的是给每个网络一个图像，最后试着找出它们之间的相似距离，下面是我的原型 . 所以我的主要问题是我应该如何设置“num_output”呢？我的训练只有2个课程，0个不同，他们不相同，1个是相似的 .

name: "Siamese_ImageNet"
layers {
  name: "data"
  type: IMAGE_DATA
  top: "data"
  top: "label"
  image_data_param {
    source: "train1.txt"
    batch_size: 32
    new_height: 256
    new_width: 256
  }
  include: { phase: TRAIN }
}
layers {
  name: "data"
  type: IMAGE_DATA
  top: "data"
  top: "label"
  image_data_param {
    source: "test1.txt"
    batch_size: 32
    new_height: 256
    new_width: 256
  }
  include: { phase: TEST }
}

layers {
  name: "data_p"
  type: IMAGE_DATA
  top: "data_p"
  top: "label_p"
  image_data_param {
    source: "train2.txt"
    batch_size: 32
    new_height: 256
    new_width: 256
  }
  include: { phase: TRAIN }
}
layers {
  name: "data_p"
  type: IMAGE_DATA
  top: "data_p"
  top: "label_p"
  image_data_param {
    source: "test2.txt"
    batch_size: 32
    new_height: 256
    new_width: 256
  }
  include: { phase: TEST }
}


layers {
  name: "conv1"
  type: CONVOLUTION
  bottom: "data"
  top: "conv1"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "relu1"
  type: RELU
  bottom: "conv1"
  top: "conv1"
}
layers {
  name: "pool1"
  type: POOLING
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layers {
  name: "norm1"
  type: LRN
  bottom: "pool1"
  top: "norm1"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layers {
  name: "conv2"
  type: CONVOLUTION
  bottom: "norm1"
  top: "conv2"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu2"
  type: RELU
  bottom: "conv2"
  top: "conv2"
}
layers {
  name: "pool2"
  type: POOLING
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layers {
  name: "norm2"
  type: LRN
  bottom: "pool2"
  top: "norm2"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layers {
  name: "conv3"
  type: CONVOLUTION
  bottom: "norm2"
  top: "conv3"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "relu3"
  type: RELU
  bottom: "conv3"
  top: "conv3"
}
layers {
  name: "conv4"
  type: CONVOLUTION
  bottom: "conv3"
  top: "conv4"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu4"
  type: RELU
  bottom: "conv4"
  top: "conv4"
}
layers {
  name: "conv5"
  type: CONVOLUTION
  bottom: "conv4"
  top: "conv5"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu5"
  type: RELU
  bottom: "conv5"
  top: "conv5"
}
layers {
  name: "pool5"
  type: POOLING
  bottom: "conv5"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layers {
  name: "fc6"
  type: INNER_PRODUCT
  bottom: "pool5"
  top: "fc6"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu6"
  type: RELU
  bottom: "fc6"
  top: "fc6"
}
layers {
  name: "drop6"
  type: DROPOUT
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layers {
  name: "fc7"
  type: INNER_PRODUCT
  bottom: "fc6"
  top: "fc7"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 2
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu7"
  type: RELU
  bottom: "fc7"
  top: "fc7"
}
layers {
  name: "drop7"
  type: DROPOUT
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}

layers {
  name: "conv1_p"
  type: CONVOLUTION
  bottom: "data_p"
  top: "conv1_p"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "relu1_p"
  type: RELU
  bottom: "conv1_p"
  top: "conv1_p"
}
layers {
  name: "pool1_p"
  type: POOLING
  bottom: "conv1_p"
  top: "pool1_p"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layers {
  name: "norm1_p"
  type: LRN
  bottom: "pool1_p"
  top: "norm1_p"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layers {
  name: "conv2_p"
  type: CONVOLUTION
  bottom: "norm1_p"
  top: "conv2_p"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu2_p"
  type: RELU
  bottom: "conv2_p"
  top: "conv2_p"
}
layers {
  name: "pool2_p"
  type: POOLING
  bottom: "conv2_p"
  top: "pool2_p"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layers {
  name: "norm2_p"
  type: LRN
  bottom: "pool2_p"
  top: "norm2_p"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layers {
  name: "conv3_p"
  type: CONVOLUTION
  bottom: "norm2_p"
  top: "conv3_p"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "relu3_p"
  type: RELU
  bottom: "conv3_p"
  top: "conv3_p"
}
layers {
  name: "conv4_p"
  type: CONVOLUTION
  bottom: "conv3_p"
  top: "conv4_p"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu4_p"
  type: RELU
  bottom: "conv4_p"
  top: "conv4_p"
}
layers {
  name: "conv5_p"
  type: CONVOLUTION
  bottom: "conv4_p"
  top: "conv5_p"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu5_p"
  type: RELU
  bottom: "conv5_p"
  top: "conv5_p"
}
layers {
  name: "pool5_p"
  type: POOLING
  bottom: "conv5_p"
  top: "pool5_p"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layers {
  name: "fc6_p"
  type: INNER_PRODUCT
  bottom: "pool5_p"
  top: "fc6_p"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu6_p"
  type: RELU
  bottom: "fc6_p"
  top: "fc6_p"
}
layers {
  name: "drop6_p"
  type: DROPOUT
  bottom: "fc6_p"
  top: "fc6_p"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layers {
  name: "fc7_p"
  type: INNER_PRODUCT
  bottom: "fc6_p"
  top: "fc7_p"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 2
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu7_p"
  type: RELU
  bottom: "fc7_p"
  top: "fc7_p"
}
layers {
  name: "drop7_p"
  type: DROPOUT
  bottom: "fc7_p"
  top: "fc7_p"
  dropout_param {
    dropout_ratio: 0.5
  }
}

layers {
    name: "loss"
    type: CONTRASTIVE_LOSS
    contrastive_loss_param {
        margin: 1.0
    }
    bottom: "fc7"
    bottom: "fc7_p"
    bottom: "label"
    top: "loss"
}

我的训练文件结构：0不相似，1相似

train1.txt:
 /aer/img1_1.jpg 0
 /aer/img1_2.jpg 1
 /aer/img1_3.jpg 1

 train2.txt:
 /tpd/img2_1.jpg 0
 /tpd/img2_2.jpg 1
 /tpd/img2_3.jpg 1

3 回答

0

我相信 num_output 定义了提取的特征向量的维度，然后提取的特征可用于确定 L2 距离 . 如果 L2 距离大于 1 ，那么它是一个不同的类，如果它接近 0 ，则图像类似 . 休息戴尔的答案是完美的 .

回复于 2024-05-06T11:10:57+08:00
1
我应该设置“num_output”？

在了解你应该设置多少 num_output 之前，让我们解释一下它的含义 . 实际上，您可以查看Simense网络的两侧， data -> fc7 ， data_p -> fc7_p 作为2个特征提取器 . 每一个都是提取特征，例如 fc7 和 fc7_p 来自相应数据层中的图像 . 所以 num_output 定义了提取的特征向量的维数 .

在训练期间，当矢量表示的图像类似时， ContrastiveLoss 图层总是尝试最小化2个提取的特征向量的距离（ label == 1 ），并在不熟悉时最大化距离（ label == 0 ） . 即，特征向量的距离越小，图像越相似 .

那么特征向量的最佳维度是什么才能最好地包含指示相似性的信息？或者你应该怎么设置 num_output ？可能没有确切的值，它取决于特征提取器的编码质量（您可以将该特征视为图像的代码）以及识别图像相似性的难度 . 所以基本上如果网络（特征提取器）很深并且识别相似性并不太难，你可以选择一个相对较小的例如，因为该特征可以被更大的网络很好地编码并且更具辨别力 . 如果不是，您可以尝试更大的值，例如500,1000或尝试更复杂的网络 .

如果你想尝试 MultinomialLogisticLoss 而不是 ContrastiveLoss 图层，首先应该使用像 CONCAT 这样的图层将2个特征向量 fc7 ， fc7_p 融合为1，然后将其输入 SOFTMAX_LOSS 图层，如下所示：
```
...#original layers
layers {
  name: "concat"
  type: CONCAT
  bottom: "fc7"
  bottom: "fc7_p"  
  top: "fc_concat" # concatenate fc7 and fc7_p along channel axis
}
layer {
  name: "fc_cls"
  type: INNER_PRODUCT
  bottom: "fc_concat"
  top: "fc_cls"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 2 # a binary classification problem in this case
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "accuracy"
  type: ACCURACY
  bottom: "fc_cls"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
}
layer {
  name: "loss"
  type: SOFTMAX_LOSS
  bottom: "fc_cls"
  bottom: "label"
  top: "loss"
}
```
更新

为了比较相似性并将其用于部署，Constrastive Loss或SoftMax Loss，哪种方法最好？

Softmax Loss简单易用 . 但它只能给你二进制预测，即相似或不相似 . 它给出的2类（相似的，不相似的）的概率分布通常太硬（不均匀），例如， [0.9*, 0.0*] ，_ [0.0*, 0.9*] ，....在很多情况下，它不能很好地反映真正的输入相似度 .

使用Constrastive Loss时，您可以获得图像的判别特征向量 . 并且你可以使用向量来计算相似概率，就像CVPR 2005论文_424781_在4.1节中所做的那样 . （关键点是使用从属于同一主题的图像生成的特征向量来计算多元法向密度） . 您还可以使用阈值来控制模型的the false positive rate and the false negative rate以获得ROC curve以更好地评估模型 .

顺便说一句，要挖掘出更多用于预测相似性的CNN架构，您可以参考CVPR 2015论文Learning to Compare Image Patches via Convolutional Neural Networks .
回复于 2024-05-06T11:10:57+08:00

只是为了纠正Dale的优秀answer上面的Caffe 's uber sensitive syntax, for noobs that get stuck like myself, here' s一些更正（层到层，一些引用，加上删除注释和有效大小写）

layer {
  name: "concat"
  type: "Concat"
  bottom: "fc7"
  bottom: "fc7_p"  
  top: "fc_concat"
}
layer {
  name: "fc_cls"
  type: "InnerProduct"
  bottom: "fc_concat"
  top: "fc_cls"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "fc_cls"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "fc_cls"
  bottom: "label"
  top: "loss"
}

回复于 2024-05-06T11:10:57+08:00

连体网络输出

3 回答

更新

相关问题