我试图了解 DataLoader 的工作原理。

这是我应用它的方式:

# DATASET
# DATASET
class Word2VecDataset(torch_data.Dataset):
    """Skip-gram training dataset.

    Eagerly tokenizes every query and response phrase from the vocabulary
    into skip-gram training entries and stores them in one flat list, so
    ``__getitem__``/``__len__`` are simple list accesses for the DataLoader.
    """

    def __init__(self, vocabulary):
        """Build the full example list from *vocabulary*'s training phrases."""
        super(Word2VecDataset, self).__init__()
        self.data_list = []   # flat list of skip-gram training entries
        self.vocab = vocabulary
        self.generate_batch_list()

    def __getitem__(self, index):
        """Return the training entry at *index* (used by DataLoader)."""
        return self.data_list[index]

    def __len__(self):
        """Number of training entries (DataLoader uses this for batching)."""
        return len(self.data_list)

    def generate_batch_list(self):
        """Tokenize all phrases and append their entries to ``self.data_list``.

        Queries and responses are processed identically, so iterate both
        columns in one loop instead of two copy-pasted ones.
        """
        training_data = self.vocab.get_training_phrases()
        for column in (training_data.Query, training_data.Response):
            for sentence in column:
                entries = utils.skip_gram_tokenize(vocab=self.vocab,
                                                   sentence=sentence)
                self.data_list.extend(entries)

这是实际使用 DataLoader 的部分:

# NOTE(review): num_workers=4 makes DataLoader spawn 4 worker processes.
# On Windows (and any platform using the 'spawn' start method) each worker
# RE-IMPORTS the main module, so all top-level code — the GPU-detection
# print and everything up to print('Model Initialized') — runs again in
# every worker. That is the "reboot"/double-print described below.
# Fix: guard the script entry point with `if __name__ == '__main__':`
# (or set num_workers=0 while debugging) — TODO confirm platform is Windows.
dataset = Word2VecDataset(self.vocab)
 # Positional args here are (dataset, batch_size, shuffle=True).
 data_loader = torch_data.DataLoader(dataset, self.batch_size, True, num_workers=4)

 print('Model Initialized')
 for epo in range(self.num_epochs):
     loss_val = None
     for i_batch, sample_batched in enumerate(data_loader): # This seems to be causing issues. For some reason this is the part that 'reboots' the whole model, making it print twice (more info under the code)
         loss_val = 0
         # NOTE(review): with the default collate_fn, sample_batched is the
         # COLLATED batch (e.g. [data_tensor, target_tensor]), not a list of
         # per-sample (data, target) pairs — verify this unpacking is intended.
         for data, target in sample_batched:
         ....

现在，奇怪的是出现了两次初始化阶段（此处未贴出的那部分代码会打印'这是检测到的gpu:xxx'），而 print('Model Initialized') 也被打印了两次。

最后，(pastebin) 是完整的控制台日志（带有错误）。