首页 文章

为什么大文件在上传到Azure blob时有时会损坏

提问于
浏览
0

我正在天蓝色存储中上传一个大文件 . 我正在将文件上传到4 MB块 . 我使用了过去1年的以下代码,但是从上个月开始,每当我上传文件时,它会被腐败一段时间,有时会上传很好 .

任何人都可以建议我在代码中需要更改的内容 .

//将文件从文件系统上传到blob . 并行实现 .

public void ParallelUploadFile(CloudBlockBlob blob1, string fileName1, BlobRequestOptions options1, int maxBlockSize = 4 * 1024 * 1024, int rowId)
        {
            blob = blob1;
            fileName = fileName1;
            options = options1;

            file = new FileInfo(fileName);
            var fileStream = new FileStream(fileName, FileMode.Open, FileAccess.Read,FileShare.ReadWrite);
            long fileSize = file.Length;
        //Get the filesize
        long fileSizeInMb = file.Length/1024/1024;

        // let's figure out how big the file is here
        long leftToRead = fileSize;
        long startPosition = 0;

        // have 1 block for every maxBlockSize bytes plus 1 for the remainder
        var blockCount =
            ((int) Math.Floor((double) (fileSize/maxBlockSize))) + 1;

        blockIds = new List<string>();

        // populate the control array...
        for (int j = 0; j < blockCount; j++)
        {
            var toRead = (int) (maxBlockSize < leftToRead
                                    ? maxBlockSize
                                    : leftToRead);

            var blockId = Convert.ToBase64String( 
                Encoding.ASCII.GetBytes(
                    string.Format("BlockId{0}", j.ToString("0000000"))));


            transferDetails.Add(new BlockTransferDetail()
                                    {
                                        StartPosition = startPosition,
                                        BytesToRead = toRead,
                                        BlockId = blockId
                                    });

            if (toRead > 0)
            {
                blockIds.Add(blockId);
            }

            // increment the starting position
            startPosition += toRead;
            leftToRead -= toRead;
        }

        //*******
        //PUT THE NO OF THREAD LOGIC HERE
        //*******

        int runFrom = 0;
        int runTo = 0;
        int uploadParametersCount = 0;
        TotalUpload = Convert.ToInt64(fileSizeInMb);

        for (int count = 0; count < transferDetails.Count; )
        {
            //Create uploading file parameters
            uploadParametersesList.Add(new UploadParameters()
                                           {
                                               FileName = file.FullName,
                                               BlockSize = 3900000,
                                               //BlockSize = 4194304,
                                               LoopFrom = runFrom + runTo,
                                               IsPutBlockList = false,
                                               UploadedBytes = 0,
                                               Fs = fileStream,
                                               RowIndex = rowId,
                                               FileSize = Convert.ToInt64(fileSizeInMb)
                                           });


            //Logic to create correct threads
            if (transferDetails.Count < 50)
            {
                runTo = transferDetails.Count;
                uploadParametersesList[uploadParametersCount].LoopTo += runTo;
                count += transferDetails.Count;
            }
            else
            {
                var tmp = transferDetails.Count - runTo;
                if (tmp > 50 && tmp < 100)
                {
                    runTo += tmp;
                    count += tmp;
                    uploadParametersesList[uploadParametersCount].LoopTo += runTo;
                }
                else
                {
                    runTo += 50;
                    count += 50;
                    uploadParametersesList[uploadParametersCount].LoopTo += runTo;
                }
            }

            //Add to Global Const
            GlobalConst.UploadedParameters.Add(uploadParametersesList[uploadParametersCount]);

            //Start the thread
            int parametersCount = uploadParametersCount;
            var thread = new Thread(() => ThRunThis(uploadParametersesList[parametersCount]))
                             {Priority = ThreadPriority.Highest};
            thread.Start();

            uploadParametersCount++;

            //Start a timer here to put all blocks on azure blob
            aTimer.Elapsed += OnTimedEvent;
            aTimer.Interval = 5000;
            aTimer.Start();
        }
    }

    //Timer callback
    private void OnTimedEvent(object source, ElapsedEventArgs e)
    {
        if (uploadParametersesList.Count(o => o.IsPutBlockList) == uploadParametersesList.Count)
        {
            aTimer.Elapsed -= OnTimedEvent;
            aTimer.Stop();

            //Finally commit it
            try
            {
                uploadParametersesList.ForEach(x => x.Status = "Uploaded");
                blob.PutBlockList(blockIds);

                IsCompleted = true;
            }
            catch (Exception exception)
            {
                Console.WriteLine(exception.Message);
            }
        }
    }

    //Main thread
    private void ThRunThis(UploadParameters uploadParameters)
    {
        try
        {
            for (int j = uploadParameters.LoopFrom; j < uploadParameters.LoopTo; j++)
            {
                br = new BinaryReader(uploadParameters.Fs);
                var bytes = new byte[transferDetails[j].BytesToRead];

                //move the file system reader to the proper position
                uploadParameters.Fs.Seek(transferDetails[j].StartPosition, SeekOrigin.Begin);
                br.Read(bytes, 0, transferDetails[j].BytesToRead);

                if (bytes.Length > 0)
                {
                    //calculate the block-level hash
                    MD5 md5 = new MD5CryptoServiceProvider();
                    byte[] blockHash = md5.ComputeHash(bytes);
                    string convertedHash = Convert.ToBase64String(blockHash, 0, 16);
                    blob.PutBlock(transferDetails[j].BlockId, new MemoryStream(bytes), convertedHash, options);

                    //Update Uploaded Bytes
                    uploadParameters.UploadedBytes += transferDetails[j].BytesToRead;
                    TotalUploadedBytes += transferDetails[j].BytesToRead;
                    Console.WriteLine(Thread.CurrentThread.Name);
                    //Try to free the memory
                    try
                    {
                        GC.Collect();
                    }
                    catch (Exception exception)
                    {
                        Console.WriteLine(exception.Message);
                    }
                }
            }

            //Is Completed
            uploadParameters.IsPutBlockList = true;
        }
        catch (Exception exception)
        {
            Console.WriteLine(Thread.CurrentThread.Name);
            uploadParameters.Exception = exception.Message;
            Console.WriteLine(exception.Message);
        }
    }

1 回答

  • 1

    自从我用线程触及大型blob上传以来已经有很长一段时间了,但看起来你的阻止列表正在被线程排除 .

    为什么不在上传所有块后从 Cloud 中获取阻止列表,然后将该列表用于putBlockList . 那将确保你按正确的顺序得到它们 .

相关问题