首页 文章

使用数据工厂创建管道,其中包含从 Azure Blob 存储到 Data Lake Store 的复制活动

提问于
浏览
0

我正在尝试使用数据工厂创建一个管道,其中包含从 Azure Blob 存储到 Data Lake Store 的复制活动。

但是在运行管道时,它显示状态为失败,并出现以下错误:

复制活动在源端遇到用户错误:ErrorCode=UserErrorSourceBlobNotExist,'Type=Microsoft.DataTransfer.Common.Shared.HybridDeliveryException,Message=缺少必需的 Blob。ContainerName: https://*********, ContainerExist: True, BlobPrefix: , BlobCount: 0.,Source=Microsoft.DataTransfer.ClientLibrary,'。

1 回答

  • 1

    我按照 Azure 官方教程,使用数据工厂创建了从 Azure Blob 存储到 Azure Data Lake Store 的复制活动,在我这边运行正常。我们可以使用 Azure 门户、Visual Studio 或 PowerShell 来创建管道,按照教程一步一步操作即可。教程还提供了以下代码。

    AzureStorage 类型的链接服务:

    {
      "name": "StorageLinkedService",
      "properties": {
        "type": "AzureStorage",
        "typeProperties": {
          "connectionString": "DefaultEndpointsProtocol=https;AccountName=<accountname>;AccountKey=<accountkey>"
        }
      }
    }
    

    AzureDataLakeStore 类型的链接服务:

    {
        "name": "AzureDataLakeStoreLinkedService",
        "properties": {
            "type": "AzureDataLakeStore",
            "typeProperties": {
                "dataLakeStoreUri": "https://<accountname>.azuredatalakestore.net/webhdfs/v1",
                "servicePrincipalId": "<service principal id>",
                "servicePrincipalKey": "<service principal key>",
                "tenant": "<tenant info, e.g. microsoft.onmicrosoft.com>",
                "subscriptionId": "<subscription of ADLS>",
                "resourceGroupName": "<resource group of ADLS>"
            }
        }
    }
    

    AzureBlob 类型的输入数据集:

    {
      "name": "AzureBlobInput",
      "properties": {
        "type": "AzureBlob",
        "linkedServiceName": "StorageLinkedService",
        "typeProperties": {
          "folderPath": "mycontainer/myfolder/yearno={Year}/monthno={Month}/dayno={Day}",
          "partitionedBy": [
            {
              "name": "Year",
              "value": {
                "type": "DateTime",
                "date": "SliceStart",
                "format": "yyyy"
              }
            },
            {
              "name": "Month",
              "value": {
                "type": "DateTime",
                "date": "SliceStart",
                "format": "MM"
              }
            },
            {
              "name": "Day",
              "value": {
                "type": "DateTime",
                "date": "SliceStart",
                "format": "dd"
              }
            },
            {
              "name": "Hour",
              "value": {
                "type": "DateTime",
                "date": "SliceStart",
                "format": "HH"
              }
            }
          ]
        },
        "external": true,
        "availability": {
          "frequency": "Hour",
          "interval": 1
        },
        "policy": {
          "externalData": {
            "retryInterval": "00:01:00",
            "retryTimeout": "00:10:00",
            "maximumRetry": 3
          }
        }
      }
    }
    

    AzureDataLakeStore 类型的输出数据集:

    {
        "name": "AzureDataLakeStoreOutput",
          "properties": {
            "type": "AzureDataLakeStore",
            "linkedServiceName": "AzureDataLakeStoreLinkedService",
            "typeProperties": {
                "folderPath": "datalake/output/"
            },
            "availability": {
                  "frequency": "Hour",
                  "interval": 1
            }
          }
    }
    

    包含复制活动的管道,该活动使用 BlobSource 作为源、AzureDataLakeStoreSink 作为接收器:

    {  
        "name":"SamplePipeline",
        "properties":
        {  
            "start":"2014-06-01T18:00:00",
            "end":"2014-06-01T19:00:00",
            "description":"pipeline with copy activity",
            "activities":
            [  
                  {
                    "name": "AzureBlobtoDataLake",
                    "description": "Copy Activity",
                    "type": "Copy",
                    "inputs": [
                      {
                        "name": "AzureBlobInput"
                      }
                    ],
                    "outputs": [
                      {
                        "name": "AzureDataLakeStoreOutput"
                      }
                    ],
                    "typeProperties": {
                        "source": {
                            "type": "BlobSource"
                          },
                          "sink": {
                            "type": "AzureDataLakeStoreSink"
                          }
                    },
                       "scheduler": {
                          "frequency": "Hour",
                          "interval": 1
                    },
                    "policy": {
                          "concurrency": 1,
                          "executionPriorityOrder": "OldestFirst",
                          "retry": 0,
                          "timeout": "01:00:00"
                    }
                  }
            ]
        }
    }
    

相关问题