首页 文章

CUDA使用CMake在x86中构建

提问于
浏览
1

(Windows 10,CUDA 8.0,VS 2015,CMake 3.7.0)

我在使用CUDA内核构建库时遇到问题 .

我的项目有一个顶级的CMakeLists.txt,其中包含一个子CMakeLists.txt .

在顶层:

# Top-level CMakeLists.txt for the "robot" project.
cmake_minimum_required(VERSION 3.0)
project (robot)
# Locate the CUDA toolkit; REQUIRED makes configuration stop if it is not found.
find_package(CUDA REQUIRED)
#...some more stuff
# The subproject's CMakeLists.txt is pulled in with include(), not
# add_subdirectory(), so its commands run in this file's scope.
include(${PROJECT_SOURCE_DIR}/projects/subproject/CMakeLists.txt)

然后,在子项目CMakeLists.txt中:

# Root directory of the subproject inside the top-level source tree.
set(SUBPROJECT_SOURCE_DIR ${PROJECT_SOURCE_DIR}/projects/subproject)

# Recursively collect C++ headers/sources and CUDA sources/headers by extension.
file(GLOB_RECURSE SUBPROJECT_HEADER ${SUBPROJECT_SOURCE_DIR}/*.h)
file(GLOB_RECURSE SUBPROJECT_SOURCE ${SUBPROJECT_SOURCE_DIR}/*.cpp)
file(GLOB_RECURSE SUBPROJECT_CUDA ${SUBPROJECT_SOURCE_DIR}/*.cu)
file(GLOB_RECURSE SUBPROJECT_CUDA_HEADER ${SUBPROJECT_SOURCE_DIR}/*.cuh)

# Build one static library named "subproject" from every collected file.
cuda_add_library(subproject STATIC ${SUBPROJECT_HEADER} ${SUBPROJECT_SOURCE} ${SUBPROJECT_CUDA_HEADER} ${SUBPROJECT_CUDA})
# The subproject's own include/ directory is visible only to this target (PRIVATE).
target_include_directories(subproject PRIVATE ${SUBPROJECT_SOURCE_DIR}/include)
#some other includes and target_includes here...

# Placeholder from the original post: the real list of link libraries is elided.
target_link_libraries(subproject <some links here>)

在我的源目录中,我有一个.cuh和一个.cu文件 . 这些基于简单的VectorAdd测试:

kernel.cuh:

#ifndef KERNEL_CUH
#define KERNEL_CUH

/*
 * kernel.cuh - declarations for the VectorAdd sample.
 *
 * The include guard keeps the header safe to include more than once in the
 * same translation unit.
 */

#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>

/*
 * Element-wise integer addition: c[i] = a[i] + b[i] for indices i < n.
 * a, b and c are dereferenced inside the kernel, so they must be pointers the
 * device can access. Defined in kernel.cu.
 */
__global__ void VectorAdd(int *a, int *b, int *c, int n);

/*
 * Host-side self-test that runs VectorAdd and prints the first results.
 * Returns 0 on success. Defined in kernel.cu.
 */
int test();

#endif /* KERNEL_CUH */

而kernel.cu:

#include "kernel.cuh"

#define SIZE 1024



/*
 * Element-wise integer addition: c[i] = a[i] + b[i] for every 0 <= i < n.
 *
 * Launch layout: any 1D grid of 1D blocks. Each thread starts at its global
 * index and advances by the total number of launched threads, so the result
 * is correct whether the launch has fewer, exactly as many, or more threads
 * than n. No shared memory is used.
 *
 * Parameters:
 *   a, b - input arrays, at least n elements each, device-accessible
 *   c    - output array, at least n elements, device-accessible
 *   n    - number of elements to process; n <= 0 performs no work
 */
__global__ void VectorAdd(int *a, int *b, int *c, int n) {
    /* 64-bit arithmetic: gridDim.x * blockDim.x can exceed 32 bits. */
    const long long stride = (long long)gridDim.x * blockDim.x;
    const long long first  = (long long)blockIdx.x * blockDim.x + threadIdx.x;

    for (long long i = first; i < n; i += stride) {
        c[i] = a[i] + b[i];
    }
}

/*
 * Host-side smoke test for VectorAdd.
 *
 * Fills two host arrays of SIZE ints with a[i] = b[i] = i, copies them to the
 * device, launches VectorAdd on a single block of SIZE threads, copies the
 * result back and prints the first 10 sums.
 *
 * Every host allocation, CUDA API call and the kernel launch is checked. On
 * the first failure a diagnostic is written to stderr and the remaining CUDA
 * steps are skipped. All buffers are released on every path.
 *
 * Returns 0 on success, 1 if a host allocation or any CUDA operation failed.
 */
int test() {
    const size_t bytes = SIZE * sizeof(int);

    int *a = (int *)malloc(bytes);
    int *b = (int *)malloc(bytes);
    int *c = (int *)malloc(bytes);
    int *d_a = NULL, *d_b = NULL, *d_c = NULL;
    int status = 1;  /* flipped to 0 only after every step succeeded */

    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "test: host allocation of %lu bytes failed\n",
                (unsigned long)bytes);
    } else {
        for (int i = 0; i < SIZE; ++i) {
            a[i] = i;
            b[i] = i;
            c[i] = 0;
        }

        /* Each step runs only if all previous steps succeeded. */
        cudaError_t err = cudaMalloc(&d_a, bytes);
        if (err == cudaSuccess) err = cudaMalloc(&d_b, bytes);
        if (err == cudaSuccess) err = cudaMalloc(&d_c, bytes);

        if (err == cudaSuccess) err = cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice);
        if (err == cudaSuccess) err = cudaMemcpy(d_b, b, bytes, cudaMemcpyHostToDevice);
        if (err == cudaSuccess) err = cudaMemcpy(d_c, c, bytes, cudaMemcpyHostToDevice);

        if (err == cudaSuccess) {
            VectorAdd<<< 1, SIZE >>>(d_a, d_b, d_c, SIZE);
            /* A launch does not return a status; launch errors are read here. */
            err = cudaGetLastError();
        }

        /* The copy back also reports errors raised while the kernel ran. */
        if (err == cudaSuccess) err = cudaMemcpy(c, d_c, bytes, cudaMemcpyDeviceToHost);

        if (err == cudaSuccess) {
            for (int i = 0; i < 10; ++i) {
                printf("c[%d] = %d\n", i, c[i]);
            }
            status = 0;
        } else {
            fprintf(stderr, "test: CUDA error: %s\n", cudaGetErrorString(err));
        }
    }

    /* free(NULL) and cudaFree(NULL) are no-ops, so cleanup is unconditional. */
    free(a);
    free(b);
    free(c);

    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);

    return status;
}

我在项目的其他C文件中包含了kernel.cuh . 尝试编译解决方案时,我收到以下错误:

Severity    Code    Description Project File    Line    Suppression State
Error   LNK1112 module machine type 'X86' conflicts with target machine type 'x64'  subproject  C:\path_to_proj\build\CMakeFiles\subproject.dir\projects\subproject\src\Release\subproject_generated_kernel.cu.obj  1

这随后又导致找不到.lib文件 . 这个错误的原因是什么?我需要在CMakeLists中添加一些内容吗?

1 回答

  • 3

    我不确定,这是否是解决问题的正确方法 . 但它适用于我,我使用与您几乎相同的配置(Windows 10,visual studio 13,cuda 8.0,cmake 3.7) .

    在我的cmake文件中,除了 find_package(CUDA) 命令外,我还有以下代码 . 也许第一行正好能解决您遇到的错误信息(error message):

    # Force the cache variable CUDA_64_BIT_DEVICE_CODE to ON, overriding any cached value.
    # NOTE(review): presumably this must match the host architecture (x64) chosen by the generator - confirm against the FindCUDA docs.
    set(CUDA_64_BIT_DEVICE_CODE ON CACHE STRING "Compile device code in 64 bit mode" FORCE)
    

    下面是我其余的CUDA相关CMake配置(最后一行可能也有帮助,它强制链接器查找x64版本的CUDA库):

    # Locate the directory containing helper_cuda.h, searching only the roots
    # given by the environment variables below (NO_DEFAULT_PATH disables the
    # default search locations).
    # NOTE(review): the suffixes name v7.5 while the post mentions CUDA 8.0 - confirm they match the installed samples.
    find_path(CUDA_CUT_INCLUDE_DIR
          helper_cuda.h
          PATHS "$ENV{NVSDKCOMPUTE_ROOT}" "$ENV{NVSDKCUDA_ROOT}" "$ENV{NVCUDASAMPLES_ROOT}"
          PATH_SUFFIXES "common/inc" "CUDA Samples/v7.5/common/inc" "v7.5/common/inc"
          DOC "Location of helper_cuda.h"
          NO_DEFAULT_PATH
        )
    
    # Add the CUDA headers and the helper_cuda.h directory to the include path.
    INCLUDE_DIRECTORIES( ${CUDA_INCLUDE_DIRS} ${CUDA_CUT_INCLUDE_DIR})
    # Add the lib/x64 directory, located relative to the CUDA include directory,
    # to the linker search path.
    LINK_DIRECTORIES(${CUDA_INCLUDE_DIRS}/../lib/x64)
    

    我希望其中一些可以帮到你 . 祝好运!

相关问题