我想在C中使用MPI实现Cannon算法,使用笛卡尔通信器,使用默认函数移位并从2个矩阵发送2维块 . 我曾尝试过在网上找到的几个教程,但我发现没有一个按照我想要的方式实现,使用二维块和笛卡尔传播者 .
编辑:我意识到我以错误的方式使用proc_grid_size变量,将进程矩阵的大小与块大小混淆并进入一些未分配的内存区域,我设法克服了错误 . 我正在运行25个进程和2个10 * 10矩阵的输入存储在2个不同的文件中 . 我目前正在尝试使用MPI_Cart_Shift函数实现移位操作 . 但我不知道如何将块发送给邻居 . 这是我当前对此特定部分的实现,它不起作用(应用程序只是挂起):
MPI_Scatterv(globalAptr, sendcounts, displs, subarrtype, &(a[0][0]),
block_size * block_size, MPI_INT,
0, MPI_COMM_WORLD);
MPI_Scatterv(globalBptr, sendcounts, displs, subarrtype, &(b[0][0]),
block_size * block_size, MPI_INT,
0, MPI_COMM_WORLD);
int nlocal;
int npes, dims[2], periods[2];
int myrank, my2drank, mycoords[2];
int uprank, downrank, leftrank, rightrank, coords[2];
int shiftsource, shiftdest;
MPI_Status status;
MPI_Comm comm_2d;
// Get the communicator related information
MPI_Comm_size(MPI_COMM_WORLD, &npes);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
// Set up the Cartesian topology
dims[0] = dims[1] = proc_matrix_size;//sqrt(npes);
// Set the periods for wraparound connections
periods[0] = periods[1] = 1;
// Create the Cartesian topology, with rank reordering
MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm_2d);
// Get the rank and coordinates with respect to the new topology
MPI_Comm_rank(comm_2d, &my2drank);
MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);
// Compute ranks of the up and left shifts
// Get line neighbors (direction = 1, displacement = 1)
MPI_Cart_shift(comm_2d, 1, 1, &leftrank, &rightrank);
// Get column neighbors (direction = 0, displacement = 1)
MPI_Cart_shift(comm_2d, 0, 1, &uprank, &downrank);
// Determine the dimension of the local matrix block
nlocal = block_size;// n / dims[0];
MPI_Cart_shift(comm_2d, 1, -mycoords[1], &shiftsource, &shiftdest);
MPI_Sendrecv_replace(&(a[0][0]), 1, subarrtype,
shiftdest, 1, shiftsource, 1, comm_2d, &status);
MPI_Cart_shift(comm_2d, 0, -mycoords[0], &shiftsource, &shiftdest);
MPI_Sendrecv_replace(&(b[0][0]), 1, subarrtype,
shiftdest, 1, shiftsource, 1, comm_2d, &status);
关闭应用程序后,我发现根进程是唯一挂起的进程:
F:\ Facultate \ AN_4 \ PDC \ Labs \ MPI \ Cannon \ x64 \ Release> mpiexec -np 25 Cannon.exe a.txt b.txt> mpiexec中止作业...作业中止:[rank] message [0]用户[1-24]终止作业----在DESKTOP-JB1815M ctrl-c上的错误分析----- [0]被击中 . 工作由用户中止 . ----错误分析-----
初始解决的代码:
int malloc2D(int ***array, int n, int m) {
int i;
/* allocate the n*m contiguous items */
int *p = (int*) calloc(n*m, sizeof(int));
if (!p) return -1;
/* allocate the row pointers into the memory */
(*array) = (int**) calloc(n, sizeof(int*));
if (!(*array)) {
free(p);
return -1;
}
/* set up the pointers into the contiguous memory */
for (i = 0; i<n; i++)
(*array)[i] = &(p[i*m]);
return 0;
}
int free2D(int ***array) {
/* free the memory - the first element of the array is at the start */
free(&((*array)[0][0]));
/* free the pointers into the memory */
free(*array);
return 0;
}
int main(int argc, char* argv[])
{
MPI_Init(&argc, &argv);
if (argc != 3) {
fprintf(stderr, "Not enough arguments passed! Make sure you pass 2 filenames.\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
// Find out rank, size
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
int world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
// Declare file pointers
FILE* fa = NULL;
FILE* fb = NULL;
// Declare matrix pointers
int **A = NULL;
int **B = NULL;
int **C = NULL;
// Declare matrix dimensions
int ma = 0, na = 0;
int mb = 0, nb = 0;
// Nr of processes on each line/collumn in process mesh
int proc_matrix_size = (int)sqrt(world_size);
// Single value for quadratic matrix size
int n = 0;
// Nr of elements on each line/collumn in local matrix
// of each process
int block_size = 0;
// Open files and read matrices
if (world_rank == 0)
{
fa = fopen(argv[1], "r");
fb = fopen(argv[2], "r");
// Read matrix dymensions
fscanf(fa, "%d %d\n", &ma, &na);
fscanf(fb, "%d %d\n", &mb, &nb);
// Check if matrices are quadratic
if ((ma != na) && (na != mb) && (mb != nb))
{
printf("Invalid matrices dimensions\n");
return 0;
}
n = na;
// Check if sqrt(nr_processes) divides matrix dimension
if ((n % proc_matrix_size != 0) || (world_size % proc_matrix_size != 0))
{
printf("Number of processes does not fit matrix size\n");
return 0;
}
block_size = n / proc_matrix_size;
malloc2D(&A, n, n);
malloc2D(&B, n, n);
malloc2D(&C, n, n);
// Read matrices A & B from file
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
fscanf(fa, "%d ", &A[i][j]);
fscanf(fb, "%d ", &B[i][j]);
}
fscanf(fa, "\n");
}
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&block_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
else {
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&block_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
/*
Divide matrices in blocks and send each block to the corresponding process
*/
// Declare global pointers to matrices
int *globalAptr = NULL;
int *globalBptr = NULL;
int *globalCptr = NULL;
// Declare global return pointers
int *globalA2ptr = NULL;
int *globalB2ptr = NULL;
int **A2 = NULL;
int **B2 = NULL;
// Declare local matrix pointers
int **a = NULL;
int **b = NULL;
int **c = NULL;
malloc2D(&A2, n, n);
malloc2D(&B2, n, n);
if (world_rank == 0)
{
globalAptr = &(A[0][0]);
globalBptr = &(B[0][0]);
globalA2ptr = &(A2[0][0]);
globalB2ptr = &(B2[0][0]);
globalCptr = &(C[0][0]);
}
malloc2D(&a, block_size, block_size);
malloc2D(&b, block_size, block_size);
malloc2D(&c, block_size, block_size);
// Sizes of input global matrix
int sizes[2] = { n, n };
// Sizes of each block
int subsizes[2] = { block_size, block_size };
// Begining of current block
int starts[2] = { 0,0 };
// Declare subarray type
MPI_Datatype type, subarrtype;
MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_INT, &type);
MPI_Type_create_resized(type, 0, block_size * sizeof(int), &subarrtype);
MPI_Type_commit(&subarrtype);
// Scatter the A and B to all processes
int* sendcounts = (int*)malloc(proc_matrix_size * proc_matrix_size * sizeof(int));
int* displs = (int*)malloc(proc_matrix_size * proc_matrix_size * sizeof(int));
if (world_rank == 0)
{
for (int i = 0; i < proc_matrix_size * proc_matrix_size; i++)
sendcounts[i] = 1;
int disp = 0;
for (int i = 0; i < proc_matrix_size; i++) {
for (int j = 0; j < proc_matrix_size; j++) {
displs[i * proc_matrix_size + j] = disp;
disp += 1;
}
disp += ((n / proc_matrix_size)-1) * proc_matrix_size;
}
}
MPI_Scatterv(globalAptr, sendcounts, displs, subarrtype, &(a[0][0]),
block_size * block_size, MPI_INT,
0, MPI_COMM_WORLD);
MPI_Scatterv(globalBptr, sendcounts, displs, subarrtype, &(b[0][0]),
block_size * block_size, MPI_INT,
0, MPI_COMM_WORLD);
for (int i = 0; i < block_size; i++) {
for (int j = 0; j < block_size; j++) {
a[i][j] = 10 + a[i][j];
b[i][j] = 10 + b[i][j];
}
}
// It all goes back to process 0
MPI_Gatherv(&(a[0][0]), block_size * block_size, MPI_INT,
globalA2ptr, sendcounts, displs, subarrtype,
0, MPI_COMM_WORLD);
MPI_Gatherv(&(b[0][0]), block_size * block_size, MPI_INT,
globalB2ptr, sendcounts, displs, subarrtype,
0, MPI_COMM_WORLD);
MPI_Finalize();
return 0;
}
老:我想提一下,目前,我正试图在默认通信器上发送块,并计划在管理发送矩阵块之后实现移位操作和笛卡尔通信器 .
我需要的帮助是关于Scatterv函数,它会抛出以下错误:
job aborted:[rank] message [0]致命错误MPI_Scatterv中的致命错误:计数无效,错误堆栈:MPI_Scatterv(sbuf = 0x0000029262048D40,scnts = 0x00000292620482B0,displs = 0x0000029262048250,dtype = USER,rbuf = 0x000002926203ED30,rcount = 25, MPI_INT,root = 0,MPI_COMM_WORLD)失败否定计数,值为-1912594387 [1-7]终止
这是我到目前为止编写的代码:
#include "stdafx.h"
#include "mpi.h"
#include "stdio.h"
#include "stdlib.h"
#include <assert.h>
#include <cstdlib>
#include <math.h>
int malloc2D(int ***array, int n, int m) {
int i;
/* allocate the n*m contiguous items */
int *p = (int*) malloc(n*m * sizeof(int));
if (!p) return -1;
/* allocate the row pointers into the memory */
(*array) = (int**) malloc(n * sizeof(int*));
if (!(*array)) {
free(p);
return -1;
}
/* set up the pointers into the contiguous memory */
for (i = 0; i<n; i++)
(*array)[i] = &(p[i*m]);
return 0;
}
int free2D(int ***array) {
/* free the memory - the first element of the array is at the start */
free(&((*array)[0][0]));
/* free the pointers into the memory */
free(*array);
return 0;
}
int main(int argc, char* argv[])
{
MPI_Init(&argc, &argv);
if (argc != 3) {
fprintf(stderr, "Not enough arguments passed! Make sure you pass 2 filenames.\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
// Find out rank, size
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
int world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
// Declare file pointers
FILE* fa = NULL;
FILE* fb = NULL;
// Declare matrix pointers
int **A = NULL;
int **B = NULL;
int **C = NULL;
// Declare matrix dymensions
int ma = 0, na = 0;
int mb = 0, nb = 0;
// Nr of processes on each line/collumn in process mesh
int proc_grid_size = (int)sqrt(world_size);
// Single value for quadratic matrix size
int n = 0;
// Nr of elements on each line/collumn in local matrix
// of each process
int block_size = 0;
// Open files and read matrices
if (world_rank == 0)
{
fa = fopen(argv[1], "r");
fb = fopen(argv[2], "r");
// Read matrix dymensions
fscanf(fa, "%d %d\n", &ma, &na);
fscanf(fb, "%d %d\n", &mb, &nb);
// Check if matrices are quadratic
if ((ma != na) && (na != mb) && (mb != nb))
{
printf("Invalid matrices dimensions\n");
return 0;
}
n = na;
// Check if sqrt(nr_processes) divides matrix dimension
if ((n % proc_grid_size != 0) || (world_size % proc_grid_size != 0))
{
printf("Number of processes does not fit matrix size\n");
return 0;
}
block_size = n / proc_grid_size;
// Initialize matrices
A = (int**)calloc(n, sizeof(int*));
B = (int**)calloc(n, sizeof(int*));
//C = (int**)calloc(n, sizeof(int*));
for (int i = 0; i < n; i++)
{
A[i] = (int*)calloc(n, sizeof(int));
B[i] = (int*)calloc(n, sizeof(int));
//C[i] = (int*)calloc(n, sizeof(int));
}
// Read matrix A from file
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
fscanf(fa, "%d ", &A[i][j]);
printf("%d ", A[i][j]);
}
fscanf(fa, "\n");
printf("\n");
}
// Read matrix B from file
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
fscanf(fb, "%d ", &B[i][j]);
printf("%d ", B[i][j]);
}
fscanf(fb, "\n");
printf("\n");
}
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&block_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
else {
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&block_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
/*
Divide matrices in blocks and send each block to the corresponding process
*/
// Sizes of input global matrix
int sizes[2] = { n, n };
// Sizes of each block
int subsizes[2] = { block_size, block_size };
// Begining of current block
int starts[2] = { 0,0 };
// Declare subarray type
MPI_Datatype type, subarrtype;
MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_INT, &type);
MPI_Type_create_resized(type, 0, block_size * sizeof(int), &subarrtype);
MPI_Type_commit(&subarrtype);
// Declare global pointers to matrices
int *globalAptr = NULL;
int *globalBptr = NULL;
int **A2 = NULL;
int **B2 = NULL;
malloc2D(&A2, n, n);
malloc2D(&B2, n, n);
// Declare global return pointers
int *globalA2ptr = NULL;
int *globalB2ptr = NULL;
if (world_rank == 0)
{
globalAptr = &(A[0][0]);
globalBptr = &(B[0][0]);
globalA2ptr = &(A2[0][0]);
globalB2ptr = &(B2[0][0]);
}
// Declare local matrix pointers
int **a = NULL;
int **b = NULL;
malloc2D(&a, block_size, block_size);
malloc2D(&b, block_size, block_size);
// Scatter the A and B to all processes
int* sendcounts = (int*)malloc(proc_grid_size * proc_grid_size * sizeof(int));
int* displs = (int*)malloc(proc_grid_size * proc_grid_size * sizeof(int));
if (world_rank == 0)
{
for (int i = 0; i < proc_grid_size * proc_grid_size; i++)
sendcounts[i] = 1;
int disp = 0;
for (int i = 0; i < proc_grid_size; i++) {
for (int j = 0; j < proc_grid_size; j++) {
displs[i * proc_grid_size + j] = disp;
disp += 1;
}
disp += ((block_size) - 1) * proc_grid_size;
}
for (int i = 0; i < proc_grid_size * proc_grid_size; i++)
{
printf("Send cound: %d\n", sendcounts[i]);
}
}
MPI_Scatterv(globalAptr, sendcounts, displs, subarrtype, &(a[0][0]),
block_size * block_size, MPI_INT,
0, MPI_COMM_WORLD);
MPI_Scatterv(globalBptr, sendcounts, displs, subarrtype, &(b[0][0]),
block_size * block_size, MPI_INT,
0, MPI_COMM_WORLD);
// Now each processor has its local array, and can process it
for (int i = 0; i < block_size; i++) {
for (int j = 0; j < block_size; j++) {
a[i][j] = 10 + a[i][j];
b[i][j] = 10 + b[i][j];
}
}
// It all goes back to process 0
MPI_Gatherv(&(a[0][0]), block_size * block_size, MPI_INT,
globalA2ptr, sendcounts, displs, subarrtype,
0, MPI_COMM_WORLD);
MPI_Gatherv(&(b[0][0]), block_size * block_size, MPI_INT,
globalB2ptr, sendcounts, displs, subarrtype,
0, MPI_COMM_WORLD);
}
MPI_Finalize();
return 0;
}
非常感谢你!