大家好，希望有人能帮助我。我有一段用 OpenMP 并行化 Prim 算法的代码，现在需要让它在 Xeon Phi 上运行。请帮帮我，我真的不知道该怎么做。下面是我的 OpenMP 代码。
void ParallelPrim(double *pMatrix, TTreeNode** pMinSpanningTree, int Size)
{
int LastAdded;
TGraphNode NearestNode;
TGraphNode **NotInMinSpanningTree = new TGraphNode* [Size-1];
LastAdded = 0;
for(int i = 0; i < Size-1; i++)
{
NotInMinSpanningTree[i] = new TGraphNode;
NotInMinSpanningTree[i]->NodeNum = i+1;
NotInMinSpanningTree[i]->Distance = -1.0f;
NotInMinSpanningTree[i]->ParentNodeNum = -1;
}
for(int Iter = 1; Iter < Size; Iter++)
{
#pragma omp parallel for
for(int i = 0; i < Size-1; i++)
if(NotInMinSpanningTree[i] != NULL)
{
double t1 = NotInMinSpanningTree[i]->Distance;
double t2 = pMatrix[(NotInMinSpanningTree[i]->NodeNum) * Size + LastAdded];
if(((t1 < 0) && (t2 > 0)) || (t1>0) && (t2 > 0) && (t1 > t2))
{
NotInMinSpanningTree[i]->Distance = t2;
NotInMinSpanningTree[i]->ParentNodeNum = LastAdded;
}
}
NearestNode.NodeNum = -1;
NearestNode.Distance = 3000;
#pragma omp parallel
{
TGraphNode ThreadNearestNode;
ThreadNearestNode.NodeNum = -1;
ThreadNearestNode.Distance = 3000;
#pragma omp for
for(int i = 0; i < Size-1; i++)
{
if(NotInMinSpanningTree[i] != NULL)
{
double t1 = NotInMinSpanningTree[i]->Distance;
double t2 = ThreadNearestNode.Distance;
if((t1 > 0) && (t1 < t2) )
{
ThreadNearestNode.Distance = t1;
ThreadNearestNode.NodeNum = NotInMinSpanningTree[i]->NodeNum;
}
}
}
#pragma omp critical
{
if(ThreadNearestNode.Distance < NearestNode.Distance)
{
NearestNode.Distance = ThreadNearestNode.Distance;
NearestNode.NodeNum = ThreadNearestNode.NodeNum;
}
}
}
pMinSpanningTree[NearestNode.NodeNum] = new TTreeNode;
pMinSpanningTree[NearestNode.NodeNum]->NodeNum = NotInMinSpanningTree[NearestNode.NodeNum-1]->ParentNodeNum;
pMinSpanningTree[NearestNode.NodeNum]->Distance = NearestNode.Distance;
int Parent = NotInMinSpanningTree[NearestNode.NodeNum-1]->ParentNodeNum;
if(pMinSpanningTree[Parent] != NULL)
{
TTreeNode *tmp = new TTreeNode;
tmp->Distance = NearestNode.Distance;
tmp->NodeNum = NearestNode.NodeNum;
}
else
{
pMinSpanningTree[Parent] = new TTreeNode;
pMinSpanningTree[Parent]->Distance = NearestNode.Distance;
pMinSpanningTree[Parent]->NodeNum = NearestNode.NodeNum;
}
LastAdded = NearestNode.NodeNum;
delete NotInMinSpanningTree[NearestNode.NodeNum - 1];
NotInMinSpanningTree[NearestNode.NodeNum - 1] = NULL;
}
delete[] NotInMinSpanningTree;
}
1 回答
在英特尔至强融核（Xeon Phi）协处理器上运行代码有两种基本方式。第一种是使用 -mmic 和 -qopenmp 选项编译整个程序，然后通过 micnativeloadex 运行，或者用 scp 将可执行文件和所需的库复制到协处理器上运行。第二种是不使用 -mmic，而是修改代码，把希望在协处理器上运行的部分放入卸载（offload）区域；这样只有这部分代码会被发送到协处理器执行，其余代码仍在主机上运行。
Avi 发给您的演示文稿很好地概述了协处理器编程。此外，您还可以在以下位置找到有关协处理器编译和优化的基本信息：https://software.intel.com/en-us/articles/programming-and-compiling-for-intel-many-integrated-core-architecture 。
不过，这里有一个很大的问题：你的代码没有矢量化，并且有很多串行部分。要在协处理器上获得最佳性能，代码必须同时做到矢量化和并行化。