首页 文章

为什么 C++ 中组合数据结构的性能低于独立数组

提问于
浏览
-1

这是我用Linux命令编译的测试代码

g++ main.cpp -O3 -o stest

我尝试了两种方法来组合数据( test2 和 test3 ) . 但是,这两种方式都未能达到我预期的更好的性能 . 在我看来,组合数据应该比独立数组具有更好的性能,因为较高层的高速缓存是从较低层的高速缓存逐块加载数据的 . 因此,组合数据有更多机会在一次内存访问中全部加载 . 而独立数组( test1 )则需要三次内存访问 . 但是,测试结果表明 test1 具有最佳性能 . 这对我来说太奇怪了,而且我不知道原因 . 如果你知道,请告诉我 . 提前致谢 .

#include <string.h>
#include <sys/times.h>
#include <unistd.h>

#include <climits>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <sstream>
using namespace std;

// File-scope CPU-time snapshots (struct tms, as filled in by times()) marking
// the start and the end of a timed region.
tms start, tEnd;

/**
 * Benchmarks the "independent arrays" layout.
 *
 * Allocates three separate int arrays a, b and c of n elements each and then
 * times three passes over them: fill a[i] = b[i] = i, compute
 * c[i] = a[i] + b[i], and accumulate the sum of c. Allocation and release of
 * the arrays are outside the timed region. The consumed CPU time (user +
 * system, as reported by times()) and the sum are printed to stdout.
 *
 * @param n Number of elements per array; must be non-negative. The stored
 *          values are int, so c[i] = 2 * i only stays in range while
 *          n <= 2^30.
 * @return  The sum of all c[i], i.e. n * (n - 1).
 */
long long test1(int n) {
    // Owning handles release the arrays on every exit path, including a
    // failed later allocation. The storage is deliberately left
    // uninitialized (plain new int[n]) so that the first write to each
    // element still happens inside the timed region.
    std::unique_ptr<int[]> a(new int[n]);
    std::unique_ptr<int[]> b(new int[n]);
    std::unique_ptr<int[]> c(new int[n]);

    // Function-local snapshots: the measurement is not shared with anyone.
    tms begin_ticks;
    tms end_ticks;

    times(&begin_ticks);
    for (int i = 0; i < n; i++) {
        a[i] = b[i] = i;
    }

    for (int i = 0; i < n; i++) {
        c[i] = a[i] + b[i];
    }

    long long sum = 0;
    for (int i = 0; i < n; i++) {
        sum += c[i];
    }
    times(&end_ticks);

    // times() counts clock ticks; dividing by the ticks-per-second rate
    // yields seconds, so the value is labelled "s" (not "ms").
    const double elapsed_seconds =
        static_cast<double>(end_ticks.tms_utime - begin_ticks.tms_utime +
                            end_ticks.tms_stime - begin_ticks.tms_stime) /
        sysconf(_SC_CLK_TCK);
    std::cout << "test1: " << elapsed_seconds << "s  result=" << sum << " " << std::endl;
    return sum;
}

/** One record of the combined layout: the three values that belong to the
 *  same index are stored next to each other. */
struct D {
    int a;
    int b;
    int c;
};

/**
 * Benchmarks the "array of structs" layout.
 *
 * Allocates a single array of n D records and then times three passes over
 * it: fill d[i].a = d[i].b = i, compute d[i].c = d[i].a + d[i].b, and
 * accumulate the sum of all d[i].c. Allocation and release of the array are
 * outside the timed region. The consumed CPU time (user + system, as
 * reported by times()) and the sum are printed to stdout.
 *
 * @param n Number of records; must be non-negative. The stored values are
 *          int, so c = 2 * i only stays in range while n <= 2^30.
 * @return  The sum of all d[i].c, i.e. n * (n - 1).
 */
long long test2(int n) {
    // The owning handle releases the array on every exit path. new D[n]
    // leaves the records uninitialized on purpose, so the first write to
    // each record still happens inside the timed region.
    std::unique_ptr<D[]> d(new D[n]);

    // Function-local snapshots: the measurement is not shared with anyone.
    tms begin_ticks;
    tms end_ticks;

    times(&begin_ticks);
    for (int i = 0; i < n; i++) {
        D &rec = d[i];
        rec.a = rec.b = i;
    }

    for (int i = 0; i < n; i++) {
        D &rec = d[i];
        rec.c = rec.a + rec.b;
    }

    long long sum = 0;
    for (int i = 0; i < n; i++) {
        sum += d[i].c;
    }
    times(&end_ticks);

    // times() counts clock ticks; dividing by the ticks-per-second rate
    // yields seconds, so the value is labelled "s" (not "ms").
    const double elapsed_seconds =
        static_cast<double>(end_ticks.tms_utime - begin_ticks.tms_utime +
                            end_ticks.tms_stime - begin_ticks.tms_stime) /
        sysconf(_SC_CLK_TCK);
    std::cout << "test2: " << elapsed_seconds << "s  result=" << sum << " " << std::endl;
    return sum;
}

/**
 * Benchmarks the "interleaved flat array" layout.
 *
 * Allocates one int array of 3 * n elements in which index i owns the three
 * consecutive slots [3i], [3i + 1] and [3i + 2], and then times three passes
 * over it: fill slots 0 and 1 with i, store their sum in slot 2, and
 * accumulate all slot-2 values. Allocation and release of the array are
 * outside the timed region. The consumed CPU time (user + system, as
 * reported by times()) and the sum are printed to stdout.
 *
 * @param n Number of logical elements; must be non-negative. The stored
 *          values are int, so 2 * i only stays in range while n <= 2^30.
 * @return  The sum of all slot-2 values, i.e. n * (n - 1).
 */
long long test3(int n) {
    // Sizes and offsets are computed in std::size_t: 3 * n and 3 * i would
    // overflow int once n exceeds INT_MAX / 3.
    const std::size_t count = static_cast<std::size_t>(n);

    // The owning handle releases the array on every exit path. The storage
    // is deliberately left uninitialized so that the first write to each
    // element still happens inside the timed region.
    std::unique_ptr<int[]> abc(new int[3 * count]);

    // Function-local snapshots: the measurement is not shared with anyone.
    tms begin_ticks;
    tms end_ticks;

    times(&begin_ticks);
    for (int i = 0; i < n; i++) {
        const std::size_t base = 3 * static_cast<std::size_t>(i);
        abc[base] = abc[base + 1] = i;
    }

    for (int i = 0; i < n; i++) {
        const std::size_t base = 3 * static_cast<std::size_t>(i);
        abc[base + 2] = abc[base] + abc[base + 1];
    }

    long long sum = 0;
    for (int i = 0; i < n; i++) {
        sum += abc[3 * static_cast<std::size_t>(i) + 2];
    }
    times(&end_ticks);

    // times() counts clock ticks; dividing by the ticks-per-second rate
    // yields seconds, so the value is labelled "s" (not "ms").
    const double elapsed_seconds =
        static_cast<double>(end_ticks.tms_utime - begin_ticks.tms_utime +
                            end_ticks.tms_stime - begin_ticks.tms_stime) /
        sysconf(_SC_CLK_TCK);
    std::cout << "test3: " << elapsed_seconds << "s  result=" << sum << " " << std::endl;
    return sum;
}


/**
 * Runs the three layout benchmarks three times, rotating their order on each
 * round so that every benchmark is executed first, second and third once.
 *
 * Usage: <program> [n]
 *   n  Optional element count (decimal). Defaults to 9999999 when omitted.
 *
 * @return 0 on success, 1 when the argument is not a valid element count.
 */
int main(int argc, char *argv[]) {
    int n = 9999999;  // used when no element count is given on the command line

    // argv[1] only exists when an argument was actually passed.
    if (argc > 1) {
        // Upper bound keeps 3 * n, the element count of test3's combined
        // array, within int range; it also keeps the stored values 2 * i
        // from overflowing.
        const long max_n = INT_MAX / 3;

        char *end = nullptr;
        const long parsed = std::strtol(argv[1], &end, 10);
        const bool is_number = (end != argv[1]) && (*end == '\0');
        if (!is_number || parsed < 0 || parsed > max_n) {
            std::cerr << "usage: " << argv[0] << " [n]   (0 <= n <= " << max_n << ")" << std::endl;
            return 1;
        }
        n = static_cast<int>(parsed);
    }

    test1(n);
    test2(n);
    test3(n);

    std::cout << "after changing order" << std::endl;

    test2(n);
    test3(n);
    test1(n);

    std::cout << "after changing order" << std::endl;

    test3(n);
    test1(n);
    test2(n);

    return 0;
}

我在一台带有四个i5-4460 CPU和一个 8GB 内存的计算机上测试了 stest . 这是我用来测试程序的命令,我确信使用参数 399999999 ,计算机不会出现内存不足:

q@q-lab:~/Desktop$ ./stest 399999999
test1: 1.61ms  result=159999998800000002 
test2: 2.38ms  result=159999998800000002 
test3: 2.37ms  result=159999998800000002 
after changing order
test2: 2.38ms  result=159999998800000002 
test3: 2.38ms  result=159999998800000002 
test1: 1.61ms  result=159999998800000002 
after changing order
test3: 2.38ms  result=159999998800000002 
test1: 1.61ms  result=159999998800000002 
test2: 2.39ms  result=159999998800000002

1 回答

  • -1

    组合数据需要更多时间来寻址或计算数据偏移 . 并且,CPU缓存是不可预测的并且难以优化 . 最好不要尝试优化CPU缓存 .

相关问题