首页 文章

Linux POSIX C LibPCRE:`double free or corruption (fasttop)` 崩溃

提问于
浏览
2

我有以下代码(它读取进程虚拟内存并使用libpcre匹配一些字符串),它编译没有错误,但如果我用 -Wall 编译它,我得到一些警告,我会在代码后显示 .

编译后的程序可以运行,但会崩溃并报 *** glibc detected *** ./readmempcreuniq: double free or corruption (fasttop): 0x097b9c80 *** 。我怀疑问题出在 pcre_get_substring(page, vector, pairs, 0, &buff); 这一行,因为该函数的第一个参数需要 'const char *',而我传入的是 'unsigned char *'。我该如何修正?

#ifdef TARGET_64
// for 64bit target (see /proc/cpuinfo addr size virtual)
#define MEM_MAX (1ULL << 48)
#else
#define MEM_MAX (1ULL << 32)
#endif

#define _LARGEFILE64_SOURCE
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/ptrace.h>
#include <pcre.h>
#include <locale.h>

/*
 * Scan the memory of the process whose pid is given in argv[1] and print,
 * for each 4096-byte chunk, the distinct runs of 20-22 decimal digits
 * found in it.
 *
 * The target is attached with ptrace and /proc/<pid>/mem is read chunk by
 * chunk from offset 0 up to MEM_MAX.  Returns 0 when the scan completes;
 * exits with status 1 on a usage error, on open/ptrace failure, or when
 * realloc fails.
 */
int main(int argc, char **argv)
{
    if (argc < 2) {
        printf("Usage: %s <pid>\n", argv[0]);
        exit(1);
    }

    /* Build the path of the target's memory file and open it read-only. */
    char buf[128];
    int pid = atoi(argv[1]);
    snprintf(buf, sizeof(buf), "/proc/%d/mem", pid);
    int fd = open(buf, O_RDONLY);
    if (fd == -1) {
        /* %m is a glibc extension that expands to strerror(errno). */
        fprintf(stderr, "Error opening mem file: %m\n");
        exit(1);
    }

    /* PCRE state: compiled pattern, study data, and the match offsets. */
    pcre *f;
    pcre_extra *f_ext;
    char *pattern = "([0-9]{20,22})";
    const char *errstr;
    int errchar;
    int vector[50];
    int vecsize = 50;
    int pairs;
    const char *buff;
    const unsigned char *tables;
    int a;
    /* Growing array of matched substrings and the number stored in it. */
    int count = 0;
    const char **matches = NULL;
    const char **more_matches;

    /* Query the current locale, set it again, and build PCRE tables. */
    char *loc = setlocale(LC_ALL, 0);
    setlocale(LC_ALL, loc);
    tables = pcre_maketables();

    /* Attach to the target; on failure release the fd and give up. */
    long ptret = ptrace(PTRACE_ATTACH, pid, 0, 0);
    if (ptret == -1) {
        fprintf(stderr, "Ptrace failed: %s\n", strerror(errno));
        close(fd);
        exit(1);
    }

    unsigned char page[4096];
    unsigned long long offset = 0;


    /* Walk the whole address range one chunk at a time; failed reads are
     * skipped, so the loop only ends when offset reaches MEM_MAX. */
    while (offset < MEM_MAX) {
        lseek64(fd, offset, SEEK_SET);

        ssize_t ret;
        ret = read(fd, page, sizeof(page));

        if (ret > 0) {
            /* NOTE(review): when ret == sizeof(page) this writes one byte
             * past the end of page. */
            page[ret] = '\0';
            /* The pattern is recompiled for every chunk that was read. */
            if((f = pcre_compile(pattern, PCRE_CASELESS|PCRE_MULTILINE, &errstr, &errchar, tables)) == NULL)
            {
                printf("Error: %s\nCharacter N%i\nPattern:%s\n", errstr, errchar, pattern);
            }
            else
            {
                /* NOTE(review): f_ext is never released in this function. */
                f_ext = pcre_study(f, 0, &errstr);
                a = 0;

                /* Collect every match in the chunk, restarting the search
                 * one byte after the end of the previous match.
                 * NOTE(review): the subject length passed is sizeof(page),
                 * not the number of bytes actually read (ret). */
                while((pairs = pcre_exec(f, f_ext, page, sizeof(page), a, PCRE_NOTEMPTY, vector, vecsize)) >=0)
                {
                    /* Copies the whole match (group 0) into buff. */
                    pcre_get_substring(page, vector, pairs, 0, &buff);
                    //printf("%s\n", buff);
                    /* Grow the array by one slot and store the new string. */
                    more_matches = realloc(matches, (count+1)* sizeof(*more_matches));
                    if (more_matches!=NULL)
                    {
                        matches=more_matches;
                        matches[count++]=buff;
                    }
                    else
                    {
                        free(matches);
                        puts("Error (re)allocating memory");
                        exit(1);
                    }
                    a = vector[1] + 1;
                }
                /* De-duplicate: copy each string not seen before into the
                 * variable-length array uniques, preserving first-seen order. */
                int matches_len = count;
                const char *uniques[matches_len];
                int uniques_len = 0;
                int already_exists;
                int i, j;
                for (i = 0; i < matches_len; i++)
                {
                    already_exists = 0;
                    for ( j = 0; j < uniques_len; j++)
                    {
                        if (!strcmp(matches[i], uniques[j]))
                        {
                            already_exists = 1;
                            break;
                        }
                    }
                    if (!already_exists)
                    {
                        uniques[uniques_len] = matches[i];
                        uniques_len++;
                    }
                }
                for (i = 0; i < uniques_len; i++)
                {
                    printf("%s\n", uniques[i]);
                }
                /* NOTE(review): matches is freed here but neither it nor
                 * count is reset, so the next chunk hands the freed pointer
                 * to realloc and the loops above index entries 0..count-1
                 * of the new array.  The strings stored in matches are not
                 * freed either. */
                free(matches);
                pcre_free(f);
            }

        }

        offset += sizeof(page);
    }

    ptrace(PTRACE_DETACH, pid, 0, 0);
    close(fd);
    return 0;
}

错误:

xtmtrx@server:~/regex/proc$ ./readmempcreuniq 5663
92991999918876543209
99299299292663552673
111992229922222288
119988922220000077
*** glibc detected *** ./readmempcreuniq: double free or corruption (fasttop): 0x097b9c80 ***
======= Backtrace: =========
/lib/libc.so.6(+0x6c0c1)[0xb7dfa0c1]
/lib/libc.so.6(+0x6d930)[0xb7dfb930]
/lib/libc.so.6(+0x71681)[0xb7dff681]
/lib/libc.so.6(realloc+0xe3)[0xb7dffb13]
./readmempcreuniq[0x8048c86]
/lib/libc.so.6(__libc_start_main+0xe7)[0xb7da4ce7]
./readmempcreuniq[0x80488b1]
======= Memory map: ========
08048000-0804a000 r-xp 00000000 fd:01 68388533                           /root/regex/proc/readmempcreuniq
0804a000-0804b000 r--p 00001000 fd:01 68388533                           /root/regex/proc/readmempcreuniq
0804b000-0804c000 rw-p 00002000 fd:01 68388533                           /root/regex/proc/readmempcreuniq
097a1000-097c2000 rw-p 097a1000 00:00 0                                  [heap]
b7c00000-b7c21000 rw-p b7c00000 00:00 0
b7c21000-b7d00000 ---p b7c21000 00:00 0
b7d66000-b7d80000 r-xp 00000000 fd:01 65901968                           /lib/libgcc_s.so.1
b7d80000-b7d81000 r--p 00019000 fd:01 65901968                           /lib/libgcc_s.so.1
b7d81000-b7d82000 rw-p 0001a000 fd:01 65901968                           /lib/libgcc_s.so.1
b7d8c000-b7d8e000 rw-p b7d8c000 00:00 0
b7d8e000-b7ee5000 r-xp 00000000 fd:01 65901949                           /lib/libc-2.12.1.so
b7ee5000-b7ee7000 r--p 00157000 fd:01 65901949                           /lib/libc-2.12.1.so
b7ee7000-b7ee8000 rw-p 00159000 fd:01 65901949                           /lib/libc-2.12.1.so
b7ee8000-b7eeb000 rw-p b7ee8000 00:00 0
b7eeb000-b7f1e000 r-xp 00000000 fd:01 65901993                           /lib/libpcre.so.3.12.1
b7f1e000-b7f1f000 r--p 00032000 fd:01 65901993                           /lib/libpcre.so.3.12.1
b7f1f000-b7f20000 rw-p 00033000 fd:01 65901993                           /lib/libpcre.so.3.12.1
b7f29000-b7f2c000 rw-p b7f29000 00:00 0
b7f2c000-b7f48000 r-xp 00000000 fd:01 65901940                           /lib/ld-2.12.1.so
b7f48000-b7f49000 r--p 0001b000 fd:01 65901940                           /lib/ld-2.12.1.so
b7f49000-b7f4a000 rw-p 0001c000 fd:01 65901940                           /lib/ld-2.12.1.so
bf8c1000-bf8d6000 rw-p 7ffffffe9000 00:00 0                              [stack]
Aborted

使用 -Wall 开关编译代码的警告:

xtmtrx@server:~/regex/proc$ gcc -o readmempcreuniq readmempcreuniq.c -lpcre -Wall
readmempcreuniq.c: In function 'main':
readmempcreuniq.c:83: warning: pointer targets in passing argument 3 of 'pcre_exec' differ in signedness
/usr/include/pcre.h:286: note: expected 'const char *' but argument is of type 'unsigned char *'
readmempcreuniq.c:85: warning: pointer targets in passing argument 1 of 'pcre_get_substring' differ in signedness
/usr/include/pcre.h:297: note: expected 'const char *' but argument is of type 'unsigned char *'

编辑:

根据@stdcall提示,我使用 efence 编译了程序,然后在核心转储上使用了GDB:

xtmtrx@server:~/regex/proc$ ./readmempcreuniq 6036

  Electric Fence 2.1 Copyright (C) 1987-1998 Bruce Perens.
5,
Segmentation fault (core dumped)
xtmtrx@server:~/regex/proc$ gdb ./readmempcreuniq core
GNU gdb (GDB) 7.2-ubuntu
Copyright (C) 2010 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "i686-linux-gnu".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
Reading symbols from /root/regex/proc/readmempcreuniq...done.
[New Thread 6093]
Reading symbols from /lib/libpcre.so.3...(no debugging symbols found)...done.
Loaded symbols for /lib/libpcre.so.3
Reading symbols from /usr/lib/libefence.so.0...(no debugging symbols found)...done.
Loaded symbols for /usr/lib/libefence.so.0
Reading symbols from /lib/libc.so.6...(no debugging symbols found)...done.
Loaded symbols for /lib/libc.so.6
Reading symbols from /lib/libpthread.so.0...(no debugging symbols found)...done.
Loaded symbols for /lib/libpthread.so.0
Reading symbols from /lib/ld-linux.so.2...(no debugging symbols found)...done.
Loaded symbols for /lib/ld-linux.so.2
Core was generated by `./readmempcreuniq 6036'.
Program terminated with signal 11, Segmentation fault.
#0  0x08048ef8 in main (argc=2, argv=0xbfe1d2d4) at readmempcreuniq.c:125
125                                                     uniques[uniques_len] = matches[i];

似乎@alk是正确的,问题出在 uniques[uniques_len] = matches[i];

新编辑:

根据@alk提示,我改变了这条线:

for ( j = 0; j < uniques_len; j++)

至:

for ( j = 0; j < matches_len; j++)

现在segfault在别处:

Program terminated with signal 11, Segmentation fault.
#0  0x08048ea7 in main (argc=2, argv=0xbfaaa3e4) at readmempcreuniq.c:118
118                                                     if (!strcmp(matches[i], uniques[j]))

2 回答

  • 3

    在 free() 之后、进入外层循环的下一轮之前,你从未把 matches 重置为 NULL。因此第一轮结束后它仍然保存着已被释放的旧指针,下一轮的 realloc 就会作用在这块已释放的内存上。

    在进入内部处理循环之前将其设置为 NULL (在第一次传递时为冗余),或者在 free(matches) 之后立即将其设置回 NULL . 或者你可以简单地将它放在next-outer循环的包含范围内,并使用初始的 NULL 值,但前面提到的更改是我能想到的最小的 .

    Examples

    matches = NULL; // HERE
    while((pairs = pcre_exec(f, f_ext, page, sizeof(page), a, PCRE_NOTEMPTY, vector, vecsize)) >=0)
    {
            pcre_get_substring(page, vector, pairs, 0, &buff);
            //printf("%s\n", buff);
            more_matches = realloc(matches, (count+1)* sizeof(*more_matches));
            if (more_matches!=NULL)
            {
                    matches=more_matches;
                    matches[count++]=buff;
            }
            else
            {
                    free(matches);
                    puts("Error (re)allocating memory");
                    exit(1);
            }
            a = vector[1] + 1;
    }
    

    要么....

    for (i = 0; i < uniques_len; i++)
    {
            printf("%s\n", uniques[i]);
    }
    free(matches);
    matches = NULL; // or HERE
    pcre_free(f);
    

    More Stuff

    继续走下我注意到的事情:

    这个:

    ssize_t ret;
    ret = read(fd, page, sizeof(page));
    
    if (ret > 0) {
            page[ret] = '\0';
    

    看起来是想写入一个空字符终止符。如果是这样,当缓冲区被完全读满(ret == sizeof(page))时,page[ret] 就越界了,会引发未定义行为(undefined behavior)。应该写成这样:

    ssize_t ret = read(fd, page, sizeof(page)-1); // NOTE SPACE FOR TERM
    if (ret > 0) {
            page[ret] = 0;
    

    如果缓冲区的大小是特意选定的(比如你出于某种原因选择了 4K),那么数组应声明为 4097 字节,这样才能在读满整整 4K 数据的同时为终止符留出位置。


    And another...

    我无法断言你读取的页面是否需要像我上面的代码那样添加终止符。但假设需要,并且你已经按我的建议修改了(或者没有修改),下面这一行看起来也有问题:

    while((pairs = pcre_exec(f, f_ext, page, sizeof(page), a, PCRE_NOTEMPTY, vector, vecsize)) >=0)
    

    在这里你传入的是整个缓冲区的大小,而不是实际读到的数据的大小。我得先声明我并不熟悉这个 API,但我很确定这里应该是:

    // notice the length of the buffer passed, ret
    while((pairs = pcre_exec(f, f_ext, page, ret, a, PCRE_NOTEMPTY, vector, vecsize)) >=0)
    

    换句话说,当发生短读(实际读到的字节数少于缓冲区大小)时,你告诉它的数据长度比实际的要长。再说一次,我对这个 API 并不熟悉,但这样修改看起来是合理的。


    Of Unique Matches...

    希望更容易阅读 .

    int matches_len = count, uniques_len = 0;
    int i = 0, j = 0;
    
    const char *uniques[matches_len];
    for (i=0; i < matches_len; ++i)
    {
        for (j = 0; j < uniques_len; ++j)
        {
            if (!strcmp(matches[i], uniques[j]))
                break;
        }
    
        if (j == uniques_len)
            uniques[uniques_len++] = matches[i];
    }
    
    for (i = 0; i < uniques_len; ++i)
        printf("%s\n", uniques[i]);
    

    Continuing on...

    每页后重置 count 为零 . 在 free(matches); matches = NULL; 之后会是一个好地方 .

    另外值得注意:一旦读取开始失败,你的外层循环没有任何退出条件。因此对于无法定位或无法读取的地址范围,程序仍会不停地执行注定失败的 lseek/read,直到 offset 达到上限 MEM_MAX 为止。


    Final Thoughts

    我认为这接近你要做的事情:

    #define _LARGEFILE64_SOURCE
    #include <unistd.h>
    #include <stdio.h>
    #include <fcntl.h>
    #include <stdlib.h>
    #include <string.h>
    #include <errno.h>
    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <sys/ptrace.h>
    #include <pcre.h>
    #include <locale.h>
    #include <ctype.h>
    
    /*
     * Print every distinct 8-hex-digit string found in the mapped memory of
     * the process whose pid is given as argv[1].
     *
     * The target is attached with ptrace, its mappings are listed from
     * /proc/<pid>/maps, and each mapping is read from /proc/<pid>/mem in
     * chunks of up to 4096 bytes.  Matches are accumulated across all chunks
     * and printed once at the end, followed by the total count.
     *
     * Returns 0 on completion, 1 on a usage error, -1 if the pattern does not
     * compile; exits with status 1 if the ptrace attach fails.
     *
     * Known limitation: a match that straddles two chunks is not detected.
     * NOTE(review): ptrace(2) says the tracee may not have stopped yet when
     * PTRACE_ATTACH returns; a waitpid() may be required before reading.
     */
    int main(int argc, char **argv)
    {
        if (argc < 2)
        {
            fprintf(stderr, "Usage: %s <pid>\n", argc > 0 ? argv[0] : "readmem");
            return 1;
        }
    
        /* Take the pid from the command line; strtol reports bad input,
         * which atoi cannot. */
        errno = 0;
        char *pid_end = NULL;
        long pid_arg = strtol(argv[1], &pid_end, 10);
        pid_t pid = (pid_t)pid_arg;
        if (pid_end == argv[1] || *pid_end != '\0' || errno == ERANGE ||
            pid_arg <= 0 || (long)pid != pid_arg)
        {
            fprintf(stderr, "Invalid pid: %s\n", argv[1]);
            return 1;
        }
    
        setlocale(LC_ALL,"");
    
        const char *error = NULL;
        int erroffset = 0;
        const char **uniques = NULL;   /* owned list of distinct matches */
        size_t uniques_len = 0;
    
        /* Compile once; the same pattern is reused for every chunk. */
        const char regex[] = "[0-9A-Fa-f]{8}";
        pcre* re = pcre_compile (regex,          /* the pattern */
                        PCRE_MULTILINE|PCRE_DOTALL|PCRE_NEWLINE_ANYCRLF,
                        &error,         /* for error message */
                        &erroffset,     /* for error offset */
                        NULL);          /* use default character tables */
        if (!re)
        {
            printf("pcre_compile failed (offset: %d), %s\n", erroffset, error);
            return -1;
        }
    
        // start proc trace
        long ptret = ptrace(PTRACE_ATTACH, pid, 0, 0);
        if (ptret == -1)
        {
            fprintf(stderr, "Ptrace failed: %s\n", strerror(errno));
            pcre_free(re);
            exit(1);
        }
    
        char path[256];
        snprintf(path, sizeof(path), "/proc/%d/maps", (int)pid);
        FILE *maps = fopen(path, "r");
        if (!maps)
            fprintf(stderr, "Error opening %s: %s\n", path, strerror(errno));
        snprintf(path, sizeof(path), "/proc/%d/mem", (int)pid);
        int mem = open(path, O_RDONLY);
        if (mem == -1)
            fprintf(stderr, "Error opening %s: %s\n", path, strerror(errno));
    
        if(maps && (mem != -1))
        {
            char buf[BUFSIZ + 1];
            while(fgets(buf, BUFSIZ, maps))
            {
                /* Each maps line starts with "<start>-<end>" in hex. */
                long long unsigned int start, end;
                if (sscanf(buf, "%llx-%llx", &start, &end) != 2)
                    break;
    
                printf("reading %llx - %llx\n", start, end);
    
                /* If the seek fails, the reads below would return data from
                 * the previous position, so skip the region instead. */
                if (lseek64(mem, (off64_t)start, SEEK_SET) == (off64_t)-1)
                    continue;
    
                while (start < end)
                {
                    char page[4096] =  {0};
                    size_t want = sizeof(page);
                    if (end - start < want)
                        want = (size_t)(end - start);
    
                    ssize_t rd = read(mem, page, want);
                    if (rd <= 0)
                        break;      /* unreadable region or end of data */
    
                    /* Advance by what was actually read so that start keeps
                     * tracking the file position after a short read. */
                    start += (long long unsigned int)rd;
    
                    int ov[128] = {0};
                    int offset = 0;
                    int rc = 0;
    
                    while ((rc = pcre_exec(re, NULL, page, (int)rd, offset, 0, ov, 128)) >= 0)
                    {
                        for (int i = 0; i < rc; ++i)
                        {
                            const char *sp = NULL;
                            /* On failure sp is not usable; skip this group. */
                            if (pcre_get_substring(page, ov, rc, i, &sp) < 0)
                                continue;
    
                            // search unique list
                            size_t j = 0;
                            for (; j < uniques_len; ++j)
                            {
                                if (!strcmp(sp, uniques[j]))
                                    break;
                            }
    
                            if (uniques_len == j)
                            {
                                const char **tmp = realloc(uniques, (uniques_len+1)*sizeof(*uniques));
                                if (tmp == NULL)
                                {
                                    perror("Failed to resize uniques.");
                                    pcre_free_substring(sp);
                                }
                                else
                                {
                                    uniques = tmp;
                                    uniques[uniques_len++] = sp;
                                }
                            }
                            else
                            {   // delete string. not needed
                                pcre_free_substring(sp);
                            }
                        }
    
                        /* Resume at the end of the whole match (ov[1]) so
                         * matches do not overlap; step one extra byte if the
                         * match was empty to guarantee progress. */
                        offset = (ov[1] > ov[0]) ? ov[1] : ov[1] + 1;
                    }
                }
            }
        }
    
        /* Close whichever of the two files was opened. */
        if (maps)
            fclose(maps);
        if (mem != -1)
            close(mem);
    
        size_t n = 0;
        for (; n<uniques_len; ++n)
        {
            printf("%s\n", uniques[n]);
            pcre_free_substring(uniques[n]);
        }
        printf("total uniques: %zu\n", uniques_len);
        free(uniques);
        pcre_free(re);
    
        ptrace(PTRACE_DETACH, pid, 0, 0);
        return 0;
    }
    

    提醒:我对这个 API 的了解仅限于在这里看到的内容和在网上的简单查阅,效果可能因人而异,使用风险请自行承担(YMMV, UAYOR)。不过它看起来做的正是你一直想做的事:不按页区分,把所有唯一的匹配项累积在一起(我认为页边界仍然是个问题——跨页的匹配会被漏掉——但那就留待以后再说了)。

  • 3

    除了 WhozCraig 的回答中指出的问题之外:

    代码定义

    const char *uniques[matches_len];
    

    但循环其索引 j 直到 <uniques_len

    for ( j = 0; j < uniques_len; j++)
        {
          if (!strcmp(matches[i], uniques[j]))
          {
            already_exists = 1;
            break;
          }
        }
    

    所以 uniques 很可能被越界访问,从而引发未定义行为并导致崩溃。

    Update:

    进一步的调查表明这并不是问题所在,尽管这仍然是一种危险的写法。


    问题是 matches 没有指向此行中正确分配的内存:

    if (!strcmp(matches[i], uniques[j]))
    

    要暴露这个问题,请在 realloc 分配内存之后加上适当的内存初始化,也就是把这段代码:

    int count = 0;
              const char ** matches = NULL;
              [...]
    
                        more_matches = realloc(matches, (count+1)* sizeof(*more_matches));
                        if (more_matches!=NULL)
                        {
                            matches=more_matches;
                            matches[count++]=buff;
                        }
    

    成为:

    size_t count = 0, count_prev = 0;
              const char ** matches = NULL;
              [...]
    
                        more_matches = realloc(matches, (count + 1) * sizeof(*more_matches));
                        if (more_matches != NULL)
                        {
                            memset(more_matches + count_prev, 0, (count + 1 - count_prev) * sizeof(*more_matches));
                            count_prev = count;
                            matches = more_matches;
                            matches[count++] = buff;
                        }
    

    一般性的建议:调试时始终带调试符号编译(给 gcc 加上 -g 选项),然后在 gdb 和 Valgrind 下运行程序。这两个工具能帮你定位代码中的大多数问题。

相关问题