fwrite性能瓶颈：大数据写入的坑你踩过吗？

创作时间:

作者:

@小白创作中心

fwrite性能瓶颈：大数据写入的坑你踩过吗？

引用

CSDN

等

来源

https://blog.csdn.net/weixin_43969601/article/details/116275395

https://blog.csdn.net/zsz_shsf/article/details/137385208

https://blog.csdn.net/sinat_19383265/article/details/136577677

https://blog.csdn.net/SASfafaah/article/details/139415062

https://docs.pingcode.com/baike/1252856

https://www.cnblogs.com/cyamazing/p/18297046

https://my.oschina.net/emacs_8785140/blog/17258471

在C语言中，fwrite函数是进行文件写入操作的重要工具。然而，当处理大数据写入时，fwrite可能会遇到性能瓶颈，导致程序运行缓慢甚至失败。本文将深入分析fwrite的性能瓶颈，并提供有效的优化方案。

性能瓶颈分析

缓冲区机制带来的性能开销

fwrite默认使用标准库的缓冲机制，当数据量很大时，频繁的缓冲区刷新会带来显著的性能开销。每次缓冲区满时，数据需要从用户空间复制到内核空间，然后写入磁盘，这个过程会消耗大量CPU时间。

系统调用频繁导致的性能下降

fwrite本身带有用户态缓冲，并不是每次调用都会触发系统调用；但如果缓冲区过小、流被设置为无缓冲（_IONBF），或者每次写入后都调用fflush，底层write系统调用的次数就会急剧增加，显著降低程序性能。系统调用涉及用户态到内核态的切换，每次切换都需要保存和恢复寄存器状态，带来额外的开销。

磁盘I/O瓶颈

在大数据写入场景下，磁盘I/O可能成为性能瓶颈。特别是当磁盘速度较慢时，频繁的写入操作会导致磁盘队列积压，进一步降低写入效率。

优化方案

针对上述性能瓶颈，我们可以采取以下优化措施：

使用大缓冲区减少I/O操作次数

通过增加缓冲区大小，可以显著减少I/O操作的次数。例如，使用1MB的缓冲区而不是默认的8KB缓冲区，可以将I/O操作次数减少到原来的1/128。

#include <stdio.h>
#include <stdlib.h>

/**
 * Write a block of memory to a file through a 1 MiB stdio buffer.
 *
 * The file is created or truncated and opened in binary mode. A 1 MiB buffer
 * is installed on the stream with setvbuf() so the C library can batch output
 * into large flushes; the caller's data is then passed to fwrite() directly,
 * without an intermediate copy.
 *
 * Failures are reported on stderr. The function returns nothing, so callers
 * cannot observe them programmatically.
 *
 * @param filename  Path of the file to create or overwrite.
 * @param data      Start of the bytes to write; only dereferenced when
 *                  dataSize is non-zero.
 * @param dataSize  Number of bytes to write. Zero produces an empty file.
 */
void writeWithBuffer(const char *filename, const void *data, size_t dataSize) {
    FILE *fp = fopen(filename, "wb");
    if (fp == NULL) {
        perror("File opening failed");
        return;
    }

    const size_t bufferSize = 1024 * 1024; // 1MB buffer
    char *buffer = malloc(bufferSize);
    if (buffer == NULL) {
        perror("Memory allocation failed");
        fclose(fp);
        return;
    }

    /* setvbuf() has to be the first operation on the stream. It is not
     * required to set errno, so report with a plain message; on failure the
     * stream keeps its default buffer and writing can still proceed. */
    if (setvbuf(fp, buffer, _IOFBF, bufferSize) != 0) {
        fprintf(stderr, "setvbuf failed; continuing with the default buffer\n");
    }

    if (dataSize > 0 && fwrite(data, 1, dataSize, fp) != dataSize) {
        perror("File writing failed");
    }

    /* fclose() flushes whatever is still buffered, so a late write error
     * (e.g. disk full) surfaces here rather than in fwrite(). */
    if (fclose(fp) != 0) {
        perror("File closing failed");
    }

    /* Release only after fclose(): the stream uses the buffer until then. */
    free(buffer);
}

采用分块写入策略

将大数据分成若干小块进行写入，可以有效减少内存占用，同时便于处理大文件。这种方法特别适用于内存有限的场景。

#include <stdio.h>
#include <stdlib.h>

/**
 * Write a block of memory to a file in slices of at most chunkSize bytes.
 *
 * The file is created or truncated and opened in binary mode. Each slice is
 * passed to fwrite() straight from the caller's memory, so no staging buffer
 * is allocated. Writing stops at the first short write.
 *
 * Failures are reported with perror(). The function returns nothing, so
 * callers cannot observe them programmatically.
 *
 * @param filename   Path of the file to create or overwrite.
 * @param data       Start of the bytes to write; only dereferenced when
 *                   dataSize is non-zero.
 * @param dataSize   Total number of bytes to write. Zero produces an empty file.
 * @param chunkSize  Maximum number of bytes per fwrite() call. A value of 0
 *                   cannot make progress, so it is replaced by 64 KiB.
 */
void writeInChunks(const char *filename, const void *data, size_t dataSize, size_t chunkSize) {
    /* A zero chunk would never advance the loop below. */
    if (chunkSize == 0) {
        chunkSize = 64 * 1024;
    }

    FILE *fp = fopen(filename, "wb");
    if (fp == NULL) {
        perror("File opening failed");
        return;
    }

    const unsigned char *cursor = data;
    size_t remaining = dataSize;
    while (remaining > 0) {
        size_t currentChunkSize = (remaining < chunkSize) ? remaining : chunkSize;
        size_t written = fwrite(cursor, 1, currentChunkSize, fp);
        if (written != currentChunkSize) {
            /* Short write: the stream is in an error state, so stop here. */
            perror("File writing failed");
            break;
        }
        cursor += written;
        remaining -= written;
    }

    /* fclose() flushes buffered data, so a late write error surfaces here. */
    if (fclose(fp) != 0) {
        perror("File closing failed");
    }
}

利用内存映射提高写入效率

内存映射是一种将文件内容映射到进程地址空间的方法，程序可以像访问普通内存一样读写文件内容，省去read/write系统调用以及用户态缓冲区与内核之间的额外拷贝，从而提高访问速度。在Linux、macOS等POSIX系统上，C程序可以通过mmap函数实现内存映射（mmap属于POSIX接口，而非C标准库的一部分）。

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

/**
 * Write a block of memory to a file through a shared memory mapping.
 *
 * The file is opened read/write (created with owner read/write permission if
 * missing), resized to exactly dataSize bytes with ftruncate(), mapped with
 * MAP_SHARED, filled with memcpy(), synchronously flushed with msync(MS_SYNC),
 * then unmapped and closed.
 *
 * Failures are reported with perror(). The function returns nothing, so
 * callers cannot observe them programmatically.
 *
 * @param filename  Path of the file to create or overwrite.
 * @param data      Start of the bytes to write; only dereferenced when
 *                  dataSize is non-zero.
 * @param dataSize  Number of bytes to write; the file ends up exactly this
 *                  long. Zero produces an empty file without mapping anything.
 */
void writeWithMemoryMapping(const char *filename, const void *data, size_t dataSize) {
    int fd = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
    if (fd == -1) {
        perror("File opening failed");
        return;
    }

    /* NOTE(review): dataSize is converted to off_t here; values beyond the
     * range of off_t are not detected. */
    if (ftruncate(fd, (off_t)dataSize) == -1) {
        perror("ftruncate failed");
        close(fd);
        return;
    }

    /* mmap() rejects a zero length with EINVAL. The file is already empty at
     * this point, so there is nothing left to do. */
    if (dataSize == 0) {
        if (close(fd) == -1) {
            perror("close failed");
        }
        return;
    }

    /* Request read access as well: write-only mappings are not supported
     * everywhere, and the descriptor is open O_RDWR anyway. */
    void *mappedData = mmap(NULL, dataSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (mappedData == MAP_FAILED) {
        perror("mmap failed");
        close(fd);
        return;
    }

    memcpy(mappedData, data, dataSize);

    /* MS_SYNC blocks until the dirty pages have been written back, so an I/O
     * error is reported here rather than being lost. */
    if (msync(mappedData, dataSize, MS_SYNC) == -1) {
        perror("msync failed");
    }
    if (munmap(mappedData, dataSize) == -1) {
        perror("munmap failed");
    }
    if (close(fd) == -1) {
        perror("close failed");
    }
}