I am working on a concurrent file access scenario on ext4 (Ubuntu): one writer receives data and writes it to a file, while multiple readers concurrently read from that file. To improve performance, I've configured the reader side to use O_DIRECT, since it reduced load on the filesystem and made operations faster.
Writer:
The writer opens the file once with fopen("wb") and writes in a loop using fwrite(), calling fflush() after each write.
#include <iostream>
#include <thread>
#include <chrono>
#include <cstring>
#include <cstdio>
#include <cstdlib>
#include <vector>

#define OUTPUT_FILE "/data/out.txt"

size_t chunk_size = 0;
size_t iterations = 0;

void write_data() {
    FILE* outFile = fopen(OUTPUT_FILE, "wb");
    if (!outFile) {
        std::cerr << "Error opening file for writing!" << std::endl;
        return;
    }
    std::vector<char> buffer(chunk_size, 'A'); // Buffer initialized with 'A' characters
    while (true) {
        // Fill the chunk with random letters 'A'..'Z'
        for (size_t i = 0; i < chunk_size; i++) {
            buffer[i] = 'A' + (rand() % 26);
        }
        fwrite(buffer.data(), 1, chunk_size, outFile);
        fflush(outFile); // Ensure data is handed to the kernel immediately
        std::cout << "Wrote " << chunk_size << " bytes to file" << std::endl;
        std::this_thread::sleep_for(std::chrono::microseconds(5));
        if (--iterations == 0) {
            break;
        }
    }
    fclose(outFile);
}

int main(int argc, char* argv[]) {
    if (argc < 3) {
        std::cerr << "Usage: writer <chunk_size> <iterations>" << std::endl;
        return 1;
    }
    chunk_size = atoi(argv[1]);
    iterations = atoi(argv[2]);
    write_data();
    return 0;
}
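For reference, the fsync-after-fflush variant I'm asking about below would only change the write loop (it also needs #include <unistd.h> for fsync); a minimal sketch:

    fwrite(buffer.data(), 1, chunk_size, outFile);
    fflush(outFile);          // flush the stdio buffer into the kernel
    fsync(fileno(outFile));   // force the kernel to write data and metadata to disk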
Reader:
The reader opens the file using open(filename, O_RDONLY | O_DIRECT) and reads using ::read().
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <cstdlib>
#include <cstdio>
#include <string>
#include <stdexcept>
#include <iostream>

#define BUFFER_SIZE 524288
#define O_DIRECT_ALIGNMENT 4096
int main(int argc, char *argv[])
{
    if (argc < 3) {
        std::cerr << "Usage: reader <chunk_size> <iterations>" << std::endl;
        return EXIT_FAILURE;
    }
    size_t chunk_size = atoi(argv[1]);
    size_t iterations = atoi(argv[2]);
    int fd;
    char *src_buffer_;
    size_t curr_pos_ = 0;

    std::string output_file = "/data/out2.txt";
    FILE* outFile = fopen(output_file.c_str(), "wb");
    if (!outFile) {
        perror("Error opening output file");
        return EXIT_FAILURE;
    }

    // Open file with O_DIRECT
    fd = open("/data/out.txt", O_RDONLY | O_DIRECT);
    if (fd == -1) {
        perror("Error opening file");
        return EXIT_FAILURE;
    }

    // Allocate aligned memory for O_DIRECT
    src_buffer_ = (char *) aligned_alloc(O_DIRECT_ALIGNMENT, BUFFER_SIZE);
    if (!src_buffer_) {
        perror("aligned_alloc failed");
        return EXIT_FAILURE;
    }

    // Read in a loop
    while (1) {
        ssize_t bytes_read = 0;
        // If the current position is not block-aligned, seek back to the
        // previous block boundary and discard the leading bytes after the read.
        off_t non_aligned_bytes = curr_pos_ & (O_DIRECT_ALIGNMENT - 1);
        if (non_aligned_bytes)
            lseek(fd, curr_pos_ - non_aligned_bytes, SEEK_SET);
        bytes_read = ::read(fd, src_buffer_, BUFFER_SIZE);
        if (bytes_read < 0)
            throw std::runtime_error(std::string("ERROR: read failed with error: ") + strerror(errno));
        size_t read_size = bytes_read > non_aligned_bytes ? bytes_read - non_aligned_bytes : 0;
        curr_pos_ += read_size;
        std::cout << curr_pos_ << "," << bytes_read << "," << non_aligned_bytes << std::endl;
        // Write the new bytes to the copy file
        fwrite(src_buffer_ + non_aligned_bytes, 1, read_size, outFile);
        if (curr_pos_ == chunk_size * iterations)
            break;
    }

    // Free the aligned src_buffer_
    free(src_buffer_);
    // Close files
    close(fd);
    fclose(outFile);
    return 0;
}
I've ensured that all alignment requirements of O_DIRECT are met, i.e. the file offset, the buffer address, and the read size are all aligned to the block size.
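Concretely, these are the invariants I double-check, as assertions dropped into the reader's read loop (using the reader's own variables and assuming a 4096-byte block size, the same O_DIRECT_ALIGNMENT constant as above):

    #include <cassert>
    #include <cstdint>
    // buffer start, file offset passed to lseek/read, and request length
    // must all be multiples of the block size
    assert(reinterpret_cast<std::uintptr_t>(src_buffer_) % O_DIRECT_ALIGNMENT == 0);
    assert((curr_pos_ - non_aligned_bytes) % O_DIRECT_ALIGNMENT == 0);
    assert(BUFFER_SIZE % O_DIRECT_ALIGNMENT == 0);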
However, when I compared out.txt and out2.txt, I observed inconsistent reads: at some points the reader gets 0x00 bytes from the file instead of valid data.
It seems the file's size is being updated before the data itself is actually written, so the reader ends up reading blocks that have not yet been populated.
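Since the writer only ever emits the bytes 'A'..'Z', I locate the bad regions by scanning out2.txt for zero bytes with a small standalone checker along these lines (illustrative only):

    #include <cstdio>
    #include <iostream>
    // Any 0x00 byte in the reader's copy indicates a bad read,
    // because the writer only produces 'A'..'Z'.
    int main() {
        FILE* f = fopen("/data/out2.txt", "rb");
        if (!f) { perror("fopen"); return 1; }
        long long off = 0, zeros = 0, first = -1;
        int c;
        while ((c = fgetc(f)) != EOF) {
            if (c == 0) { ++zeros; if (first < 0) first = off; }
            ++off;
        }
        std::cout << zeros << " zero bytes, first at offset " << first << std::endl;
        fclose(f);
        return 0;
    }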
My Questions:
1. Why is the reader seeing 0x00 instead of actual data?
2. Is there an issue with O_DIRECT and fopen("wb") that could cause this inconsistency?
3. Would using fsync after fflush ensure correctness?
4. Would switching to fopen("ab") (append mode) improve safety?
5. Is it even safe to read from a file that is being concurrently written on ext4?