From 812a8b9ea2a2d61cc81ae9e6187c716f13849d72 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 16 Jul 2024 14:24:12 +0300 Subject: [PATCH 1/2] core: Switch io_uring to Readv and Writev The Readv and Writev opcodes are available on all Linux kernel versions that support io_uring so let's switch to them. --- core/io/linux.rs | 54 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/core/io/linux.rs b/core/io/linux.rs index b1750d565..194943cd6 100644 --- a/core/io/linux.rs +++ b/core/io/linux.rs @@ -1,24 +1,51 @@ use super::{Completion, File, WriteCompletion, IO}; use anyhow::Result; +use libc::iovec; use log::trace; -use std::cell::RefCell; +use std::cell::{Ref, RefCell}; use std::os::unix::fs::OpenOptionsExt; use std::os::unix::io::AsRawFd; use std::rc::Rc; +const MAX_IOVECS: usize = 128; + pub struct LinuxIO { - ring: Rc>, + inner: Rc>, +} + +pub struct InnerLinuxIO { + ring: io_uring::IoUring, + iovecs: [iovec; MAX_IOVECS], + next_iovec: usize, } impl LinuxIO { pub fn new() -> Result { - let ring = io_uring::IoUring::new(128)?; + let ring = io_uring::IoUring::new(MAX_IOVECS as u32)?; + let inner = InnerLinuxIO { + ring: ring, + iovecs: [iovec { + iov_base: std::ptr::null_mut(), + iov_len: 0, + }; MAX_IOVECS], + next_iovec: 0, + }; Ok(Self { - ring: Rc::new(RefCell::new(ring)), + inner: Rc::new(RefCell::new(inner)), }) } } +impl InnerLinuxIO { + pub fn get_iovec<'a>(&'a mut self, buf: *const u8, len: usize) -> &'a iovec { + let iovec = &mut self.iovecs[self.next_iovec]; + iovec.iov_base = buf as *mut std::ffi::c_void; + iovec.iov_len = len; + self.next_iovec = (self.next_iovec + 1) % MAX_IOVECS; + iovec + } +} + impl IO for LinuxIO { fn open_file(&self, path: &str) -> Result> { trace!("open_file(path = {})", path); @@ -28,14 +55,15 @@ impl IO for LinuxIO { .custom_flags(libc::O_DIRECT) .open(path)?; Ok(Rc::new(LinuxFile { - ring: self.ring.clone(), + io: self.inner.clone(), file, })) } fn run_once(&self) -> Result<()> { 
trace!("run_once()"); - let mut ring = self.ring.borrow_mut(); + let mut inner = self.inner.borrow_mut(); + let mut ring = &mut inner.ring; ring.submit_and_wait(1)?; while let Some(cqe) = ring.completion().next() { let c = unsafe { Rc::from_raw(cqe.user_data() as *const Completion) }; @@ -46,7 +74,7 @@ impl IO for LinuxIO { } pub struct LinuxFile { - ring: Rc>, + io: Rc>, file: std::fs::File, } @@ -54,17 +82,19 @@ impl File for LinuxFile { fn pread(&self, pos: usize, c: Rc) -> Result<()> { trace!("pread(pos = {}, length = {})", pos, c.buf().len()); let fd = io_uring::types::Fd(self.file.as_raw_fd()); + let mut io = self.io.borrow_mut(); let read_e = { let mut buf = c.buf_mut(); let len = buf.len(); let buf = buf.as_mut_ptr(); let ptr = Rc::into_raw(c.clone()); - io_uring::opcode::Read::new(fd, buf, len as u32) + let iovec = io.get_iovec(buf, len); + io_uring::opcode::Readv::new(fd, iovec, 1) .offset(pos as u64) .build() .user_data(ptr as u64) }; - let mut ring = self.ring.borrow_mut(); + let mut ring = &mut io.ring; unsafe { ring.submission() .push(&read_e) @@ -79,16 +109,18 @@ impl File for LinuxFile { buffer: Rc>, c: Rc, ) -> Result<()> { + let mut io = self.io.borrow_mut(); let fd = io_uring::types::Fd(self.file.as_raw_fd()); let write = { let buf = buffer.borrow(); let ptr = Rc::into_raw(c.clone()); - io_uring::opcode::Write::new(fd, buf.as_ptr(), buf.len() as u32) + let iovec = io.get_iovec(buf.as_ptr(), buf.len()); + io_uring::opcode::Writev::new(fd, iovec, 1) .offset(pos as u64) .build() .user_data(ptr as u64) }; - let mut ring = self.ring.borrow_mut(); + let mut ring = &mut io.ring; unsafe { ring.submission() .push(&write) From 207ec5ce92de64b1b85669fbb95e1e5c9f79024c Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 16 Jul 2024 14:33:00 +0300 Subject: [PATCH 2/2] core: Enable direct I/O optimistically ...but keep going if the filesystem does not support it. 
Fixes Limbo on older Linux kernels that have io_uring but don't support direct I/O on all filesystems, such as tmpfs or ecryptfs. --- Cargo.lock | 23 +++++++++++++++++++++-- core/Cargo.toml | 1 + core/io/linux.rs | 7 +++++-- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 079e5b2f0..1ec12aa63 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -205,6 +205,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "cfg_block" version = "0.1.1" @@ -972,6 +978,7 @@ dependencies = [ "libc", "log", "mimalloc", + "nix 0.29.0", "ordered-multimap", "polling", "pprof", @@ -1076,6 +1083,18 @@ dependencies = [ "libc", ] +[[package]] +name = "nix" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" +dependencies = [ + "bitflags 2.6.0", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "num-format" version = "0.4.4" @@ -1274,7 +1293,7 @@ dependencies = [ "inferno", "libc", "log", - "nix", + "nix 0.26.4", "once_cell", "parking_lot", "smallvec", @@ -1525,7 +1544,7 @@ dependencies = [ "libc", "log", "memchr", - "nix", + "nix 0.26.4", "radix_trie", "scopeguard", "unicode-segmentation", diff --git a/core/Cargo.toml b/core/Cargo.toml index e4c4b3b25..97c6056b8 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -33,6 +33,7 @@ cfg_block = "0.1.1" fallible-iterator = "0.3.0" libc = "0.2.155" log = "0.4.20" +nix = { version = "0.29.0", features = ["fs"] } ordered-multimap = "0.7.1" sieve-cache = "0.1.4" sqlite3-parser = "0.11.0" diff --git a/core/io/linux.rs b/core/io/linux.rs index 
194943cd6..a8c4defc2 100644 --- a/core/io/linux.rs +++ b/core/io/linux.rs @@ -3,7 +3,7 @@ use anyhow::Result; use libc::iovec; use log::trace; use std::cell::{Ref, RefCell}; -use std::os::unix::fs::OpenOptionsExt; +use nix::fcntl::{self, FcntlArg, OFlag}; use std::os::unix::io::AsRawFd; use std::rc::Rc; @@ -52,8 +52,11 @@ impl IO for LinuxIO { let file = std::fs::File::options() .read(true) .write(true) - .custom_flags(libc::O_DIRECT) .open(path)?; + // Let's attempt to enable direct I/O. Not all filesystems support it + // so ignore any errors. + let fd = file.as_raw_fd(); + let _= nix::fcntl::fcntl(fd, FcntlArg::F_SETFL(OFlag::O_DIRECT)); Ok(Rc::new(LinuxFile { io: self.inner.clone(), file,