diff --git a/Cargo.lock b/Cargo.lock index b68af76cc..183f58144 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -205,6 +205,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "cfg_block" version = "0.1.1" @@ -972,6 +978,7 @@ dependencies = [ "libc", "log", "mimalloc", + "nix 0.29.0", "ordered-multimap", "polling", "pprof", @@ -1077,6 +1084,18 @@ dependencies = [ "libc", ] +[[package]] +name = "nix" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" +dependencies = [ + "bitflags 2.6.0", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "num-format" version = "0.4.4" @@ -1275,7 +1294,7 @@ dependencies = [ "inferno", "libc", "log", - "nix", + "nix 0.26.4", "once_cell", "parking_lot", "smallvec", @@ -1526,7 +1545,7 @@ dependencies = [ "libc", "log", "memchr", - "nix", + "nix 0.26.4", "radix_trie", "scopeguard", "unicode-segmentation", diff --git a/core/Cargo.toml b/core/Cargo.toml index 7ab4f1c44..713f84249 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -33,6 +33,7 @@ cfg_block = "0.1.1" fallible-iterator = "0.3.0" libc = "0.2.155" log = "0.4.20" +nix = { version = "0.29.0", features = ["fs"] } ordered-multimap = "0.7.1" sieve-cache = "0.1.4" sqlite3-parser = "0.11.0" diff --git a/core/io/linux.rs b/core/io/linux.rs index b1750d565..a8c4defc2 100644 --- a/core/io/linux.rs +++ b/core/io/linux.rs @@ -1,41 +1,72 @@ use super::{Completion, File, WriteCompletion, IO}; use anyhow::Result; +use libc::iovec; use log::trace; -use std::cell::RefCell; -use std::os::unix::fs::OpenOptionsExt; +use std::cell::{Ref, RefCell}; +use nix::fcntl::{self, FcntlArg, OFlag}; use std::os::unix::io::AsRawFd; use std::rc::Rc; +const MAX_IOVECS: usize = 128; + pub struct LinuxIO { - ring: Rc>, + inner: Rc>, +} + +pub struct InnerLinuxIO { + ring: io_uring::IoUring, + iovecs: [iovec; MAX_IOVECS], + next_iovec: usize, } impl LinuxIO { pub fn new() -> Result { - let ring = io_uring::IoUring::new(128)?; + let ring = io_uring::IoUring::new(MAX_IOVECS as u32)?; + let inner = InnerLinuxIO { + ring: ring, + iovecs: [iovec { + iov_base: std::ptr::null_mut(), + iov_len: 0, + }; MAX_IOVECS], + next_iovec: 0, + }; Ok(Self { - ring: Rc::new(RefCell::new(ring)), + inner: Rc::new(RefCell::new(inner)), }) } } +impl InnerLinuxIO { + pub fn get_iovec<'a>(&'a mut self, buf: *const u8, len: usize) -> &'a iovec { + let iovec = &mut self.iovecs[self.next_iovec]; + iovec.iov_base = buf as *mut std::ffi::c_void; + iovec.iov_len = len; + self.next_iovec = (self.next_iovec + 1) % MAX_IOVECS; + iovec + } +} + impl IO for LinuxIO { fn open_file(&self, path: &str) -> Result> { trace!("open_file(path = {})", path); let file = std::fs::File::options() .read(true) .write(true) - .custom_flags(libc::O_DIRECT) .open(path)?; + // Let's attempt to enable direct I/O. Not all filesystems support it + // so ignore any errors. + let fd = file.as_raw_fd(); + let _= nix::fcntl::fcntl(fd, FcntlArg::F_SETFL(OFlag::O_DIRECT)); Ok(Rc::new(LinuxFile { - ring: self.ring.clone(), + io: self.inner.clone(), file, })) } fn run_once(&self) -> Result<()> { trace!("run_once()"); - let mut ring = self.ring.borrow_mut(); + let mut inner = self.inner.borrow_mut(); + let mut ring = &mut inner.ring; ring.submit_and_wait(1)?; while let Some(cqe) = ring.completion().next() { let c = unsafe { Rc::from_raw(cqe.user_data() as *const Completion) }; @@ -46,7 +77,7 @@ impl IO for LinuxIO { } pub struct LinuxFile { - ring: Rc>, + io: Rc>, file: std::fs::File, } @@ -54,17 +85,19 @@ impl File for LinuxFile { fn pread(&self, pos: usize, c: Rc) -> Result<()> { trace!("pread(pos = {}, length = {})", pos, c.buf().len()); let fd = io_uring::types::Fd(self.file.as_raw_fd()); + let mut io = self.io.borrow_mut(); let read_e = { let mut buf = c.buf_mut(); let len = buf.len(); let buf = buf.as_mut_ptr(); let ptr = Rc::into_raw(c.clone()); - io_uring::opcode::Read::new(fd, buf, len as u32) + let iovec = io.get_iovec(buf, len); + io_uring::opcode::Readv::new(fd, iovec, 1) .offset(pos as u64) .build() .user_data(ptr as u64) }; - let mut ring = self.ring.borrow_mut(); + let mut ring = &mut io.ring; unsafe { ring.submission() .push(&read_e) @@ -79,16 +112,18 @@ impl File for LinuxFile { buffer: Rc>, c: Rc, ) -> Result<()> { + let mut io = self.io.borrow_mut(); let fd = io_uring::types::Fd(self.file.as_raw_fd()); let write = { let buf = buffer.borrow(); let ptr = Rc::into_raw(c.clone()); - io_uring::opcode::Write::new(fd, buf.as_ptr(), buf.len() as u32) + let iovec = io.get_iovec(buf.as_ptr(), buf.len()); + io_uring::opcode::Writev::new(fd, iovec, 1) .offset(pos as u64) .build() .user_data(ptr as u64) }; - let mut ring = self.ring.borrow_mut(); + let mut ring = &mut io.ring; unsafe { ring.submission() .push(&write)