// Mirror of https://github.com/aljazceru/turso.git (synced 2025-12-19 09:34:18 +01:00).
// 505 lines, 14 KiB, Rust.
use crate::storage::buffer_pool::ArenaBuffer;
|
|
use crate::storage::sqlite3_ondisk::WAL_FRAME_HEADER_SIZE;
|
|
use crate::{BufferPool, Result};
|
|
use bitflags::bitflags;
|
|
use cfg_block::cfg_block;
|
|
use std::fmt;
|
|
use std::ptr::NonNull;
|
|
use std::sync::Arc;
|
|
use std::{cell::Cell, fmt::Debug, pin::Pin};
|
|
|
|
pub trait File: Send + Sync {
|
|
fn lock_file(&self, exclusive: bool) -> Result<()>;
|
|
fn unlock_file(&self) -> Result<()>;
|
|
fn pread(&self, pos: usize, c: Completion) -> Result<Completion>;
|
|
fn pwrite(&self, pos: usize, buffer: Arc<Buffer>, c: Completion) -> Result<Completion>;
|
|
fn sync(&self, c: Completion) -> Result<Completion>;
|
|
fn pwritev(&self, pos: usize, buffers: Vec<Arc<Buffer>>, c: Completion) -> Result<Completion> {
|
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
|
if buffers.is_empty() {
|
|
c.complete(0);
|
|
return Ok(c);
|
|
}
|
|
if buffers.len() == 1 {
|
|
return self.pwrite(pos, buffers[0].clone(), c);
|
|
}
|
|
// naive default implementation can be overridden on backends where it makes sense to
|
|
let mut pos = pos;
|
|
let outstanding = Arc::new(AtomicUsize::new(buffers.len()));
|
|
let total_written = Arc::new(AtomicUsize::new(0));
|
|
|
|
for buf in buffers {
|
|
let len = buf.len();
|
|
let child_c = {
|
|
let c_main = c.clone();
|
|
let outstanding = outstanding.clone();
|
|
let total_written = total_written.clone();
|
|
let _cloned = buf.clone();
|
|
Completion::new_write(move |n| {
|
|
// reference buffer in callback to ensure alive for async io
|
|
let _buf = _cloned.clone();
|
|
// accumulate bytes actually reported by the backend
|
|
total_written.fetch_add(n as usize, Ordering::Relaxed);
|
|
if outstanding.fetch_sub(1, Ordering::AcqRel) == 1 {
|
|
// last one finished
|
|
c_main.complete(total_written.load(Ordering::Acquire) as i32);
|
|
}
|
|
})
|
|
};
|
|
if let Err(e) = self.pwrite(pos, buf.clone(), child_c) {
|
|
// best-effort: mark as done so caller won't wait forever
|
|
c.complete(-1);
|
|
return Err(e);
|
|
}
|
|
pos += len;
|
|
}
|
|
Ok(c)
|
|
}
|
|
fn size(&self) -> Result<u64>;
|
|
fn truncate(&self, len: usize, c: Completion) -> Result<Completion>;
|
|
fn copy_to(&self, io: &dyn IO, path: &str) -> Result<()> {
|
|
// Open or create the destination file
|
|
let dest_file = io.open_file(path, OpenFlags::Create, false)?;
|
|
// Get the size of the source file
|
|
let file_size = self.size()? as usize;
|
|
if file_size == 0 {
|
|
return Ok(());
|
|
}
|
|
|
|
// use 1MB chunk size
|
|
const BUFFER_SIZE: usize = 1024 * 1024;
|
|
let mut pos = 0;
|
|
|
|
while pos < file_size {
|
|
let chunk_size = (file_size - pos).min(BUFFER_SIZE);
|
|
// Read from source
|
|
let read_buffer = Arc::new(Buffer::allocate(chunk_size, Rc::new(|_| {})));
|
|
let read_completion = self.pread(
|
|
pos,
|
|
Completion::new_read(read_buffer.clone(), move |_, _| {}),
|
|
)?;
|
|
|
|
// Wait for read to complete
|
|
io.wait_for_completion(read_completion)?;
|
|
|
|
// Write to destination
|
|
let write_completion =
|
|
dest_file.pwrite(pos, read_buffer, Completion::new_write(|_| {}))?;
|
|
io.wait_for_completion(write_completion)?;
|
|
|
|
pos += chunk_size;
|
|
}
|
|
let sync_completion = dest_file.sync(Completion::new_sync(|_| {}))?;
|
|
io.wait_for_completion(sync_completion)?;
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Copy, Clone, PartialEq)]
|
|
pub struct OpenFlags(i32);
|
|
|
|
bitflags! {
|
|
impl OpenFlags: i32 {
|
|
const None = 0b00000000;
|
|
const Create = 0b0000001;
|
|
const ReadOnly = 0b0000010;
|
|
}
|
|
}
|
|
|
|
impl Default for OpenFlags {
|
|
fn default() -> Self {
|
|
Self::Create
|
|
}
|
|
}
|
|
|
|
pub trait IO: Clock + Send + Sync {
|
|
fn open_file(&self, path: &str, flags: OpenFlags, direct: bool) -> Result<Arc<dyn File>>;
|
|
|
|
fn run_once(&self) -> Result<()>;
|
|
|
|
fn wait_for_completion(&self, c: Completion) -> Result<()>;
|
|
|
|
fn generate_random_number(&self) -> i64;
|
|
|
|
fn get_memory_io(&self) -> Arc<MemoryIO>;
|
|
|
|
fn register_fixed_buffer(&self, _ptr: NonNull<u8>, _len: usize) -> Result<u32> {
|
|
Err(crate::LimboError::InternalError(
|
|
"unsupported operation".to_string(),
|
|
))
|
|
}
|
|
}
|
|
|
|
pub type Complete = dyn Fn(Arc<Buffer>, i32);
|
|
pub type WriteComplete = dyn Fn(i32);
|
|
pub type SyncComplete = dyn Fn(i32);
|
|
pub type TruncateComplete = dyn Fn(i32);
|
|
|
|
#[must_use]
|
|
#[derive(Clone)]
|
|
pub struct Completion {
|
|
inner: Arc<CompletionInner>,
|
|
}
|
|
|
|
struct CompletionInner {
|
|
pub completion_type: CompletionType,
|
|
is_completed: Cell<bool>,
|
|
}
|
|
|
|
pub enum CompletionType {
|
|
Read(ReadCompletion),
|
|
Write(WriteCompletion),
|
|
Sync(SyncCompletion),
|
|
Truncate(TruncateCompletion),
|
|
}
|
|
|
|
pub struct ReadCompletion {
|
|
pub buf: Arc<Buffer>,
|
|
pub complete: Box<Complete>,
|
|
}
|
|
|
|
impl Completion {
|
|
pub fn new(completion_type: CompletionType) -> Self {
|
|
Self {
|
|
inner: Arc::new(CompletionInner {
|
|
completion_type,
|
|
is_completed: Cell::new(false),
|
|
}),
|
|
}
|
|
}
|
|
|
|
pub fn new_write<F>(complete: F) -> Self
|
|
where
|
|
F: Fn(i32) + 'static,
|
|
{
|
|
Self::new(CompletionType::Write(WriteCompletion::new(Box::new(
|
|
complete,
|
|
))))
|
|
}
|
|
|
|
pub fn new_read<F>(buf: Arc<Buffer>, complete: F) -> Self
|
|
where
|
|
F: Fn(Arc<Buffer>, i32) + 'static,
|
|
{
|
|
Self::new(CompletionType::Read(ReadCompletion::new(
|
|
buf,
|
|
Box::new(complete),
|
|
)))
|
|
}
|
|
pub fn new_sync<F>(complete: F) -> Self
|
|
where
|
|
F: Fn(i32) + 'static,
|
|
{
|
|
Self::new(CompletionType::Sync(SyncCompletion::new(Box::new(
|
|
complete,
|
|
))))
|
|
}
|
|
|
|
pub fn new_trunc<F>(complete: F) -> Self
|
|
where
|
|
F: Fn(i32) + 'static,
|
|
{
|
|
Self::new(CompletionType::Truncate(TruncateCompletion::new(Box::new(
|
|
complete,
|
|
))))
|
|
}
|
|
pub fn is_completed(&self) -> bool {
|
|
self.inner.is_completed.get()
|
|
}
|
|
|
|
pub fn complete(&self, result: i32) {
|
|
if !self.inner.is_completed.get() {
|
|
match &self.inner.completion_type {
|
|
CompletionType::Read(r) => r.complete(result),
|
|
CompletionType::Write(w) => w.complete(result),
|
|
CompletionType::Sync(s) => s.complete(result), // fix
|
|
CompletionType::Truncate(t) => t.complete(result),
|
|
};
|
|
self.inner.is_completed.set(true);
|
|
}
|
|
}
|
|
|
|
/// only call this method if you are sure that the completion is
|
|
/// a ReadCompletion, panics otherwise
|
|
pub fn as_read(&self) -> &ReadCompletion {
|
|
match self.inner.completion_type {
|
|
CompletionType::Read(ref r) => r,
|
|
_ => unreachable!(),
|
|
}
|
|
}
|
|
|
|
/// only call this method if you are sure that the completion is
|
|
/// a WriteCompletion, panics otherwise
|
|
pub fn as_write(&self) -> &WriteCompletion {
|
|
match self.inner.completion_type {
|
|
CompletionType::Write(ref w) => w,
|
|
_ => unreachable!(),
|
|
}
|
|
}
|
|
}
|
|
|
|
pub struct WriteCompletion {
|
|
pub complete: Box<WriteComplete>,
|
|
}
|
|
|
|
pub struct SyncCompletion {
|
|
pub complete: Box<SyncComplete>,
|
|
}
|
|
|
|
impl ReadCompletion {
|
|
pub fn new(buf: Arc<Buffer>, complete: Box<Complete>) -> Self {
|
|
Self { buf, complete }
|
|
}
|
|
|
|
pub fn buf(&self) -> &Buffer {
|
|
&self.buf
|
|
}
|
|
|
|
pub fn complete(&self, bytes_read: i32) {
|
|
(self.complete)(self.buf.clone(), bytes_read);
|
|
}
|
|
}
|
|
|
|
impl WriteCompletion {
|
|
pub fn new(complete: Box<WriteComplete>) -> Self {
|
|
Self { complete }
|
|
}
|
|
|
|
pub fn complete(&self, bytes_written: i32) {
|
|
(self.complete)(bytes_written);
|
|
}
|
|
}
|
|
|
|
impl SyncCompletion {
|
|
pub fn new(complete: Box<SyncComplete>) -> Self {
|
|
Self { complete }
|
|
}
|
|
|
|
pub fn complete(&self, res: i32) {
|
|
(self.complete)(res);
|
|
}
|
|
}
|
|
|
|
pub struct TruncateCompletion {
|
|
pub complete: Box<TruncateComplete>,
|
|
}
|
|
|
|
impl TruncateCompletion {
|
|
pub fn new(complete: Box<TruncateComplete>) -> Self {
|
|
Self { complete }
|
|
}
|
|
|
|
pub fn complete(&self, res: i32) {
|
|
(self.complete)(res);
|
|
}
|
|
}
|
|
|
|
pub type BufferData = Pin<Box<[u8]>>;
|
|
|
|
pub enum Buffer {
|
|
Heap(BufferData),
|
|
Pooled(ArenaBuffer),
|
|
}
|
|
|
|
impl Debug for Buffer {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self {
|
|
Self::Pooled(p) => write!(f, "{p:?}"),
|
|
Self::Heap(buf) => write!(f, "{buf:?}: {}", buf.len()),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Clone for Buffer {
|
|
fn clone(&self) -> Self {
|
|
match self {
|
|
Self::Heap(buf) => {
|
|
let len = buf.len();
|
|
Self::Heap(Vec::from(&buf[..len]).into_boxed_slice().into())
|
|
}
|
|
Self::Pooled(buf) => {
|
|
// Clone pooled buffers as heap buffers
|
|
let data = Vec::from(buf.as_slice());
|
|
Self::Heap(Pin::new(data.into_boxed_slice()))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Drop for Buffer {
|
|
fn drop(&mut self) {
|
|
let len = self.len();
|
|
if let Self::Heap(buf) = self {
|
|
TEMP_BUFFER_CACHE.with(|cache| {
|
|
let mut cache = cache.borrow_mut();
|
|
// take ownership of the buffer by swapping it with a dummy
|
|
let buffer = std::mem::replace(buf, Pin::new(vec![].into_boxed_slice()));
|
|
cache.return_buffer(buffer, len);
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Buffer {
|
|
pub fn new(data: Vec<u8>) -> Self {
|
|
tracing::trace!("buffer::new({:?})", data);
|
|
Self::Heap(Pin::new(data.into_boxed_slice()))
|
|
}
|
|
pub fn fixed_id(&self) -> Option<u32> {
|
|
match self {
|
|
Self::Heap { .. } => None,
|
|
Self::Pooled(buf) => buf.fixed_id(),
|
|
}
|
|
}
|
|
|
|
pub fn new_pooled(buf: ArenaBuffer) -> Self {
|
|
tracing::trace!("new_pooled({:?})", buf);
|
|
Self::Pooled(buf)
|
|
}
|
|
|
|
pub fn new_temporary(size: usize) -> Self {
|
|
TEMP_BUFFER_CACHE.with(|cache| {
|
|
if let Some(buffer) = cache.borrow_mut().get_buffer(size) {
|
|
Self::Heap(buffer)
|
|
} else {
|
|
Self::Heap(Pin::new(vec![0; size].into_boxed_slice()))
|
|
}
|
|
})
|
|
}
|
|
|
|
pub fn len(&self) -> usize {
|
|
match self {
|
|
Self::Heap(buf) => buf.len(),
|
|
Self::Pooled(buf) => buf.logical_len(),
|
|
}
|
|
}
|
|
|
|
pub fn is_empty(&self) -> bool {
|
|
self.len() == 0
|
|
}
|
|
|
|
pub fn as_slice(&self) -> &[u8] {
|
|
match self {
|
|
Self::Heap(buf) => {
|
|
// SAFETY: The buffer is guaranteed to be valid for the lifetime of the slice
|
|
unsafe { std::slice::from_raw_parts(buf.as_ptr(), buf.len()) }
|
|
}
|
|
Self::Pooled(buf) => buf,
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::mut_from_ref)]
|
|
pub fn as_mut_slice(&self) -> &mut [u8] {
|
|
match self {
|
|
Self::Heap(buf) => {
|
|
// SAFETY: The buffer is guaranteed to be valid for the lifetime of the slice
|
|
unsafe { std::slice::from_raw_parts_mut(buf.as_ptr() as *mut u8, buf.len()) }
|
|
}
|
|
Self::Pooled(buf) => unsafe {
|
|
std::slice::from_raw_parts_mut(buf.as_ptr() as *mut u8, buf.len())
|
|
},
|
|
}
|
|
}
|
|
pub fn as_mut_ptr(&mut self) -> *mut u8 {
|
|
match self {
|
|
Self::Heap(buf) => buf.as_mut_ptr(),
|
|
Self::Pooled(buf) => buf.as_mut_ptr(),
|
|
}
|
|
}
|
|
}
|
|
|
|
thread_local! {
|
|
/// thread local cache to re-use temporary buffers to prevent churn when pool overflows
|
|
pub static TEMP_BUFFER_CACHE: RefCell<TempBufferCache> = RefCell::new(TempBufferCache::new());
|
|
}
|
|
|
|
pub(crate) struct TempBufferCache {
|
|
// Buffers indexed by size, we only cache common sizes
|
|
page_size: usize,
|
|
page_buffers: Vec<BufferData>,
|
|
wal_frame_buffers: Vec<BufferData>,
|
|
max_cached: usize,
|
|
}
|
|
|
|
impl TempBufferCache {
|
|
fn new() -> Self {
|
|
Self {
|
|
page_size: BufferPool::DEFAULT_PAGE_SIZE,
|
|
page_buffers: Vec::with_capacity(8),
|
|
wal_frame_buffers: Vec::with_capacity(8),
|
|
max_cached: 512,
|
|
}
|
|
}
|
|
|
|
pub fn reinit_cache(&mut self, page_size: usize) {
|
|
self.page_buffers.clear();
|
|
self.wal_frame_buffers.clear();
|
|
self.page_size = page_size;
|
|
}
|
|
|
|
fn get_buffer(&mut self, size: usize) -> Option<BufferData> {
|
|
match size {
|
|
sz if sz == self.page_size => self.page_buffers.pop(),
|
|
sz if sz == (self.page_size + WAL_FRAME_HEADER_SIZE) => self.wal_frame_buffers.pop(),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
fn return_buffer(&mut self, buff: BufferData, len: usize) {
|
|
let sz = self.page_size;
|
|
let cache = match len {
|
|
n if n.eq(&sz) => &mut self.page_buffers,
|
|
n if n.eq(&(sz + WAL_FRAME_HEADER_SIZE)) => &mut self.wal_frame_buffers,
|
|
_ => return,
|
|
};
|
|
if self.max_cached > cache.len() {
|
|
cache.push(buff);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Platform backend selection. Exactly one of the unix / windows / generic blocks applies to
// any target, so `PlatformIO` and `SyscallIO` are each defined once.
cfg_block! {
    #[cfg(all(target_os = "linux", feature = "io_uring"))] {
        mod io_uring;
        #[cfg(feature = "fs")]
        pub use io_uring::UringIO;
    }

    // Android and iOS are `target_family = "unix"` targets, so they are covered here; a
    // separate block for them would declare `mod unix` and the re-exports a second time.
    #[cfg(target_family = "unix")] {
        mod unix;
        #[cfg(feature = "fs")]
        pub use unix::UnixIO;
        pub use unix::UnixIO as PlatformIO;
        pub use PlatformIO as SyscallIO;
    }

    #[cfg(target_os = "windows")] {
        mod windows;
        pub use windows::WindowsIO as PlatformIO;
        pub use PlatformIO as SyscallIO;
    }

    // Fallback for everything that is neither unix-family nor windows. Keying on the family
    // keeps other unix targets (e.g. the BSDs) from matching both this block and the unix one.
    #[cfg(not(any(target_family = "unix", target_os = "windows")))] {
        mod generic;
        pub use generic::GenericIO as PlatformIO;
        pub use PlatformIO as SyscallIO;
    }
}
|
|
|
|
mod memory;
|
|
#[cfg(feature = "fs")]
|
|
mod vfs;
|
|
pub use memory::MemoryIO;
|
|
pub mod clock;
|
|
mod common;
|
|
pub use clock::Clock;
|