core: refactor page in-memory representation

This commit is contained in:
Pere Diaz Bou
2024-07-26 21:05:08 +02:00
parent 20dc068a9d
commit 35c3fe7448
3 changed files with 113 additions and 92 deletions

View File

@@ -1,6 +1,6 @@
use crate::pager::Pager;
use crate::sqlite3_ondisk::{
read_varint, write_varint, BTreeCell, BTreePage, DatabaseHeader, PageType, TableInteriorCell,
read_varint, write_varint, BTreeCell, DatabaseHeader, PageContent, PageType, TableInteriorCell,
TableLeafCell,
};
use crate::types::{Cursor, CursorResult, OwnedRecord, OwnedValue};
@@ -76,9 +76,9 @@ impl BTreeCursor {
}
let page = page.contents.read().unwrap();
let page = page.as_ref().unwrap();
if mem_page.cell_idx() >= page.cells.len() {
if mem_page.cell_idx() >= page.cell_count() {
let parent = mem_page.parent.clone();
match page.header.right_most_pointer {
match page.rightmost_pointer() {
Some(right_most_pointer) => {
let mem_page = MemPage::new(parent.clone(), right_most_pointer as usize, 0);
self.page.replace(Some(Rc::new(mem_page)));
@@ -95,7 +95,7 @@ impl BTreeCursor {
},
}
}
let cell = &page.cells[mem_page.cell_idx()];
let cell = page.cell_get(mem_page.cell_idx())?;
match &cell {
BTreeCell::TableInteriorCell(TableInteriorCell {
_left_child_page,
@@ -153,8 +153,8 @@ impl BTreeCursor {
}
let mut found_cell = false;
for cell in &page.cells {
match &cell {
for cell_idx in 0..page.cell_count() {
match &page.cell_get(cell_idx)? {
BTreeCell::TableInteriorCell(TableInteriorCell {
_left_child_page,
_rowid,
@@ -188,7 +188,7 @@ impl BTreeCursor {
if !found_cell {
let parent = mem_page.parent.clone();
match page.header.right_most_pointer {
match page.rightmost_pointer() {
Some(right_most_pointer) => {
let mem_page = MemPage::new(parent, right_most_pointer as usize, 0);
self.page.replace(Some(Rc::new(mem_page)));
@@ -224,7 +224,7 @@ impl BTreeCursor {
let mut page = page.contents.write().unwrap();
let page = page.as_mut().unwrap();
assert!(matches!(page.header.page_type, PageType::TableLeaf));
assert!(matches!(page.page_type(), PageType::TableLeaf));
let free = self.compute_free_space(page, self.database_header.borrow());
@@ -283,7 +283,7 @@ impl BTreeCursor {
let pointer_area_pc_by_idx = 8 + 2 * cell_idx;
// move previous pointers forward and insert new pointer there
let n_cells_forward = 2 * (page.cells.len() - cell_idx);
let n_cells_forward = 2 * (page.cell_count() - cell_idx);
buf.copy_within(
pointer_area_pc_by_idx..pointer_area_pc_by_idx + n_cells_forward,
pointer_area_pc_by_idx + 2,
@@ -295,36 +295,27 @@ impl BTreeCursor {
buf[5..7].copy_from_slice(&pc.to_be_bytes());
// update cell count
let new_n_cells = (page.cells.len() + 1) as u16;
let new_n_cells = (page.cell_count() + 1) as u16;
buf[3..5].copy_from_slice(&new_n_cells.to_be_bytes());
// TODO: refactor cells to be lazily loadable because this will be extremely slow
let mut payload_for_cell_in_memory: Vec<u8> = Vec::new();
_record.serialize(&mut payload_for_cell_in_memory);
page.cells.insert(
cell_idx,
BTreeCell::TableLeafCell(TableLeafCell {
_rowid: int_key,
_payload: payload_for_cell_in_memory,
first_overflow_page: None,
}),
);
}
Ok(CursorResult::Ok(()))
}
fn allocate_cell_space(&mut self, page_ref: &BTreePage, amount: u16) -> u16 {
fn allocate_cell_space(&mut self, page_ref: &PageContent, amount: u16) -> u16 {
let amount = amount as usize;
let mut buf_ref = RefCell::borrow_mut(&page_ref.buffer);
let buf = buf_ref.as_mut_slice();
let cell_offset = 8;
let gap = cell_offset + 2 * page_ref.cells.len();
let mut top = page_ref.header._cell_content_area as usize;
let gap = cell_offset + 2 * page_ref.cell_count();
let mut top = page_ref.cell_content_area() as usize;
// there are free blocks and enough space
if page_ref.header._first_freeblock_offset != 0 && gap + 2 <= top {
if page_ref.first_freeblock() != 0 && gap + 2 <= top {
// find slot
let db_header = self.database_header.borrow();
let pc = find_free_cell(page_ref, db_header, amount, buf);
@@ -346,7 +337,7 @@ impl BTreeCursor {
return top as u16;
}
fn defragment_page(&self, page: &BTreePage, db_header: Ref<DatabaseHeader>) {
fn defragment_page(&self, page: &PageContent, db_header: Ref<DatabaseHeader>) {
let cloned_page = page.clone();
let usable_space = (db_header.page_size - db_header.unused_space as u16) as u64;
let mut cbrk = usable_space as u64;
@@ -354,14 +345,14 @@ impl BTreeCursor {
// TODO: implement fast algorithm
let last_cell = (usable_space - 4) as u64;
let first_cell = cloned_page.header._cell_content_area as u64;
if cloned_page.cells.len() > 0 {
let first_cell = cloned_page.cell_content_area() as u64;
if cloned_page.cell_count() > 0 {
let buf = cloned_page.buffer.borrow();
let buf = buf.as_slice();
let mut write_buf = RefCell::borrow_mut(&page.buffer);
let write_buf = write_buf.as_mut_slice();
for i in 0..cloned_page.cells.len() {
for i in 0..cloned_page.cell_count() {
let cell_offset = 8;
let cell_idx = cell_offset + i * 2;
@@ -411,19 +402,19 @@ impl BTreeCursor {
// Free blocks can be zero, meaning the "real free space" that can be used to allocate is expected to be between first cell byte
// and end of cell pointer area.
fn compute_free_space(&self, page: &BTreePage, db_header: Ref<DatabaseHeader>) -> u16 {
fn compute_free_space(&self, page: &PageContent, db_header: Ref<DatabaseHeader>) -> u16 {
let buffer = page.buffer.borrow();
let buf = buffer.as_slice();
let usable_space = (db_header.page_size - db_header.unused_space as u16) as usize;
let mut first_byte_in_cell_content = page.header._cell_content_area;
let mut first_byte_in_cell_content = page.cell_content_area();
if first_byte_in_cell_content == 0 {
first_byte_in_cell_content = u16::MAX;
}
let fragmented_free_bytes = page.header._num_frag_free_bytes;
let free_block_pointer = page.header._first_freeblock_offset;
let ncell = page.cells.len();
let fragmented_free_bytes = page.num_frag_free_bytes();
let free_block_pointer = page.first_freeblock();
let ncell = page.cell_count();
// 8 + 4 == header end
let first_cell = 8 + 4 + (2 * ncell) as u16;
@@ -469,14 +460,14 @@ impl BTreeCursor {
}
fn find_free_cell(
page_ref: &BTreePage,
page_ref: &PageContent,
db_header: Ref<DatabaseHeader>,
amount: usize,
buf: &[u8],
) -> usize {
// NOTE: freelist is in ascending order of keys and pc
// unused_space is the number of reserved bytes at the end of the page, therefore we must subtract it from maxpc
let mut pc = page_ref.header._first_freeblock_offset as usize;
let mut pc = page_ref.first_freeblock() as usize;
let usable_space = (db_header.page_size - db_header.unused_space as u16) as usize;
let maxpc = (usable_space - amount as usize) as usize;
let mut found = false;
@@ -598,10 +589,10 @@ impl Cursor for BTreeCursor {
_ => unreachable!("btree tables are indexed by integers!"),
};
let cell_idx = find_cell(page, int_key);
if cell_idx >= page.cells.len() {
if cell_idx >= page.cell_count() {
Ok(CursorResult::Ok(false))
} else {
let equals = match &page.cells[cell_idx] {
let equals = match &page.cell_get(cell_idx)? {
BTreeCell::TableLeafCell(l) => l._rowid == int_key,
_ => unreachable!(),
};
@@ -610,10 +601,10 @@ impl Cursor for BTreeCursor {
}
}
fn find_cell(page: &BTreePage, int_key: u64) -> usize {
fn find_cell(page: &PageContent, int_key: u64) -> usize {
let mut cell_idx = 0;
for cell in &page.cells {
match cell {
while cell_idx < page.cell_count() {
match page.cell_get(cell_idx).unwrap() {
BTreeCell::TableLeafCell(cell) => {
if int_key <= cell._rowid {
break;

View File

@@ -1,5 +1,5 @@
use crate::buffer_pool::BufferPool;
use crate::sqlite3_ondisk::BTreePage;
use crate::sqlite3_ondisk::PageContent;
use crate::sqlite3_ondisk::{self, DatabaseHeader};
use crate::{PageSource, Result};
use log::trace;
@@ -14,7 +14,7 @@ use std::sync::{Arc, RwLock};
pub struct Page {
flags: AtomicUsize,
pub contents: RwLock<Option<BTreePage>>,
pub contents: RwLock<Option<PageContent>>,
pub id: usize,
}
@@ -305,7 +305,7 @@ impl Pager {
}
let page = Rc::new(RefCell::new(Page::new(page_idx)));
page.borrow().set_locked();
sqlite3_ondisk::begin_read_btree_page(
sqlite3_ondisk::begin_read_page(
&self.page_source,
self.buffer_pool.clone(),
page.clone(),

View File

@@ -188,17 +188,6 @@ pub fn begin_write_database_header(header: &DatabaseHeader, pager: &Pager) -> Re
Ok(())
}
#[derive(Debug, Clone)]
pub struct BTreePageHeader {
pub(crate) page_type: PageType,
pub(crate) _first_freeblock_offset: u16,
pub(crate) num_cells: u16,
// First byte of content area
pub(crate) _cell_content_area: u16,
pub(crate) _num_frag_free_bytes: u8,
pub(crate) right_most_pointer: Option<u32>,
}
#[repr(u8)]
#[derive(Debug, PartialEq, Clone)]
pub enum PageType {
@@ -223,15 +212,84 @@ impl TryFrom<u8> for PageType {
}
#[derive(Debug, Clone)]
pub struct BTreePage {
pub header: BTreePageHeader,
pub cells: Vec<BTreeCell>,
/// In-memory view of a single database page.
///
/// Header fields and cells are decoded on demand from `buffer` rather than
/// being parsed up front.
pub struct PageContent {
    /// Byte position inside `buffer` at which the b-tree page header starts.
    /// `finish_read_page` sets this to `DATABASE_HEADER_SIZE` for page 1 and to
    /// 0 for every other page.
    pub offset: usize,
    /// Shared handle to the bytes of the page as read from the page source.
    pub buffer: Rc<RefCell<Buffer>>,
}
impl BTreePage {
impl PageContent {
pub fn page_type(&self) -> PageType {
let buf = self.buffer.borrow();
let buf = buf.as_slice();
buf[self.offset].try_into().unwrap()
}
/// Reads the big-endian `u16` located `pos` bytes after the start of the
/// page header (`self.offset`).
///
/// The read goes through the `RefCell`'s raw pointer instead of `borrow()`,
/// so it does not panic when the buffer is already mutably borrowed.
///
/// # Panics
///
/// Panics if `self.offset + pos + 1` is past the end of the buffer.
fn read_u16(&self, pos: usize) -> u16 {
    let start = self.offset + pos;
    // SAFETY: `RefCell::as_ptr` always yields a non-null pointer to the
    // wrapped `Buffer`, and `self.buffer` keeps that allocation alive for the
    // duration of this call.
    // NOTE(review): this bypasses the RefCell borrow flag. Callers such as
    // `allocate_cell_space` invoke header accessors while holding
    // `borrow_mut()` on the same buffer, so this shared read can overlap a
    // live mutable borrow — confirm that is acceptable or restructure.
    let bytes = unsafe { &*self.buffer.as_ptr() }.as_slice();
    u16::from_be_bytes([bytes[start], bytes[start + 1]])
}
/// Reads the big-endian `u32` located `pos` bytes after the start of the
/// page header (`self.offset`).
///
/// The read goes through the `RefCell`'s raw pointer instead of `borrow()`,
/// so it does not panic when the buffer is already mutably borrowed.
///
/// # Panics
///
/// Panics if `self.offset + pos + 3` is past the end of the buffer.
fn read_u32(&self, pos: usize) -> u32 {
    let start = self.offset + pos;
    // SAFETY: `RefCell::as_ptr` always yields a non-null pointer to the
    // wrapped `Buffer`, and `self.buffer` keeps that allocation alive for the
    // duration of this call.
    // NOTE(review): this bypasses the RefCell borrow flag; confirm no caller
    // relies on it while writing through a live mutable borrow.
    let bytes = unsafe { &*self.buffer.as_ptr() }.as_slice();
    u32::from_be_bytes([
        bytes[start],
        bytes[start + 1],
        bytes[start + 2],
        bytes[start + 3],
    ])
}
/// Returns the 16-bit big-endian value stored at byte 1 of the page header:
/// the offset of the first freeblock on the page.
///
/// `allocate_cell_space` treats a value of zero as "no freeblocks".
pub fn first_freeblock(&self) -> u16 {
    // Position of the first-freeblock field, relative to the header start.
    const FIRST_FREEBLOCK_POS: usize = 1;
    self.read_u16(FIRST_FREEBLOCK_POS)
}
/// Returns the number of cells on the page, read from the 16-bit big-endian
/// field at byte 3 of the page header and widened to `usize`.
pub fn cell_count(&self) -> usize {
    // Position of the cell-count field, relative to the header start.
    const CELL_COUNT_POS: usize = 3;
    usize::from(self.read_u16(CELL_COUNT_POS))
}
/// Returns the 16-bit big-endian field at byte 5 of the page header: the
/// offset of the first byte of the cell content area.
///
/// The stored value is returned unchanged; `compute_free_space` is the place
/// that remaps a stored zero.
pub fn cell_content_area(&self) -> u16 {
    // Position of the cell-content-area field, relative to the header start.
    const CELL_CONTENT_AREA_POS: usize = 5;
    self.read_u16(CELL_CONTENT_AREA_POS)
}
/// Returns the number of fragmented free bytes in the cell content area.
///
/// The field is a single byte at position 7 of the page header. It must not
/// be read as a 16-bit value: the byte that follows belongs to the next
/// header field (the right-most pointer on interior pages, or the first cell
/// pointer on leaf pages) and would corrupt the count.
///
/// The result is widened to `u16` to keep the signature callers already use.
///
/// # Panics
///
/// Panics if `self.offset + 7` is past the end of the buffer.
pub fn num_frag_free_bytes(&self) -> u16 {
    // Position of the one-byte fragmented-free-bytes field in the header.
    const NUM_FRAG_FREE_BYTES_POS: usize = 7;
    // SAFETY: `RefCell::as_ptr` always yields a non-null pointer to the
    // wrapped `Buffer`, and `self.buffer` keeps that allocation alive for the
    // duration of this call. The raw-pointer read mirrors `read_u16`, so this
    // accessor stays usable in the same borrow situations as the other
    // header getters.
    let bytes = unsafe { &*self.buffer.as_ptr() }.as_slice();
    u16::from(bytes[self.offset + NUM_FRAG_FREE_BYTES_POS])
}
/// Returns the right-most child pointer stored at byte 8 of the page header.
///
/// Only interior pages carry this field; leaf pages yield `None`.
pub fn rightmost_pointer(&self) -> Option<u32> {
    match self.page_type() {
        PageType::IndexInterior | PageType::TableInterior => Some(self.read_u32(8)),
        PageType::IndexLeaf | PageType::TableLeaf => None,
    }
}
/// Decodes the cell at index `idx` of this page.
///
/// The cell pointer array starts right after the page header (12 bytes on
/// interior pages, 8 bytes on leaf pages) and holds one 2-byte entry per
/// cell. The entry for `idx` is looked up and the cell it points to is parsed
/// with `read_btree_cell`.
///
/// # Errors
///
/// Returns whatever error `read_btree_cell` reports for the cell.
///
/// # Panics
///
/// Panics if `idx` is not smaller than [`Self::cell_count`], if the page type
/// byte is invalid, or if the buffer is currently mutably borrowed.
pub fn cell_get(&self, idx: usize) -> Result<BTreeCell> {
    let buffer = self.buffer.borrow();
    let cell_count = self.cell_count();
    let page_type = self.page_type();
    // Size of the page header, i.e. where the cell pointer array begins.
    let pointer_array_start = match page_type {
        PageType::IndexInterior | PageType::TableInterior => 12,
        PageType::IndexLeaf | PageType::TableLeaf => 8,
    };
    assert!(idx < cell_count, "cell_get: idx out of bounds");
    let cell_offset = self.read_u16(pointer_array_start + idx * 2) as usize;
    read_btree_cell(buffer.as_slice(), &page_type, cell_offset)
}
pub fn is_leaf(&self) -> bool {
match self.header.page_type {
match self.page_type() {
PageType::IndexInterior => false,
PageType::TableInterior => false,
PageType::IndexLeaf => true,
@@ -240,7 +298,7 @@ impl BTreePage {
}
}
pub fn begin_read_btree_page(
pub fn begin_read_page(
page_source: &PageSource,
buffer_pool: Rc<BufferPool>,
page: Rc<RefCell<Page>>,
@@ -255,7 +313,7 @@ pub fn begin_read_btree_page(
let buf = Rc::new(RefCell::new(Buffer::new(buf, drop_fn)));
let complete = Box::new(move |buf: Rc<RefCell<Buffer>>| {
let page = page.clone();
if finish_read_btree_page(page_idx, buf, page.clone()).is_err() {
if finish_read_page(page_idx, buf, page.clone()).is_err() {
page.borrow_mut().set_error();
}
});
@@ -264,47 +322,19 @@ pub fn begin_read_btree_page(
Ok(())
}
fn finish_read_btree_page(
fn finish_read_page(
page_idx: usize,
buffer_ref: Rc<RefCell<Buffer>>,
page: Rc<RefCell<Page>>,
) -> Result<()> {
trace!("finish_read_btree_page(page_idx = {})", page_idx);
let mut pos = if page_idx == 1 {
let pos = if page_idx == 1 {
DATABASE_HEADER_SIZE
} else {
0
};
let buf = buffer_ref.borrow();
let buf = buf.as_slice();
let mut header = BTreePageHeader {
page_type: buf[pos].try_into()?,
_first_freeblock_offset: u16::from_be_bytes([buf[pos + 1], buf[pos + 2]]),
num_cells: u16::from_be_bytes([buf[pos + 3], buf[pos + 4]]),
_cell_content_area: u16::from_be_bytes([buf[pos + 5], buf[pos + 6]]),
_num_frag_free_bytes: buf[pos + 7],
right_most_pointer: None,
};
pos += 8;
if header.page_type == PageType::IndexInterior || header.page_type == PageType::TableInterior {
header.right_most_pointer = Some(u32::from_be_bytes([
buf[pos],
buf[pos + 1],
buf[pos + 2],
buf[pos + 3],
]));
pos += 4;
}
let mut cells = Vec::with_capacity(header.num_cells as usize);
for _ in 0..header.num_cells {
let cell_pointer = u16::from_be_bytes([buf[pos], buf[pos + 1]]);
pos += 2;
let cell = read_btree_cell(buf, &header.page_type, cell_pointer as usize)?;
cells.push(cell);
}
let inner = BTreePage {
header,
cells,
let inner = PageContent {
offset: pos,
buffer: buffer_ref.clone(),
};
{