object_store/
ext4.rs

1#[cfg(not(target_os = "twizzler"))]
2use std::io::Result;
3use std::{
4    ffi::CString,
5    io::{ErrorKind, Read, Seek, SeekFrom, Write},
6    sync::{
7        atomic::{AtomicU64, Ordering},
8        Arc, Condvar, Mutex, MutexGuard,
9    },
10};
11
12use efs::fs::ext2::inode::ROOT_DIRECTORY_INODE;
13use lwext4_rs::{Ext4Blockdev, Ext4BlockdevIface, Ext4File, Ext4Fs, FileKind, O_CREAT, O_RDWR};
14#[cfg(target_os = "twizzler")]
15use twizzler::Result;
16
17use crate::{
18    ino_to_objid, objid_to_ino, DevicePage, ExternalFile, ExternalKind, ObjID, PagedDevice,
19    PagedObjectStore, PosIo, PAGE_SIZE,
20};
21
/// Object store that keeps objects as files inside an ext4 filesystem (via `lwext4_rs`).
pub struct Ext4Store {
    /// Filesystem handle; the methods in this file lock it before touching the filesystem.
    fs: Mutex<Ext4Fs>,
    /// Shared handle to the backing device, also passed to page requests for page-in and
    /// page-out I/O.
    device: Arc<dyn Device>,
}
26
/// Bundle of the capabilities a backing device must provide: positioned I/O ([`PosIo`]),
/// paged I/O ([`PagedDevice`]), and `Sync + Send` so it can be shared across threads.
pub trait Device: PosIo + PagedDevice + Sync + Send {}

// Blanket impl: every type that satisfies the bounds is automatically a `Device`.
impl<T: PosIo + PagedDevice + Sync + Send> Device for T {}
30
/// Adapter that exposes a [`Device`] through the [`Ext4BlockdevIface`] block-device interface.
struct Ext4Bd {
    /// Device that block reads and writes are forwarded to.
    device: Arc<dyn Device>,
    /// Value reported by `phys_block_count`.
    phys_bcount: u64,
    /// `true` while the block device is held through `lock()`; cleared by `unlock()`.
    lock: Mutex<bool>,
    /// Notified by `unlock()` to wake threads waiting in `lock()`.
    cv: Condvar,
}
37
38impl Ext4BlockdevIface for Ext4Bd {
39    fn phys_block_size(&mut self) -> u32 {
40        PHYSICAL_BSIZE
41    }
42
43    fn phys_block_count(&mut self) -> u64 {
44        self.phys_bcount
45    }
46
47    fn open(&mut self) -> std::io::Result<()> {
48        Ok(())
49    }
50
51    fn close(&mut self) -> std::io::Result<()> {
52        Ok(())
53    }
54
55    fn read(&mut self, buf: *mut u8, block: u64, bcount: u32) -> std::io::Result<u32> {
56        let start = block * PHYSICAL_BSIZE as u64;
57        let len = bcount as u64 * PHYSICAL_BSIZE as u64;
58        let slice = unsafe { core::slice::from_raw_parts_mut(buf, len as usize) };
59        let len = self.device.read(start, slice)?;
60        Ok((len / PHYSICAL_BSIZE as usize) as u32)
61    }
62
63    fn write(&mut self, buf: *const u8, block: u64, bcount: u32) -> std::io::Result<u32> {
64        let start = block * PHYSICAL_BSIZE as u64;
65        let len = bcount as u64 * PHYSICAL_BSIZE as u64;
66        let slice = unsafe { core::slice::from_raw_parts(buf, len as usize) };
67        let len = self.device.write(start, slice)?;
68        Ok((len / PHYSICAL_BSIZE as usize) as u32)
69    }
70
71    fn lock(&self) -> std::io::Result<()> {
72        let mut inner = self.lock.lock().unwrap();
73        while *inner {
74            inner = self.cv.wait(inner).unwrap();
75        }
76        *inner = true;
77        Ok(())
78    }
79
80    fn unlock(&self) -> std::io::Result<()> {
81        let mut inner = self.lock.lock().unwrap();
82        assert!(*inner);
83        *inner = false;
84        self.cv.notify_all();
85        Ok(())
86    }
87}
88
89impl Ext4Bd {
90    fn new(device: Arc<dyn Device>, _name: &str, phys_bcount: u64) -> Self {
91        Self {
92            device,
93            phys_bcount,
94            lock: Mutex::new(false),
95            cv: Condvar::new(),
96        }
97    }
98}
99
100impl From<FileKind> for ExternalKind {
101    fn from(value: FileKind) -> Self {
102        match value {
103            FileKind::Regular => ExternalKind::Regular,
104            FileKind::Directory => ExternalKind::Directory,
105            FileKind::Symlink => ExternalKind::SymLink,
106            FileKind::Other => ExternalKind::Other,
107        }
108    }
109}
110
/// Counter incremented by `Ext4Store::new` to build a distinct `blockdev-N` name per store.
static BDEV_ID: AtomicU64 = AtomicU64::new(0);

/// Logical block size in bytes; passed to `Ext4Blockdev::new` and used to derive its block count.
const LOGICAL_BSIZE: u32 = 512;
/// Physical block size in bytes; used for the byte-offset arithmetic in `Ext4Bd::read`/`write`.
const PHYSICAL_BSIZE: u32 = 512;
115
116impl Ext4Store {
117    pub fn new<D: Device + 'static>(device: D, name: &str) -> Result<Self> {
118        let bdname = format!("blockdev-{}", BDEV_ID.fetch_add(1, Ordering::SeqCst));
119        let max = device.len()? as u64;
120        let bcount = max / LOGICAL_BSIZE as u64;
121        let phys_bcount = max / PHYSICAL_BSIZE as u64;
122        let device = Arc::new(device);
123        let bd = Ext4Blockdev::new(
124            Ext4Bd::new(device.clone(), bdname.as_str(), phys_bcount),
125            LOGICAL_BSIZE,
126            bcount,
127            name,
128        )?;
129
130        let mut fs = Ext4Fs::new(bd, CString::new(name).unwrap(), false)?;
131
132        match fs.create_dir("ids") {
133            Err(e) if e.kind() != ErrorKind::AlreadyExists => {
134                return Err(e.into());
135            }
136            _ => {}
137        }
138
139        Ok(Self {
140            fs: Mutex::new(fs),
141            device,
142        })
143    }
144
145    pub fn get_id_path(&self, id: ObjID) -> (String, String) {
146        let top = id.to_be_bytes()[0];
147        let us = format!("ids/{:x}", top);
148        (us, format!("ids/{:x}/{:x}", top, id))
149    }
150
151    pub fn get_object_as_file<'a>(
152        &self,
153        fs: &'a mut MutexGuard<'_, Ext4Fs>,
154        id: ObjID,
155        create: bool,
156    ) -> Result<Ext4File<'a>> {
157        let flags = if create { O_RDWR | O_CREAT } else { O_RDWR };
158        if let Some(ino) = objid_to_ino(id) {
159            return Ok(fs.open_file_from_inode(ino, flags)?);
160        }
161        let path = self.get_id_path(id);
162        if create {
163            match fs.create_dir(&path.0) {
164                Ok(_) => {}
165                Err(e) if e.kind() == ErrorKind::AlreadyExists => {}
166                Err(e) => Err(e)?,
167            }
168        }
169        Ok(fs.open_file(&path.1, flags)?)
170    }
171}
172
173impl PagedObjectStore for Ext4Store {
174    fn create_object(&self, id: crate::ObjID) -> Result<()> {
175        let mut fs = self.fs.lock().unwrap();
176        self.get_object_as_file(&mut fs, id, true)?;
177        Ok(())
178    }
179
180    fn delete_object(&self, id: crate::ObjID) -> Result<()> {
181        let path = self.get_id_path(id);
182        Ok(self.fs.lock().unwrap().remove_file(&path.1)?)
183    }
184
185    fn len(&self, id: crate::ObjID) -> Result<u64> {
186        let mut fs = self.fs.lock().unwrap();
187        let mut file = self.get_object_as_file(&mut fs, id, false)?;
188        Ok(file.len())
189    }
190
191    fn read_object(&self, id: crate::ObjID, offset: u64, buf: &mut [u8]) -> Result<usize> {
192        let mut fs = self.fs.lock().unwrap();
193        let mut file = self.get_object_as_file(&mut fs, id, false)?;
194        file.seek(SeekFrom::Start(offset))?;
195        Ok(file.read(buf)?)
196    }
197
198    fn write_object(&self, id: crate::ObjID, offset: u64, buf: &[u8]) -> Result<()> {
199        let mut fs = self.fs.lock().unwrap();
200        let mut file = self.get_object_as_file(&mut fs, id, false)?;
201        if offset > file.len() {
202            file.ensure_backing(offset)
203                .inspect_err(|e| tracing::warn!("failed to ensure backing for object: {}", e))?;
204            file.truncate(offset).inspect_err(|e| {
205                tracing::warn!("failed to initialize object to {}: {}", offset, e)
206            })?;
207        }
208        file.seek(SeekFrom::Start(offset))?;
209        // TODO
210        file.write(buf)?;
211        Ok(())
212    }
213
214    fn get_config_id(&self) -> Result<ObjID> {
215        let mut buf = [0; 16];
216        self.read_object(0, 0, &mut buf).and_then(|len| {
217            if len == 16 && buf.iter().find(|x| **x != 0).is_some() {
218                Ok(ObjID::from_le_bytes(buf))
219            } else {
220                Err(ErrorKind::InvalidData.into())
221            }
222        })
223    }
224
225    fn set_config_id(&self, id: ObjID) -> Result<()> {
226        let _ = self.delete_object(0);
227        self.create_object(0)?;
228        self.write_object(0, 0, &id.to_le_bytes())
229    }
230
    /// Currently a no-op: returns `Ok(())` without touching the filesystem or the device.
    // NOTE(review): nothing is synced here; confirm whether lwext4 needs an explicit flush.
    fn flush(&self) -> Result<()> {
        Ok(())
    }
234
235    fn page_in_object<'a>(&self, id: ObjID, reqs: &'a mut [crate::PageRequest]) -> Result<usize> {
236        let mut fs = self.fs.lock().unwrap();
237        let blocks_per_page = PAGE_SIZE / fs.block_size()? as usize;
238        let mut file = self.get_object_as_file(&mut fs, id, false)?;
239        let mut inode = file.get_file_inode()?;
240        tracing::debug!("paging  in request for {} reqs", reqs.len());
241        let mut blocks = reqs
242            .iter_mut()
243            .map(|req| {
244                let mut disk_pages = Vec::<DevicePage>::new();
245
246                let mut page = req.start_page;
247                let end = req.start_page + req.nr_pages as i64;
248
249                while page < end {
250                    let mut block = page as u32;
251                    if objid_to_ino(id).is_some() {
252                        // External files don't have null pages
253                        block -= 1;
254                    }
255                    block = block * blocks_per_page as u32;
256                    let rem_blocks = (end - page) as u32 * blocks_per_page as u32;
257
258                    let item = match inode.get_data_blocks(block, rem_blocks, false) {
259                        Ok((dblock, nr_dblk)) if nr_dblk > 0 => {
260                            if dblock == 0 {
261                                DevicePage::Hole(nr_dblk)
262                            } else {
263                                DevicePage::Run(dblock, nr_dblk)
264                            }
265                        }
266                        _ => match inode.get_data_block(block, false)? {
267                            0 => DevicePage::Hole(1),
268                            dpg => DevicePage::Run(dpg, 1),
269                        },
270                    };
271                    page += item.nr_pages() as i64;
272                    if let Some(prev) = disk_pages.last_mut() {
273                        if !prev.try_extend(&item) {
274                            disk_pages.push(item);
275                        }
276                    } else {
277                        disk_pages.push(item);
278                    }
279                }
280                Result::Ok((req, disk_pages))
281            })
282            .try_collect::<Vec<_>>()?;
283        drop(file);
284        drop(fs);
285        for br in blocks.iter_mut() {
286            let pages = &br.1[..];
287            let _len = br.0.page_in(pages, &*self.device)?;
288        }
289
290        Ok(reqs.len())
291    }
292
293    fn page_out_object<'a>(&self, id: ObjID, reqs: &'a mut [crate::PageRequest]) -> Result<usize> {
294        let end_offset = reqs
295            .iter()
296            .max_by_key(|req| req.start_page as u64 + req.nr_pages as u64)
297            .map(|end_req| {
298                (end_req.start_page as u64 + end_req.nr_pages as u64) * PAGE_SIZE as u64
299            });
300
301        let mut fs = self.fs.lock().unwrap();
302        let blocks_per_page = PAGE_SIZE / fs.block_size()? as usize;
303        let mut file = self.get_object_as_file(&mut fs, id, false)?;
304        if end_offset.unwrap_or(0) >= file.len() {
305            drop(file);
306            drop(fs);
307            self.write_object(id, end_offset.unwrap_or(0), &[0u8; PAGE_SIZE])?;
308            fs = self.fs.lock().unwrap();
309        } else {
310            drop(file);
311        }
312        let mut file = self.get_object_as_file(&mut fs, id, false)?;
313        let mut inode = file.get_file_inode()?;
314        tracing::debug!("paging out request for {} reqs", reqs.len());
315
316        let mut blocks = reqs
317            .iter_mut()
318            .map(|req| {
319                let mut disk_pages = Vec::<DevicePage>::new();
320                for page in req.start_page..(req.start_page + req.nr_pages as i64) {
321                    let mut block = page as u32;
322                    if objid_to_ino(id).is_some() {
323                        // External files don't have null pages
324                        block -= 1;
325                    }
326                    let item = match inode.get_data_block(block * blocks_per_page as u32, true)? {
327                        0 => Result::Err(ErrorKind::Other.into())?,
328                        dpg => DevicePage::Run(dpg, 1),
329                    };
330                    if let Some(prev) = disk_pages.last_mut() {
331                        if !prev.try_extend(&item) {
332                            disk_pages.push(item);
333                        }
334                    } else {
335                        disk_pages.push(item);
336                    }
337                }
338                Result::Ok((req, disk_pages))
339            })
340            .try_collect::<Vec<_>>()?;
341
342        drop(file);
343        drop(fs);
344        for br in blocks.iter_mut() {
345            let pages = &br.1[..];
346            let _len = br.0.page_out(pages, &*self.device)?;
347        }
348        Ok(reqs.len())
349    }
350
351    fn enumerate_external(&self, id: ObjID) -> Result<Vec<ExternalFile>> {
352        let mut fs = self.fs.lock().unwrap();
353        let mut inonr = objid_to_ino(id).ok_or(ErrorKind::InvalidInput)?;
354        if inonr == 0 {
355            inonr = ROOT_DIRECTORY_INODE;
356        }
357        let mut inode = fs.get_inode(inonr)?;
358        let diriter = fs.dirents(&mut inode)?;
359
360        Ok(diriter
361            .filter_map(|de| {
362                de.1.ok()
363                    .map(|ino| ExternalFile::new(&de.0, ino.kind().into(), ino_to_objid(ino.num())))
364            })
365            .collect())
366    }
367
368    fn find_external(&self, id: ObjID) -> Result<usize> {
369        let mut fs = self.fs.lock().unwrap();
370        let mut inonr = objid_to_ino(id).ok_or(ErrorKind::InvalidInput)?;
371        if inonr == 0 {
372            inonr = ROOT_DIRECTORY_INODE;
373        }
374        let inode = fs.get_inode(inonr)?;
375        Ok(inode.size() as usize)
376    }
377}