twizzler_abi/
trace.rs

1//! Tracing data structures
2//!
3//! This module defines the object and data structures for tracing kernel, thread, etc. events.
4//! Tracing is done via the sys_ktrace system call, which takes an object and a trace spec. This
5//! object is the "prime" trace object for a sequence of trace objects. The kernel fills these
6//! objects with a stream of trace events until they fill up (reach a maximum size defined by the
7//! kernel). Once a trace object fills up, the kernel generates a new trace object with the same
8//! CreateSpec as the prime object, and starts filling that object with data. It then appends a
9//! 'next-object' entry to the object that just filled up, so readers know which object to continue
10//! reading from.
11//!
12//! A single prime tracing object can be associated with multiple TraceSpec structures, enabling
13//! that object (or its subsequent objects) to collect a wide variety of data with fine-grained
14//! control over which events are collected.
15//!
16//! Streamed data is tracked via the TraceBase structure, located at the base address of any trace
17//! object. The end field defines the current end-point of data within the object. This atomic field
18//! is updated by the kernel when trace data is written, and any threads sleeping on this value will
19//! wake up.
20//!
21//! Note that not all trace events are generated synchronously. Asynchronous events are generated by
22//! the kernel when in critical states. These events are not guaranteed to be reported in a timely
23//! manner, and may be dropped if the system is under heavy load.
24
25use core::{alloc::Layout, sync::atomic::AtomicU64};
26
27use twizzler_rt_abi::object::ObjID;
28
29use crate::{
30    pager::{CompletionToKernel, CompletionToPager, KernelCommand, PagerRequest},
31    syscall::{
32        MapFlags, Syscall, ThreadSyncFlags, ThreadSyncOp, ThreadSyncReference, ThreadSyncSleep,
33        TimeSpan,
34    },
35    thread::ExecutionState,
36};
37
/// Header for a trace entry.
///
/// This is always present, and may be optionally followed by additional data (a [TraceData]
/// header plus payload) if the `flags` field contains the `HAS_DATA` flag. If `flags` contains
/// `NEXT_OBJECT`, this header is not an event at all: it only links to the next trace object
/// through `extra_or_next` (see [TraceEntryHead::new_next_object]).
#[derive(Clone, Copy, Debug, Default)]
#[repr(C)]
pub struct TraceEntryHead {
    /// The ID of the thread that generated this trace entry.
    pub thread: ObjID,
    /// The ID of the security context that generated this trace entry.
    pub sctx: ObjID,
    /// The ID of the memory context that generated this trace entry.
    pub mctx: ObjID,
    /// The ID of the CPU that generated this trace entry.
    pub cpuid: u64,
    /// The time at which this trace entry was generated.
    pub time: TimeSpan,
    /// The event that generated this trace entry. This is the value tested against
    /// [TraceDataCast::EVENT] by [TraceData::try_cast]; presumably one of the event bit
    /// constants in this module, interpreted according to `kind` (confirm against the kernel).
    pub event: u64,
    /// The kind of trace entry.
    pub kind: TraceKind,
    /// Provided extra data from the [TraceSpec] that matched this entry, or if NEXT_OBJECT is set,
    /// the ID of the next object.
    pub extra_or_next: ObjID,
    /// Flags indicating the type of trace entry.
    pub flags: TraceEntryFlags,
}
63
64impl TraceEntryHead {
65    /// Create a new trace entry head with the NEXT_OBJECT flag set.
66    pub fn new_next_object(id: ObjID) -> Self {
67        Self {
68            extra_or_next: id,
69            flags: TraceEntryFlags::NEXT_OBJECT,
70            ..Default::default()
71        }
72    }
73}
74
/// Header for additional data.
///
/// A record of this shape follows a [TraceEntryHead] whose flags contain `HAS_DATA`. The
/// payload type `T` depends on the event; use [TraceData::try_cast] to view the record with a
/// concrete payload type.
#[derive(Clone, Copy, Debug)]
#[repr(C)]
pub struct TraceData<T: Copy> {
    /// Reserved for future use.
    pub resv: u64,
    /// Length of the data in bytes (including this header).
    pub len: u32,
    /// Flags for this extra data.
    pub flags: u32,
    /// Data associated with the trace entry.
    pub data: T,
}
88
89impl<T: Copy> TraceData<T> {
90    /// Try to cast the data into a concrete Trace Data type, if events match.
91    pub fn try_cast<U: TraceDataCast + Copy>(&self, events: u64) -> Option<&TraceData<U>> {
92        if events & U::EVENT != 0 {
93            unsafe {
94                Some(
95                    (self as *const Self)
96                        .cast::<TraceData<U>>()
97                        .as_ref()
98                        .unwrap(),
99                )
100            }
101        } else {
102            None
103        }
104    }
105}
106
/// The base structure for a trace object.
///
/// Readers compare their own read position against `end` and can block for more data using
/// [TraceBase::waiter].
#[repr(C)]
pub struct TraceBase {
    /// The end point for valid data. The kernel will update this and submit a thread_sync wakeup
    /// when writing trace data to this object.
    pub end: AtomicU64,
    /// The start point of valid data in this object. This is set by the kernel during
    /// initialization and then not updated again.
    pub start: u64,
}
117
118impl TraceBase {
119    /// Get a waiter for this tracing object based on how much data has thus-far been read.
120    pub fn waiter(&self, pos: u64) -> ThreadSyncSleep {
121        ThreadSyncSleep::new(
122            ThreadSyncReference::Virtual(&self.end),
123            pos,
124            ThreadSyncOp::Equal,
125            ThreadSyncFlags::empty(),
126        )
127    }
128}
129
/// Kinds of tracing events.
///
/// Stored as a `u16`. The named kinds are numbered consecutively from zero; `Other` is the
/// default and uses the all-ones value.
#[repr(u16)]
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum TraceKind {
    /// Kernel events (presumably paired with the `KERNEL_*` event constants).
    Kernel = 0,
    /// Thread events (presumably paired with the `THREAD_*` event constants).
    Thread = 1,
    /// Object events (presumably paired with the `OBJECT_*` event constants).
    Object = 2,
    /// Memory context events (presumably paired with the `CONTEXT_*` event constants).
    Context = 3,
    /// Security events (presumably paired with the `SECURITY_*` event constants).
    Security = 4,
    /// Pager events (presumably paired with the `PAGER_*` event constants).
    Pager = 5,
    /// Runtime events (presumably paired with the `RUNTIME_*` event constants).
    Runtime = 6,
    /// Any other kind of event; this is the `Default` value.
    #[default]
    Other = 0xffff,
}
144
145bitflags::bitflags! {
146    #[derive(Clone, Copy, Debug)]
147    pub struct TraceFlags: u16 {
148        /// Include additional data.
149        const DATA = 1;
150        // TODO: support collecting thread registers.
151        //const REGISTERS = 2;
152    }
153}
154
155bitflags::bitflags! {
156    #[derive(Clone, Copy, Debug, Default)]
157    /// Trace entry flags.
158    pub struct TraceEntryFlags: u16 {
159        /// The kernel dropped a trace event when processing.
160        const DROPPED = 1;
161        /// This trace entry is followed by a TraceData entry.
162        const HAS_DATA = 2;
163        /// This trace entry is NOT a trace entry, and instead indicates that
164        /// the kernel ran out of room in the current object. The extra_or_next field
165        /// contains the object ID of the next trace object to read from.
166        const NEXT_OBJECT = 4;
167    }
168}
169
// Thread events (one bit per event, so several can be combined in a mask)
/// Thread has exited.
pub const THREAD_EXIT: u64 = 1 << 0;
/// Thread context switch occurred.
pub const THREAD_CONTEXT_SWITCH: u64 = 1 << 1;
/// Thread sampling event occurred.
pub const THREAD_SAMPLE: u64 = 1 << 2;
/// Thread made a system call.
pub const THREAD_SYSCALL_ENTRY: u64 = 1 << 3;
/// Thread was blocked.
pub const THREAD_BLOCK: u64 = 1 << 4;
/// Thread was resumed from blocked state.
pub const THREAD_RESUME: u64 = 1 << 5;
/// Thread migrated to a different CPU.
pub const THREAD_MIGRATE: u64 = 1 << 6;
/// Thread returned from a system call.
pub const THREAD_SYSCALL_EXIT: u64 = 1 << 7;
187
// Object events (one bit per event)
/// Object control operation occurred.
pub const OBJECT_CTRL: u64 = 1 << 0;
/// Object was created.
pub const OBJECT_CREATE: u64 = 1 << 1;
193
// Context events (one bit per event)
/// Memory mapping operation occurred.
pub const CONTEXT_MAP: u64 = 1 << 0;
/// Memory unmapping operation occurred.
pub const CONTEXT_UNMAP: u64 = 1 << 1;
/// Memory fault occurred.
pub const CONTEXT_FAULT: u64 = 1 << 2;
/// TLB shootdown occurred.
pub const CONTEXT_SHOOTDOWN: u64 = 1 << 3;
/// Memory invalidation occurred.
pub const CONTEXT_INVALIDATION: u64 = 1 << 4;
205
// Security events (one bit per event)
/// Entered a security context.
pub const SECURITY_CTX_ENTRY: u64 = 1 << 0;
/// Exited a security context.
pub const SECURITY_CTX_EXIT: u64 = 1 << 1;
/// Security violation occurred.
pub const SECURITY_VIOLATION: u64 = 1 << 2;
213
// Kernel events (one bit per event)
/// Kernel memory allocation occurred.
pub const KERNEL_ALLOC: u64 = 1 << 0;
/// Kernel performed thread balancing.
pub const KERNEL_BALANCE: u64 = 1 << 1;
220
// Pager events (one bit per event)
/// Pager command was sent.
pub const PAGER_COMMAND_SEND: u64 = 1 << 0;
/// Pager command was responded to.
pub const PAGER_COMMAND_RESPONDED: u64 = 1 << 1;
/// Pager request was received.
pub const PAGER_REQUEST_RECV: u64 = 1 << 2;
/// Pager request was completed.
pub const PAGER_REQUEST_COMPLETED: u64 = 1 << 3;
230
// Runtime events (one bit per event)
/// Runtime memory allocation occurred.
pub const RUNTIME_ALLOC: u64 = 1 << 0;
233
/// Trait for types that can be cast from trace data based on event types.
///
/// Implemented by the payload structs in this module. [TraceData::try_cast] allows a cast to
/// an implementing type only when the entry's event value shares a bit with [Self::EVENT].
pub trait TraceDataCast {
    /// The event constant associated with this trace data type. Note that event constants of
    /// different kinds reuse the same bit values (e.g. several groups define an event as `1`).
    const EVENT: u64;
}
239
/// Kernel allocation information.
///
/// Payload type associated with [KERNEL_ALLOC] through its [TraceDataCast] impl.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct KernelAllocationEvent {
    /// Time span recorded for the operation (presumably how long the allocator call took --
    /// confirm against the kernel).
    pub duration: TimeSpan,
    /// Size and alignment of the memory block involved.
    pub layout: Layout,
    /// Presumably true when the event records a free rather than an allocation (inferred from
    /// the field name -- confirm).
    pub is_free: bool,
}
248
/// Kernel thread rebalancing information.
///
/// NOTE(review): presumably the payload for [KERNEL_BALANCE] events, but no [TraceDataCast]
/// impl for this type is visible in this part of the file -- confirm.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct KernelRebalance {
    /// Time span recorded for the rebalance (presumably how long it took -- confirm).
    pub duration: TimeSpan,
    /// Count of moved items (presumably threads migrated by the rebalance -- confirm).
    pub moved: u32,
}
256
/// Runtime allocation information.
///
/// Payload type associated with [RUNTIME_ALLOC] through its [TraceDataCast] impl.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct RuntimeAllocationEvent {
    /// Time span recorded for the operation (presumably how long the allocator call took --
    /// confirm).
    pub duration: TimeSpan,
    /// Size and alignment of the memory block involved.
    pub layout: Layout,
    /// Address value for the event (presumably the address of the block -- confirm).
    pub addr: u64,
    /// Presumably true when the event records a free rather than an allocation (inferred from
    /// the field name -- confirm).
    pub is_free: bool,
}
266
/// Event data for thread operations.
///
/// Its [TraceDataCast] impl associates this payload with [THREAD_EXIT].
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct ThreadEvent {
    /// Generic value associated with the thread event.
    pub val: u64,
}
274
/// Capacity, in bytes, of the `block_name` buffer in [ThreadBlocked].
pub const MAX_BLOCK_NAME: usize = 28;

/// Event data for a thread blocking.
///
/// Payload type associated with [THREAD_BLOCK] through its [TraceDataCast] impl.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct ThreadBlocked {
    /// UTF-8 bytes of name of this block point.
    pub block_name: [u8; MAX_BLOCK_NAME],
    /// Length of the block_name (presumably the number of valid leading bytes in the buffer --
    /// confirm).
    pub block_name_len: u32,
}
285
/// Event data for a thread resuming from a blocked state.
///
/// Payload type associated with [THREAD_RESUME] through its [TraceDataCast] impl.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct ThreadResumed {
    /// Time spent blocked.
    pub duration: TimeSpan,
}
292
/// Event data for system call entry.
///
/// Payload type associated with [THREAD_SYSCALL_ENTRY] through its [TraceDataCast] impl. It is
/// also embedded in [SyscallExitEvent].
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct SyscallEntryEvent {
    /// Instruction pointer at syscall entry.
    pub ip: u64,
    /// The system call number.
    pub num: Syscall,
    /// Arguments.
    pub args: [u64; 6],
}
304
/// Event data for system call exit.
///
/// Payload type associated with [THREAD_SYSCALL_EXIT] through its [TraceDataCast] impl.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct SyscallExitEvent {
    /// The entry record (instruction pointer, call number, arguments) for this system call.
    pub entry: SyscallEntryEvent,
    /// The return value.
    pub ret: [u64; 2],
    /// Time spent processing system call.
    pub duration: TimeSpan,
}
315
316bitflags::bitflags! {
317    #[derive(Clone, Copy, Debug)]
318    /// Flags describing memory fault characteristics.
319    pub struct SwitchFlags: u64 {
320        /// Switch to kernel thread
321        const TO_KTHREAD = 1;
322        /// Switching to tracing thread
323        const IS_TRACE = 2;
324        /// Switch to idle thread
325        const TO_IDLE = 4;
326        /// Thread was preempted
327        const PREEMPTED = 8;
328        /// Thread is going to sleep
329        const SLEEPING = 0x10;
330    }
331}
332
/// Event data for thread context switches.
///
/// Payload type associated with [THREAD_CONTEXT_SWITCH] through its [TraceDataCast] impl.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct ThreadCtxSwitch {
    /// ID of the thread being switched to, if any.
    // NOTE(review): `Option<ObjID>` inside a `repr(C)` struct; its layout depends on how
    // `ObjID` is defined -- confirm this is stable across the kernel/user boundary.
    pub to: Option<ObjID>,
    /// Flags describing the switch.
    pub flags: SwitchFlags,
}
341
/// Event data for thread migration between CPUs.
///
/// Payload type associated with [THREAD_MIGRATE] through its [TraceDataCast] impl.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct ThreadMigrate {
    /// ID of the CPU being migrated from.
    pub from: u64,
    /// ID of the CPU being migrated to.
    pub to: u64,
}
351
/// Event data for thread sampling operations.
///
/// Payload type associated with [THREAD_SAMPLE] through its [TraceDataCast] impl.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct ThreadSamplingEvent {
    /// Instruction pointer at sampling time.
    pub ip: u64,
    /// Thread execution state at sampling time.
    pub state: ExecutionState,
}
361
/// Event data for memory mapping operations.
///
/// Payload type associated with [CONTEXT_MAP] through its [TraceDataCast] impl.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct ContextMapEvent {
    /// Virtual address being mapped.
    pub addr: u64,
    /// Length of the mapping in bytes.
    pub len: u64,
    /// Object being mapped.
    pub obj: ObjID,
    /// Mapping flags.
    pub flags: MapFlags,
}
375
376bitflags::bitflags! {
377    #[derive(Clone, Copy, Debug)]
378    /// Flags describing memory fault characteristics.
379    pub struct FaultFlags: u64 {
380        /// Fault occurred on read access.
381        const READ = 1;
382        /// Fault occurred on write access.
383        const WRITE = 2;
384        /// Fault occurred on execute access.
385        const EXEC = 4;
386        /// Fault occurred in user mode.
387        const USER = 8;
388        /// Fault was handled by pager.
389        const PAGER = 0x10;
390        /// Fault involved large pages.
391        const LARGE = 0x20;
392    }
393}
394
395/// Event data for memory faults.
396#[derive(Clone, Copy, Debug)]
397pub struct ContextFaultEvent {
398    /// Virtual address that faulted.
399    pub addr: u64,
400    /// Object associated with the fault.
401    pub obj: ObjID,
402    /// Flags describing the fault type.
403    pub flags: FaultFlags,
404    /// Time spent processing the fault.
405    pub processing_time: TimeSpan,
406}
407
408/// Event data for pager commands sent to kernel.
409#[derive(Clone, Copy, Debug)]
410pub struct PagerCommandSent {
411    /// The command that was sent.
412    pub cmd: KernelCommand,
413    /// Queue ID for the command.
414    pub qid: u32,
415}
416
417/// Event data for pager command responses.
418#[derive(Clone, Copy, Debug)]
419pub struct PagerCommandResponded {
420    /// Queue ID for the response.
421    pub qid: u32,
422    /// The response data.
423    pub resp: CompletionToKernel,
424}
425
426/// Event data for pager requests received.
427#[derive(Clone, Copy, Debug)]
428pub struct PagerRequestRecv {
429    /// The request that was received.
430    pub req: PagerRequest,
431    /// Queue ID for the request.
432    pub qid: u32,
433}
434
435/// Event data for completed pager requests.
436#[derive(Clone, Copy, Debug)]
437pub struct PagerRequestCompleted {
438    /// Queue ID for the completed request.
439    pub qid: u32,
440    /// The completion response.
441    pub resp: CompletionToPager,
442}
443
444impl TraceDataCast for ContextMapEvent {
445    const EVENT: u64 = CONTEXT_MAP;
446}
447
448impl TraceDataCast for ContextFaultEvent {
449    const EVENT: u64 = CONTEXT_FAULT;
450}
451
452impl TraceDataCast for ThreadEvent {
453    const EVENT: u64 = THREAD_EXIT;
454}
455
456impl TraceDataCast for ThreadCtxSwitch {
457    const EVENT: u64 = THREAD_CONTEXT_SWITCH;
458}
459
460impl TraceDataCast for ThreadMigrate {
461    const EVENT: u64 = THREAD_MIGRATE;
462}
463
464impl TraceDataCast for PagerCommandSent {
465    const EVENT: u64 = PAGER_COMMAND_SEND;
466}
467
468impl TraceDataCast for PagerCommandResponded {
469    const EVENT: u64 = PAGER_COMMAND_RESPONDED;
470}
471
472impl TraceDataCast for PagerRequestRecv {
473    const EVENT: u64 = PAGER_REQUEST_RECV;
474}
475
476impl TraceDataCast for PagerRequestCompleted {
477    const EVENT: u64 = PAGER_REQUEST_COMPLETED;
478}
479
480impl TraceDataCast for SyscallEntryEvent {
481    const EVENT: u64 = THREAD_SYSCALL_ENTRY;
482}
483
484impl TraceDataCast for SyscallExitEvent {
485    const EVENT: u64 = THREAD_SYSCALL_EXIT;
486}
487
488impl TraceDataCast for ThreadSamplingEvent {
489    const EVENT: u64 = THREAD_SAMPLE;
490}
491
492impl TraceDataCast for ThreadBlocked {
493    const EVENT: u64 = THREAD_BLOCK;
494}
495
496impl TraceDataCast for ThreadResumed {
497    const EVENT: u64 = THREAD_RESUME;
498}
499
500impl TraceDataCast for RuntimeAllocationEvent {
501    const EVENT: u64 = RUNTIME_ALLOC;
502}
503
504impl TraceDataCast for KernelAllocationEvent {
505    const EVENT: u64 = KERNEL_ALLOC;
506}