Skip to content

Commit 28d2d7e

Browse files
committed
implement sched affinity syscalls
1 parent a8aef04 commit 28d2d7e

File tree

5 files changed

+126
-32
lines changed

5 files changed

+126
-32
lines changed

etc/syscalls_linux_aarch64.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,8 @@
122122
| 0x77 (119) | sched_setscheduler | (pid_t pid, int policy, struct sched_param *param) | __arm64_sys_sched_setscheduler | false |
123123
| 0x78 (120) | sched_getscheduler | (pid_t pid) | __arm64_sys_sched_getscheduler | false |
124124
| 0x79 (121) | sched_getparam | (pid_t pid, struct sched_param *param) | __arm64_sys_sched_getparam | false |
125-
| 0x7a (122) | sched_setaffinity | (pid_t pid, unsigned int len, unsigned long *user_mask_ptr) | __arm64_sys_sched_setaffinity | false |
126-
| 0x7b (123) | sched_getaffinity | (pid_t pid, unsigned int len, unsigned long *user_mask_ptr) | __arm64_sys_sched_getaffinity | error |
125+
| 0x7a (122) | sched_setaffinity | (pid_t pid, unsigned int len, unsigned long *user_mask_ptr) | __arm64_sys_sched_setaffinity | true |
126+
| 0x7b (123) | sched_getaffinity | (pid_t pid, unsigned int len, unsigned long *user_mask_ptr) | __arm64_sys_sched_getaffinity | true |
127127
| 0x7c (124) | sched_yield | () | __arm64_sys_sched_yield | true |
128128
| 0x7d (125) | sched_get_priority_max | (int policy) | __arm64_sys_sched_get_priority_max | false |
129129
| 0x7e (126) | sched_get_priority_min | (int policy) | __arm64_sys_sched_get_priority_min | false |

src/arch/arm64/exceptions/syscall.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,12 @@ use crate::{
9797
},
9898
threading::{futex::sys_futex, sys_set_robust_list, sys_set_tid_address},
9999
},
100-
sched::{self, current::current_task, sched_task::state::TaskState, sys_sched_yield},
100+
sched::{
101+
self,
102+
current::current_task,
103+
sched_task::state::TaskState,
104+
syscalls::{sys_sched_getaffinity, sys_sched_setaffinity, sys_sched_yield},
105+
},
101106
};
102107
use alloc::boxed::Box;
103108
use libkernel::{
@@ -455,7 +460,8 @@ pub async fn handle_syscall() {
455460
)
456461
.await
457462
}
458-
0x7b => Err(KernelError::NotSupported),
463+
0x7a => sys_sched_setaffinity(arg1 as _, arg2 as _, TUA::from_value(arg3 as _)).await,
464+
0x7b => sys_sched_getaffinity(arg1 as _, arg2 as _, TUA::from_value(arg3 as _)).await,
459465
0x7c => sys_sched_yield(),
460466
0x81 => sys_kill(arg1 as _, arg2.into()),
461467
0x82 => sys_tkill(arg1 as _, arg2.into()),

src/sched/mod.rs

Lines changed: 41 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@ use crate::drivers::timer::now;
44
use crate::interrupts::cpu_messenger::{Message, message_cpu};
55
use crate::kernel::cpu_id::CpuId;
66
use crate::process::owned::OwnedTask;
7+
use crate::sched::sched_task::{CPU_MASK_SIZE, CpuMask};
78
use crate::{per_cpu_private, per_cpu_shared, process::TASK_LIST};
89
use alloc::{boxed::Box, sync::Arc, vec::Vec};
910
use core::fmt::Debug;
1011
use core::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
1112
use core::task::Waker;
1213
use core::time::Duration;
1314
use current::{CUR_TASK_PTR, current_task};
14-
use libkernel::error::Result;
1515
use log::warn;
1616
use runqueue::RunQueue;
1717
use sched_task::{RunnableTask, Work};
@@ -20,6 +20,7 @@ use waker::create_waker;
2020
pub mod current;
2121
mod runqueue;
2222
pub mod sched_task;
23+
pub mod syscalls;
2324
pub mod uspc_ret;
2425
pub mod waker;
2526

@@ -135,21 +136,30 @@ pub fn spawn_kernel_work(fut: impl Future<Output = ()> + 'static + Send) {
135136
}
136137

137138
#[cfg(feature = "smp")]
138-
fn get_best_cpu() -> CpuId {
139+
fn get_best_cpu(cpu_mask: CpuMask) -> CpuId {
139140
let r = 0..ArchImpl::cpu_count();
140-
r.min_by(|&x, &y| {
141-
// TODO: Find a way to calculate already assigned affinities and account for that
142-
let info_x = SHARED_SCHED_STATE.get_by_cpu(x);
143-
let info_y = SHARED_SCHED_STATE.get_by_cpu(y);
144-
let weight_x = info_x.total_runq_weight.load(Ordering::Relaxed);
145-
let weight_y = info_y.total_runq_weight.load(Ordering::Relaxed);
146-
weight_x.cmp(&weight_y)
147-
})
148-
.map(CpuId::from_value)
149-
.unwrap_or_else(|| {
150-
warn!("No CPUs found when trying to get best CPU! Defaulting to CPU 0");
151-
CpuId::from_value(0)
152-
})
141+
r.enumerate()
142+
// Filter to only CPUs in the mask
143+
.filter(|(i, _)| {
144+
let byte_index = i / 8;
145+
let bit_index = i % 8;
146+
(cpu_mask[byte_index] & (1 << bit_index)) != 0
147+
})
148+
.map(|(_, cpu_id)| cpu_id)
149+
// Find optimal CPU based on least run queue weight
150+
.min_by(|&x, &y| {
151+
// TODO: Find a way to calculate already assigned affinities and account for that
152+
let info_x = SHARED_SCHED_STATE.get_by_cpu(x);
153+
let info_y = SHARED_SCHED_STATE.get_by_cpu(y);
154+
let weight_x = info_x.total_runq_weight.load(Ordering::Relaxed);
155+
let weight_y = info_y.total_runq_weight.load(Ordering::Relaxed);
156+
weight_x.cmp(&weight_y)
157+
})
158+
.map(CpuId::from_value)
159+
.unwrap_or_else(|| {
160+
warn!("No CPUs found when trying to get best CPU! Defaulting to CPU 0");
161+
CpuId::from_value(0)
162+
})
153163
}
154164

155165
/// Insert the given task onto a CPU's run queue.
@@ -159,17 +169,28 @@ pub fn insert_work(work: Arc<Work>) {
159169

160170
#[cfg(feature = "smp")]
161171
pub fn insert_work_cross_cpu(work: Arc<Work>) {
162-
let last_cpu = work
163-
.sched_data
164-
.lock_save_irq()
172+
let sched_data = work.sched_data.lock_save_irq();
173+
let last_cpu = sched_data
165174
.as_ref()
166175
.map(|s| s.last_cpu)
167176
.unwrap_or(usize::MAX);
177+
let mask = sched_data
178+
.as_ref()
179+
.map(|s| s.cpu_mask)
180+
.unwrap_or([u8::MAX; CPU_MASK_SIZE]);
168181
let cpu = if last_cpu == usize::MAX {
169-
get_best_cpu()
182+
get_best_cpu(mask)
170183
} else {
171-
CpuId::from_value(last_cpu)
184+
// Check if the last CPU is still in the affinity mask, and if so, prefer it to improve cache locality.
185+
let byte_index = last_cpu / 8;
186+
let bit_index = last_cpu % 8;
187+
if (mask[byte_index] & (1 << bit_index)) != 0 {
188+
CpuId::from_value(last_cpu)
189+
} else {
190+
get_best_cpu(mask)
191+
}
172192
};
193+
drop(sched_data);
173194
if cpu == CpuId::this() {
174195
SCHED_STATE.borrow_mut().run_q.add_work(work);
175196
} else {
@@ -264,11 +285,6 @@ pub fn sched_init_secondary() {
264285
schedule();
265286
}
266287

267-
pub fn sys_sched_yield() -> Result<usize> {
268-
schedule();
269-
Ok(0)
270-
}
271-
272288
pub fn current_work() -> Arc<Work> {
273289
SCHED_STATE.borrow().run_q.current().task.clone()
274290
}

src/sched/sched_task/mod.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ pub struct Work {
2323
}
2424

2525
pub const NR_CPUS: usize = 256;
26-
pub const CPU_MASK_SIZE: usize = NR_CPUS / 64;
26+
pub const CPU_MASK_SIZE: usize = NR_CPUS / 8;
27+
pub type CpuMask = [u8; CPU_MASK_SIZE];
2728

2829
#[derive(Clone)]
2930
pub struct SchedulerData {
@@ -36,7 +37,7 @@ pub struct SchedulerData {
3637
pub deadline: Option<Instant>,
3738
pub last_run: Option<Instant>,
3839
pub last_cpu: usize,
39-
pub cpu_mask: [u64; CPU_MASK_SIZE],
40+
pub cpu_mask: CpuMask,
4041
}
4142

4243
impl SchedulerData {
@@ -49,7 +50,7 @@ impl SchedulerData {
4950
deadline: None,
5051
last_run: None,
5152
last_cpu: usize::MAX,
52-
cpu_mask: [u64::MAX; CPU_MASK_SIZE],
53+
cpu_mask: [u8::MAX; CPU_MASK_SIZE],
5354
}
5455
}
5556
}

src/sched/syscalls/mod.rs

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
use crate::arch::{Arch, ArchImpl};
2+
use crate::memory::uaccess::{copy_from_user_slice, copy_to_user_slice};
3+
use crate::process::thread_group::pid::PidT;
4+
use crate::sched::sched_task::CPU_MASK_SIZE;
5+
use crate::sched::{current_work, schedule};
6+
use alloc::vec;
7+
use libkernel::memory::address::UA;
8+
9+
/// `sched_yield(2)`: voluntarily give up the CPU.
///
/// Runs one pass of the scheduler and always reports success (`0`) back to
/// userspace, matching Linux semantics for this syscall.
pub fn sys_sched_yield() -> libkernel::error::Result<usize> {
    schedule();
    Ok(0)
}
13+
14+
pub async fn sys_sched_getaffinity(
15+
pid: PidT,
16+
size: usize,
17+
mask: UA,
18+
) -> libkernel::error::Result<usize> {
19+
let task = if pid == 0 {
20+
current_work()
21+
} else {
22+
// TODO: Support getting affinity of other tasks if PERM_NICE
23+
return Err(libkernel::error::KernelError::InvalidValue);
24+
};
25+
let cpu_mask = {
26+
let sched_data = task.sched_data.lock_save_irq();
27+
sched_data.as_ref().unwrap().cpu_mask
28+
};
29+
let mut cpu_mask: &[u8] = &cpu_mask;
30+
if CPU_MASK_SIZE > size {
31+
cpu_mask = &cpu_mask[..size];
32+
}
33+
copy_to_user_slice(cpu_mask, mask).await?;
34+
Ok(cpu_mask.len())
35+
}
36+
37+
pub async fn sys_sched_setaffinity(
38+
pid: PidT,
39+
size: usize,
40+
mask: UA,
41+
) -> libkernel::error::Result<usize> {
42+
let mut cpu_set = vec![0u8; size];
43+
copy_from_user_slice(mask, cpu_set.as_mut_slice()).await?;
44+
let task = if pid == 0 {
45+
current_work()
46+
} else {
47+
// TODO: Support setting affinity of other tasks if PERM_NICE
48+
return Err(libkernel::error::KernelError::InvalidValue);
49+
};
50+
let mut sched_data = task.sched_data.lock_save_irq();
51+
if CPU_MASK_SIZE > size {
52+
return Err(libkernel::error::KernelError::InvalidValue);
53+
}
54+
cpu_set.truncate(CPU_MASK_SIZE);
55+
// Check if this turns off all CPUs, which is not allowed.
56+
let mut any_true = false;
57+
for i in 0..ArchImpl::cpu_count() {
58+
let byte_index = i / 8;
59+
let bit_index = i % 8;
60+
if (cpu_set[byte_index] & (1 << bit_index)) != 0 {
61+
any_true = true;
62+
break;
63+
}
64+
}
65+
if !any_true {
66+
return Err(libkernel::error::KernelError::InvalidValue);
67+
}
68+
sched_data.as_mut().unwrap().cpu_mask = cpu_set.try_into().unwrap();
69+
// TODO: apply the new affinity immediately if the current CPU is no longer in the set
70+
Ok(0)
71+
}

0 commit comments

Comments
 (0)