Linux系统内核抢占补丁的原理(代码)

news/2024/5/18 21:51:10 标签: linux, struct, list, exception, signal, each

CPU在内核中运行时并不是处处不可抢占的,内核中存在一些空隙,在这时进行抢占是安全的,内核抢占补丁的基本原理就是将SMP可并行的代码段看成是可以进行内核抢占的区域。

Linux 2.4内核正好细化了多CPU下的内核线程同步机制,对不可并行的指令块用spinlock和rwlock作了细致的标示,该补丁的实现可谓水到渠成。具体的方法就是在进程的任务结构上增加一个preempt_count变量作为内核抢占锁,它随着spinlock和rwlock一起加锁和解锁。当preempt_count为0时表示可以进行内核调度。内核抢占调度的入口为preempt_schedule(),它将当前进程标记为TASK_PREEMPTED状态再调用schedule(),在TASK_PREEMPTED状态,schedule()不会将进程从运行队列中删除。

下面是内核抢占补丁的主要代码示意:

arch/i386/kernel/entry.S:
preempt_count = 4	# the task_struct slot formerly used by flags now holds preempt_count; flags was moved elsewhere

# Return path from an exception.
ret_from_exception:
#ifdef CONFIG_SMP
	GET_CURRENT(%ebx)
	movl processor(%ebx),%eax
	shll $CONFIG_X86_L1_CACHE_SHIFT,%eax
	movl SYMBOL_NAME(irq_stat)(,%eax),%ecx	# softirq_active
	testl SYMBOL_NAME(irq_stat)+4(,%eax),%ecx	# softirq_mask
#else
	movl SYMBOL_NAME(irq_stat),%ecx	# softirq_active
	testl SYMBOL_NAME(irq_stat)+4,%ecx	# softirq_mask
#endif
	jne handle_softirq
#ifdef CONFIG_PREEMPT
	cli
	# Exception entry has no instruction that disables kernel scheduling,
	# so bump the count here to balance the decl in ret_from_intr.
	incl preempt_count(%ebx)
#endif

# Return path from a hardware interrupt.
ENTRY(ret_from_intr)
	GET_CURRENT(%ebx)
#ifdef CONFIG_PREEMPT
	cli
	decl preempt_count(%ebx)	# restore the kernel preemption count
#endif
	movl EFLAGS(%esp),%eax	# mix EFLAGS and CS
	movb CS(%esp),%al
	testl $(VM_MASK | 3),%eax	# return to VM86 mode or non-supervisor?
	jne ret_with_reschedule
#ifdef CONFIG_PREEMPT
	cmpl $0,preempt_count(%ebx)
	jnz restore_all	# non-zero preempt_count: kernel preemption is disabled
	cmpl $0,need_resched(%ebx)
	jz restore_all	# need_resched is zero: nothing to reschedule
	movl SYMBOL_NAME(irq_stat)+irq_stat_local_bh_count CPU_INDX,%ecx
	addl SYMBOL_NAME(irq_stat)+irq_stat_local_irq_count CPU_INDX,%ecx
	jnz restore_all	# bh count + irq count is non-zero: do not preempt
	incl preempt_count(%ebx)
	sti
	call SYMBOL_NAME(preempt_schedule)
	jmp ret_from_intr	# back from the new process: go through ret_from_intr again so the preemption count is restored before returning
#else
	jmp restore_all
#endif
	ALIGN
handle_softirq:
#ifdef CONFIG_PREEMPT
	cli
	GET_CURRENT(%ebx)
	incl preempt_count(%ebx)	# balanced by the decl in ret_from_intr
	sti
#endif
	call SYMBOL_NAME(do_softirq)
	jmp ret_from_intr
	ALIGN
reschedule:
	call SYMBOL_NAME(schedule)	# test
	jmp ret_from_sys_call
include/asm/hw_irq.h:
...
#ifdef CONFIG_PREEMPT
/*
 * Asm fragment that loads the current task via GET_CURRENT and increments
 * the word at offset 4 from %ebx (entry.S defines preempt_count = 4).
 */
#define BUMP_CONTEX_SWITCH_LOCK \
	GET_CURRENT \
	"incl 4(%ebx)\n\t"
#else
#define BUMP_CONTEX_SWITCH_LOCK
#endif

/*
 * Save the register context on hardware-interrupt entry.  The trailing
 * BUMP_CONTEX_SWITCH_LOCK disables kernel preemption at interrupt entry
 * when CONFIG_PREEMPT is set.
 */
#define SAVE_ALL \
	"cld\n\t" \
	"pushl %es\n\t" \
	"pushl %ds\n\t" \
	"pushl %eax\n\t" \
	"pushl %ebp\n\t" \
	"pushl %edi\n\t" \
	"pushl %esi\n\t" \
	"pushl %edx\n\t" \
	"pushl %ecx\n\t" \
	"pushl %ebx\n\t" \
	"movl $" STR(__KERNEL_DS) ",%edx\n\t" \
	"movl %edx,%ds\n\t" \
	"movl %edx,%es\n\t" \
	BUMP_CONTEX_SWITCH_LOCK
include/linux/spinlock.h:
#ifdef CONFIG_PREEMPT
/* The current task's preemption counter. */
#define switch_lock_count() current->preempt_count

/* Non-zero while the current task's preemption count is non-zero. */
#define in_ctx_sw_off() (switch_lock_count().counter)
#define atomic_ptr_in_ctx_sw_off() (&switch_lock_count())

/* Disable kernel preemption: increment the current task's preemption count. */
#define ctx_sw_off() \
do { \
	atomic_inc(atomic_ptr_in_ctx_sw_off()); \
} while (0)

/* Allow kernel preemption again (decrement the count) without rescheduling. */
#define ctx_sw_on_no_preempt() \
do { \
	atomic_dec(atomic_ptr_in_ctx_sw_off()); \
} while (0)

/*
 * Allow kernel preemption again and carry it out: if the count drops to
 * zero and a reschedule is pending, call preempt_schedule().
 */
#define ctx_sw_on() \
do { \
	if (atomic_dec_and_test(atomic_ptr_in_ctx_sw_off()) && \
	    current->need_resched) \
		preempt_schedule(); \
} while (0)

/* Preemption is disabled on entry to a spinlock ... */
#define spin_lock(lock) \
do { \
	ctx_sw_off(); \
	_raw_spin_lock(lock); \
} while(0)

/*
 * Try to take the lock; evaluates to 1 on success.  On failure the
 * preemption disable is undone via ctx_sw_on() and the result is 0.
 */
#define spin_trylock(lock) ({ctx_sw_off(); _raw_spin_trylock(lock) ? \
	1 : ({ctx_sw_on(); 0;});})

/* ... and re-enabled (possibly preempting right away) when it is released. */
#define spin_unlock(lock) \
do { \
	_raw_spin_unlock(lock); \
	ctx_sw_on(); \
} while (0)

/* The rwlock wrappers pair ctx_sw_off()/ctx_sw_on() the same way. */
#define read_lock(lock) ({ctx_sw_off(); _raw_read_lock(lock);})
#define read_unlock(lock) ({_raw_read_unlock(lock); ctx_sw_on();})
#define write_lock(lock) ({ctx_sw_off(); _raw_write_lock(lock);})
#define write_unlock(lock) ({_raw_write_unlock(lock); ctx_sw_on();})
#define write_trylock(lock) ({ctx_sw_off(); _raw_write_trylock(lock) ? \
	1 : ({ctx_sw_on(); 0;});})
...
include/asm/softirq.h:
/* Disable bottom halves on this CPU; kernel preemption is disabled first. */
#define cpu_bh_disable(cpu) do { ctx_sw_off(); local_bh_count(cpu)++; barrier(); } while (0)
/* Re-enable bottom halves, then allow (and possibly perform) kernel preemption. */
#define cpu_bh_enable(cpu) do { barrier(); local_bh_count(cpu)--;ctx_sw_on(); } while (0)
kernel/schedule.c:
#ifdef CONFIG_PREEMPT
/*
 * preempt_schedule() - entry point for preempting the current task while it
 * runs in the kernel.
 *
 * As long as a reschedule is pending, the task is flagged TASK_PREEMPTED,
 * schedule() is called, and the flag is cleared again once the task resumes.
 * Preemption stays disabled around each pass so this path cannot recurse.
 */
asmlinkage void preempt_schedule(void)
{
	if (!current->need_resched)
		return;

	do {
		ctx_sw_off();
		/* Mark the task so that schedule() leaves it on the run queue. */
		current->state |= TASK_PREEMPTED;
		schedule();
		current->state &= ~TASK_PREEMPTED;
		/* Drop the count without triggering another preemption check. */
		ctx_sw_on_no_preempt();
	} while (current->need_resched);
}
#endif
/*
 * schedule() - pick the next task to run on this CPU and switch to it.
 *
 * With CONFIG_PREEMPT the function raises the preemption count on entry
 * (ctx_sw_off()) and lowers it again on the normal exit path
 * (ctx_sw_on_no_preempt()).  A task whose state carries TASK_PREEMPTED is
 * left on the run queue instead of being removed.
 */
asmlinkage void schedule(void)
{
	struct schedule_data * sched_data;
	struct task_struct *prev, *next, *p;
	struct list_head *tmp;
	int this_cpu, c;

#ifdef CONFIG_PREEMPT
	ctx_sw_off();
#endif
	if (!current->active_mm) BUG();
need_resched_back:
	prev = current;
	this_cpu = prev->processor;

	if (in_interrupt())
		goto scheduling_in_interrupt;

	release_kernel_lock(prev, this_cpu);

	/* Do "administrative" work here while we don't hold any locks */
	if (softirq_active(this_cpu) & softirq_mask(this_cpu))
		goto handle_softirq;
handle_softirq_back:

	/*
	 * 'sched_data' is protected by the fact that we can run
	 * only one process per CPU.
	 */
	sched_data = & aligned_data[this_cpu].schedule_data;

	spin_lock_irq(&runqueue_lock);

	/* move an exhausted RR process to be last.. */
	if (prev->policy == SCHED_RR)
		goto move_rr_last;
move_rr_back:

	switch (prev->state) {
		case TASK_INTERRUPTIBLE:
			if (signal_pending(prev)) {
				prev->state = TASK_RUNNING;
				break;
			}
			/* fall through */
		default:
#ifdef CONFIG_PREEMPT
			/* Preempted inside the kernel: keep the task on the run queue. */
			if (prev->state & TASK_PREEMPTED)
				break;
#endif
			del_from_runqueue(prev);
			/* fall through */
#ifdef CONFIG_PREEMPT
		case TASK_PREEMPTED:
#endif
		case TASK_RUNNING:
			; /* a label must be followed by a statement */
	}
	prev->need_resched = 0;

	/*
	 * this is the scheduler proper:
	 */

repeat_schedule:
	/*
	 * Default process to select..
	 */
	next = idle_task(this_cpu);
	c = -1000;
	if (task_on_runqueue(prev))
		goto still_running;

still_running_back:
	list_for_each(tmp, &runqueue_head) {
		p = list_entry(tmp, struct task_struct, run_list);
		if (can_schedule(p, this_cpu)) {
			int weight = goodness(p, this_cpu, prev->active_mm);
			if (weight > c)
				c = weight, next = p;
		}
	}

	/* Do we need to re-calculate counters? */
	if (!c)
		goto recalculate;
	/*
	 * from this point on nothing can prevent us from
	 * switching to the next task, save this fact in
	 * sched_data.
	 */
	sched_data->curr = next;
#ifdef CONFIG_SMP
	next->has_cpu = 1;
	next->processor = this_cpu;
#endif
	spin_unlock_irq(&runqueue_lock);

	if (prev == next)
		goto same_process;

#ifdef CONFIG_SMP
	/*
	 * maintain the per-process 'last schedule' value.
	 * (this has to be recalculated even if we reschedule to
	 * the same process) Currently this is only used on SMP,
	 * and it's approximate, so we do not have to maintain
	 * it while holding the runqueue spinlock.
	 */
	sched_data->last_schedule = get_cycles();

	/*
	 * We drop the scheduler lock early (it's a global spinlock),
	 * thus we have to lock the previous process from getting
	 * rescheduled during switch_to().
	 */

#endif /* CONFIG_SMP */

	kstat.context_swtch++;
	/*
	 * there are 3 processes which are affected by a context switch:
	 *
	 * prev == .... ==> (last => next)
	 *
	 * It's the 'much more previous' 'prev' that is on next's stack,
	 * but prev is set to (the just run) 'last' process by switch_to().
	 * This might sound slightly confusing but makes tons of sense.
	 */
	prepare_to_switch();
	{
		struct mm_struct *mm = next->mm;
		struct mm_struct *oldmm = prev->active_mm;
		if (!mm) {
			if (next->active_mm) BUG();
			next->active_mm = oldmm;
			atomic_inc(&oldmm->mm_count);
			enter_lazy_tlb(oldmm, next, this_cpu);
		} else {
			if (next->active_mm != mm) BUG();
			switch_mm(oldmm, mm, next, this_cpu);
		}

		if (!prev->mm) {
			prev->active_mm = NULL;
			mmdrop(oldmm);
		}
	}

	/*
	 * This just switches the register state and the
	 * stack.
	 */
	switch_to(prev, next, prev);
	__schedule_tail(prev);

same_process:
	reacquire_kernel_lock(current);
	if (current->need_resched)
		goto need_resched_back;
#ifdef CONFIG_PREEMPT
	/* Undo the ctx_sw_off() from function entry without re-checking need_resched. */
	ctx_sw_on_no_preempt();
#endif
	return;

recalculate:
	{
		struct task_struct *p;
		spin_unlock_irq(&runqueue_lock);
		read_lock(&tasklist_lock);
		for_each_task(p)
			p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
		read_unlock(&tasklist_lock);
		spin_lock_irq(&runqueue_lock);
	}
	goto repeat_schedule;

still_running:
	c = goodness(prev, this_cpu, prev->active_mm);
	next = prev;
	goto still_running_back;

handle_softirq:
	do_softirq();
	goto handle_softirq_back;

move_rr_last:
	if (!prev->counter) {
		prev->counter = NICE_TO_TICKS(prev->nice);
		move_last_runqueue(prev);
	}
	goto move_rr_back;

scheduling_in_interrupt:
	printk("Scheduling in interrupt\n");
	BUG();
	return;
}
/*
 * schedule_tail() - run __schedule_tail() for the previous task and, with
 * CONFIG_PREEMPT, re-enable kernel preemption afterwards.
 *
 * @prev: the task passed on to __schedule_tail().
 */
void schedule_tail(struct task_struct *prev)
{
__schedule_tail(prev);
#ifdef CONFIG_PREEMPT
/* Decrement the preemption count; ctx_sw_on() may call preempt_schedule(). */
ctx_sw_on();
#endif
}

 


http://www.niftyadmin.cn/n/1850946.html

相关文章

Linux驱动修炼之道-INPUT子系统(上)

Linux驱动修炼之道-INPUT子系统(上) 努力成为linux kernel hacker的人李万鹏原创作品,为梦而战。出处 http://blog.csdn.net/woshixingaaa/archive/2011/05/19/6431094.aspx 内核的输入子系统是对分散的,多种不同类别的输入设备(如键盘,鼠标…

iphone知识汇总

//Label设置换行 label.lineBreakMode UILineBreakModeWordWrap; label.numberOfLines 0; 每日分享 － Locale Util 取得当前的国家和语言设定 复制代码 #import <Foundation/Foundation.h> interface LocaleUtils : NSObject { } (NSString *)getCoun…

selenium 代码常见报错

报错 selenium.common.exceptions.WebDriverException: Message: unknown error: call function result missing value 分析原因： 由于chromedriver.exe没有加入到环境变量，导致报错 解决方法： webdriver.Chrome(r位置chromedriver.exe)转载于…

SQL SERVER 的函数

1.字符串函数 长度与分析用 datalength(Char_expr) 返回字符串包含字符数,但不包含后面的空格 substring(expression,start,length) 不多说了,取子串 right(char_expr,int_expr) 返回字符串右边int_expr个字符 字符操作类 upper(char_expr) 转为大写 lower(char_expr) 转为小写…

poj2371

简单题 View Code #include <iostream>#include <cstdio>#include <cstdlib>#include <cstring>#include <algorithm>usingnamespacestd;#definemaxn 100005intn, m, f[maxn];intmain(){//freopen("t.txt", "r", stdin);sc…

POJ 3047 Fibonacci

DEBUG很辛苦，且行， 且珍惜 原代码： ans[0][0] (ans[0][0] * a[flag][0][0] ans[0][1] * a[flag][1][0]) % 10000;ans[0][1] (ans[0][0] * a[flag][0][1] ans[0][1] * a[flag][1][1]) % 10000;ans[1][0] (ans[1][0] * a[flag][0][0] …

Linux驱动修炼之道-INPUT子系统(下)

Linux驱动修炼之道-INPUT子系统(下) 出处 http://blog.csdn.net/woshixingaaa/archive/2011/05/19/6433337.aspx input子系统最重要的部分就是向上层report了。这里还是先介绍几个数据结构： struct input_event { struct timeval time; //事件发生的时间 …

一篇著名的Linux C语言编程初级入门文章

原著：Rick McMullin前言Linux的发行版中包含了很多软件开发工具. 它们中的很多是用于 C 和 C应用程序开发的. 本文介绍了在 Linux 下能用于 C 应用程序开发和调试的工具. 本文的主旨是介绍如何在 Linux 下使用 C 编译器和其他 C 编程工具, 而非 C 语言编程的教程. 在…