|
Process switching consists of two parts:
1. switching the process address space (the page-table mappings), handled by switch_mm;
2. switching the kernel-mode stack and the hardware register context, handled by switch_to.
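As a rough orientation before the code, here is a minimal sketch of how the two parts compose (hypothetical names; the real entry points are the kernel functions discussed below):

/* Hypothetical skeleton: a context switch = address-space switch
 * followed by a CPU-state (stack + register) switch. */
struct task;					/* stand-in for task_struct */

void switch_address_space(struct task *prev, struct task *next);	/* ~ switch_mm */
void switch_cpu_state(struct task *prev, struct task *next);		/* ~ switch_to */

static void sketch_context_switch(struct task *prev, struct task *next)
{
	switch_address_space(prev, next);	/* part 1 */
	switch_cpu_state(prev, next);		/* part 2 */
}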
1. context_switch

In the Linux kernel, context_switch implements both of the parts described above.
The concrete implementation flow:
/*
* context_switch - switch to the new MM and the new thread's register state.
*/
static __always_inline struct rq *
context_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next, struct rq_flags *rf)
{
/* preparatory work for the process switch */
prepare_task_switch(rq, prev, next);
/*
* For paravirt, this is coupled with an exit in switch_to to
* combine the page table reload and the switch backend into
* one hypercall.
*/
arch_start_context_switch(prev);
/*
* kernel -> kernel lazy + transfer active
* user -> kernel lazy + mmgrab() active
*
* kernel -> user switch + mmdrop() active
* user -> user switch
*/
if (!next->mm) { // to kernel
enter_lazy_tlb(prev->active_mm, next);
next->active_mm = prev->active_mm;
if (prev->mm) // from user
mmgrab(prev->active_mm);
else
prev->active_mm = NULL;
} else { // to user
membarrier_switch_mm(rq, prev->active_mm, next->mm);
/*
* sys_membarrier() requires an smp_mb() between setting
* rq->curr / membarrier_switch_mm() and returning to userspace.
*
* The below provides this either through switch_mm(), or in
* case 'prev->active_mm == next->mm' through
* finish_task_switch()'s mmdrop().
*/
/* call switch_mm_irqs_off to switch the user address space */
switch_mm_irqs_off(prev->active_mm, next->mm, next);
if (!prev->mm) { // from kernel
/* will mmdrop() in finish_task_switch(). */
rq->prev_mm = prev->active_mm;
prev->active_mm = NULL;
}
}
rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
prepare_lock_switch(rq, next, rf);
/* Here we just switch the register state and the stack. */
/* call switch_to to switch the kernel-mode stack and the hardware context */
switch_to(prev, next, prev);
barrier();
return finish_task_switch(prev);
}

2. switch_mm

For a user process the user address space must also be switched; the switch_mm function performs this task. switch_mm is architecture-specific. The following uses the ARM architecture to describe the user-space switch.

#ifndef switch_mm_irqs_off
# define switch_mm_irqs_off switch_mm
#endif

This article is concerned only with the ARM architecture. On ARM, switching the process address space is in practice the act of setting the page-table base register TTBR0. Every process owns the system's full virtual address space, but it does not occupy all of physical memory: physical addresses are reached through page-table translation, and the base address for that translation is kept in TTBR0. Each process has its own set of mapping page tables in physical memory (initially not all of the page tables are resident; entries are written to physical memory as page faults occur). TTBR0 gives the base of the process's PGD table, the PGD gives the base of the PTE tables, and a PTE yields the physical address (PA). Since every process has a different PGD, the physical addresses backing different processes' virtual memory are kept isolated from one another. switch_mm therefore boils down to rewriting the TTBR0 register.

/*
* This is the actual mm switch as far as the scheduler
* is concerned. No registers are touched. We avoid
* calling the CPU specific function when the mm hasn't
* actually changed.
*/
static inline void
switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
#ifdef CONFIG_MMU
unsigned int cpu = smp_processor_id();
/*
* __sync_icache_dcache doesn't broadcast the I-cache invalidation,
* so check for possible thread migration and invalidate the I-cache
* if we're new to this CPU.
*/
if (cache_ops_need_broadcast() &&
!cpumask_empty(mm_cpumask(next)) &&
!cpumask_test_cpu(cpu, mm_cpumask(next)))
__flush_icache_all(); /* flush this CPU core's entire I-cache */
/* record the current CPU in the next process's cpumask bitmap */
if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) {
/* handle the TLB and switch the process page-table base TTBR0 */
check_and_switch_context(next, tsk);
if (cache_is_vivt())
cpumask_clear_cpu(cpu, mm_cpumask(prev));
}
#endif
}

2.1 Flushing the I-cache

If the next process has migrated and now executes on a new CPU, the I-cache (instruction cache) must be flushed. In the ARM SMP architecture every core has its own independent I-cache and D-cache (a Harvard-style L1 cache), so the first time a process runs on a given core, that core's I-cache contents must be invalidated in full.

__flush_icache_all implements the I-cache flush; it works by writing the c7 register of coprocessor CP15.

/* Invalidate I-cache inner shareable */
/* write the CP15 coprocessor c7 register: ICIALLUIS */
#define __flush_icache_all_v7_smp() asm("mcr p15, 0, %0, c7, c1, 0" : : "r" (0));
static inline void __flush_icache_all(void)
{
__flush_icache_preferred();
dsb(ishst);
}

The CP15 coprocessor provides sixteen 32-bit registers, c0-c15. Which of them an MCR/MRC instruction addresses is encoded in the instruction's operands, as sketched below.
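As a reference for reading these mcr lines, this is the general operand form from the ARM architecture manual; the tiny helper mirrors the macro above and is illustrative, not kernel code:

/*
 * General form:  MCR p15, <opc1>, <Rt>, <CRn>, <CRm>, <opc2>
 * The tuple {opc1, CRn, CRm, opc2} selects one CP15 register.
 */
static inline void icialluis(void)
{
	/* opc1=0, CRn=c7, CRm=c1, opc2=0 selects ICIALLUIS */
	asm volatile("mcr p15, 0, %0, c7, c1, 0" : : "r" (0));
}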
With opc1 = 0, CRn = c7, CRm = c1 and opc2 = 0, the instruction therefore targets the ICIALLUIS register (Invalidate all instruction caches Inner Shareable to PoU).

2.2 ASID and TLB

check_and_switch_context completes the switch of the process address space, which consists of two parts:
1. handling the ASID and the TLB;
2. switching the page-table translation base (TTBR0).
This section focuses on the ASID and TLB handling in switch_mm. Note that the ARM TLB contains both global and process-specific entries; the ASID is what keeps the process-specific entries of different processes apart.
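The ASID occupies the low bits of mm->context.id, with a generation counter stored above it. A minimal sketch of the generation check used on the fast path below (ASID_BITS assumed to mirror the kernel's value):

#include <stdint.h>

#define ASID_BITS 8

/* Two ids belong to the same ASID generation iff their bits above
 * ASID_BITS are equal; check_and_switch_context uses exactly this
 * test to take the fast path without any TLB maintenance. */
static inline int same_generation(uint64_t id, uint64_t generation)
{
	return !((id ^ generation) >> ASID_BITS);
}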
The first part of check_and_switch_context implements the ASID-related handling.
void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
{
unsigned long flags;
unsigned int cpu = smp_processor_id();
u64 asid;
if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq))
__check_vmalloc_seq(mm);
/*
* We cannot update the pgd and the ASID atomicly with classic
* MMU, so switch exclusively to global mappings to avoid
* speculative page table walking with the wrong TTBR.
*/
cpu_set_reserved_ttbr0();/* copy the contents of TTBR1 into TTBR0 */
asid = atomic64_read(&mm->context.id);/* fetch the process's ASID */
/* No ASID generation rollover: no TLB work needed, jump straight to cpu_switch_mm to switch TTBR0 */
if (!((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS)
&& atomic64_xchg(&per_cpu(active_asids, cpu), asid))
goto switch_mm_fastpath;
raw_spin_lock_irqsave(&cpu_asid_lock, flags);
/* Check that our ASID belongs to the current generation. */
/* ASID generation rolled over: call new_context to allocate a fresh ASID for the process and record it in mm->context.id */
asid = atomic64_read(&mm->context.id);
if ((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS) {
asid = new_context(mm, cpu);
atomic64_set(&mm->context.id, asid);
}
/* ASID generation rolled over: flush the TLB */
if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) {
local_flush_bp_all(); /* flush the branch predictor */
local_flush_tlb_all(); /* flush the TLB */
}
atomic64_set(&per_cpu(active_asids, cpu), asid);
cpumask_set_cpu(cpu, mm_cpumask(mm));
raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
switch_mm_fastpath:
cpu_switch_mm(mm->pgd, mm); /* switch the page-table base register TTBR0 */
}

Why is the ASID only 8 bits wide? That is dictated by the CONTEXTIDR (Context ID Register). Besides setting TTBR0, cpu_switch_mm also sets CONTEXTIDR; section 2.3 comes back to this register.

static inline void local_flush_tlb_all(void)
{
const int zero = 0;
const unsigned int __tlb_flag = __cpu_tlb_flags;
if (tlb_flag(TLB_WB))
dsb(nshst);
__local_flush_tlb_all();
tlb_op(TLB_V7_UIS_FULL, "c8, c7, 0", zero);
if (tlb_flag(TLB_BARRIER)) {
dsb(nsh);
isb();
}
}

The tlb_op operation uses the coprocessor instruction MCR to write the CP15 registers.
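A simplified sketch of how tlb_op can expand into that MCR, assuming the tlb_flag() predicate used in the code above (the kernel's real macro is more involved, handling SMP alternatives):

/* If the CPU's TLB feature flags include 'f', emit
 * MCR p15, 0, <arg>, <regs> to the matching CP15 register. */
#define tlb_op(f, regs, arg)					\
	do {							\
		if (tlb_flag(f))				\
			asm("mcr p15, 0, %0, " regs		\
			    : : "r" (arg) : "cc");		\
	} while (0)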
For the operands "c8, c7, 0" this corresponds to the TLBIALL register (invalidate unified TLB); that is, all TLB entries are invalidated.

2.3 Switching the page-table base

A process switch must switch the process address space. Every process owns the full virtual address space while the physical address spaces remain isolated; the operating system achieves this memory policy through chip-level address translation, namely the MMU (Memory Management Unit). The MMU performs the translation from virtual to physical addresses, allowing the operating system to reach the real data in physical memory through virtual addresses.

void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
{
…………
switch_mm_fastpath:
cpu_switch_mm(mm->pgd, mm);
}

cpu_switch_mm calls cpu_do_switch_mm to switch the process address space.

#define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm)

cpu_do_switch_mm ultimately lands in the assembly routine cpu_v7_switch_mm.

ENTRY(cpu_v7_switch_mm)
#ifdef CONFIG_MMU
	@ r1 is the second argument as defined by the APCS, i.e. the next process's memory descriptor mm
	mmid	r1, r1				@ get mm->context.id
	ALT_SMP(orr	r0, r0, #TTB_FLAGS_SMP)
	ALT_UP(orr	r0, r0, #TTB_FLAGS_UP)
#ifdef CONFIG_PID_IN_CONTEXTIDR
	mrc	p15, 0, r2, c13, c0, 1		@ read current context ID
	lsr	r2, r2, #8			@ extract the PID
	bfi	r1, r2, #8, #24			@ insert into new context ID
#endif
#ifdef CONFIG_ARM_ERRATA_754322
	dsb
#endif
	mcr	p15, 0, r1, c13, c0, 1		@ set context ID
	isb
	mcr	p15, 0, r0, c2, c0, 0		@ set TTB 0
	isb
#endif
	bx	lr
ENDPROC(cpu_v7_switch_mm)

"mmid r1, r1" stores mm->context.id into register r1 (mmid is an assembler macro that simply loads mm->context.id from the mm_struct).
The instruction "mcr p15, 0, r1, c13, c0, 1" therefore targets the CONTEXTIDR (Context ID Register): mm->context.id is written into CONTEXTIDR. This step publishes the current process's ASID (Address Space Identifier). The ASID is used by the TLB: it keeps the page-table mappings that different processes have cached in the TLB isolated from each other, so the TLB entries do not have to be flushed on a process switch.
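For reference, a sketch of the ARMv7 CONTEXTIDR layout (short-descriptor translation format); the two macros are illustrative, not kernel code:

/* CONTEXTIDR (CP15 c13, c0, 1):
 *   bits [7:0]   ASID   - tags TLB entries for this address space
 *   bits [31:8]  PROCID - software-defined process identifier
 */
#define CONTEXTIDR_ASID(v)	((v) & 0xff)
#define CONTEXTIDR_PROCID(v)	((v) >> 8)

This layout is also why the ASID discussed in section 2.2 is limited to 8 bits.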
The instruction "mcr p15, 0, r0, c2, c0, 0" therefore targets the TTBR0 register: the PGD base is written into TTBR0, which completes the switch of the process address space.

3. switch_to

The switch of the kernel stack and registers is performed by the switch_to function. switch_to is architecture-specific. The following uses the ARM architecture to describe this switch.

#define switch_to(prev,next,last)					\
do {									\
	__complete_pending_tlbi();					\
	last = __switch_to(prev,task_thread_info(prev), task_thread_info(next));	\
} while (0)

The assembly implementation of __switch_to follows. Its three input arguments are:
r0 - prev, the task_struct of the process being switched out;
r1 - task_thread_info(prev), the outgoing process's thread_info;
r2 - task_thread_info(next), the incoming process's thread_info.
ENTRY(__switch_to)
UNWIND(.fnstart )
UNWIND(.cantunwind )
add ip, r1, #TI_CPU_SAVE @ip = r1 + TI_CPU_SAVE
ARM( stmia ip!, {r4 - sl, fp, sp, lr} ) @ Store most regs on stack
THUMB( stmia ip!, {r4 - sl, fp} ) @ Store most regs on stack
THUMB( str sp, [ip], #4 )
THUMB( str lr, [ip], #4 )
ldr r4, [r2, #TI_TP_VALUE] @ r4 = next->thread_info->tp_value[0]
ldr r5, [r2, #TI_TP_VALUE + 4] @ r5 = next->thread_info->tp_value[1]
#ifdef CONFIG_CPU_USE_DOMAINS
mrc p15, 0, r6, c3, c0, 0 @ Get domain register
str r6, [r1, #TI_CPU_DOMAIN] @ Save old domain register
ldr r6, [r2, #TI_CPU_DOMAIN]
#endif
switch_tls r1, r4, r5, r3, r7 @ switch the TLS registers to next's values
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP)
ldr r7, [r2, #TI_TASK]
ldr r8, =__stack_chk_guard
.if (TSK_STACK_CANARY > IMM12_MASK)
add r7, r7, #TSK_STACK_CANARY & ~IMM12_MASK
.endif
ldr r7, [r7, #TSK_STACK_CANARY & IMM12_MASK]
#endif
#ifdef CONFIG_CPU_USE_DOMAINS
mcr p15, 0, r6, c3, c0, 0 @ Set domain register
#endif
mov r5, r0 @ preserve prev (r0) across the notifier call below
add r4, r2, #TI_CPU_SAVE @ r4 = &next->thread_info->cpu_context
ldr r0, =thread_notify_head
mov r1, #THREAD_NOTIFY_SWITCH
bl atomic_notifier_call_chain
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP)
str r7, [r8]
#endif
THUMB( mov ip, r4 )
mov r0, r5
ARM( ldmia r4, {r4 - sl, fp, sp, pc} ) @ Load all regs saved previously
THUMB( ldmia ip!, {r4 - sl, fp} ) @ Load all regs saved previously
THUMB( ldr sp, [ip], #4 )
THUMB( ldr pc, [ip] )
UNWIND(.fnend )
ENDPROC(__switch_to)

"add ip, r1, #TI_CPU_SAVE" sets the IP register to r1 + TI_CPU_SAVE; r1 is prev->thread_info, and TI_CPU_SAVE is the offset of the cpu_context member within thread_info.

DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context));

The IP register therefore holds the address of prev->thread_info->cpu_context.

struct cpu_context_save {
__u32 r4;
__u32 r5;
__u32 r6;
__u32 r7;
__u32 r8;
__u32 r9;
__u32 sl;
__u32 fp;
__u32 sp;
__u32 pc;
__u32 extra[2]; /* Xscale 'acc' register, etc */
};

"ARM( stmia ip!, {r4 - sl, fp, sp, lr} )" stores the contents of registers r4 - sl, fp, sp and lr to the memory IP points at, i.e. into prev->thread_info->cpu_context. This saves the register context that the prev process was running with.
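A compact C model of what this STMIA save and the later LDMIA restore amount to (hypothetical helpers; the register file is represented as a plain array for illustration):

#include <stdint.h>

/* Model of cpu_context_save as an array:
 * slots 0..6 = r4..sl(r10), 7 = fp, 8 = sp, 9 = pc */
typedef uint32_t cpu_ctx[10];

/* 'stmia ip!, {r4 - sl, fp, sp, lr}': copy the callee-saved registers
 * out; lr lands in the pc slot, so the task will resume right after
 * __switch_to once it is restored. */
static void save_ctx(cpu_ctx ctx, const uint32_t regs[10])
{
	for (int i = 0; i < 10; i++)
		ctx[i] = regs[i];
}

/* 'ldmia r4, {r4 - sl, fp, sp, pc}': reload them; loading sp switches
 * the kernel stack and loading pc transfers control to the next task. */
static void restore_ctx(const cpu_ctx ctx, uint32_t regs[10])
{
	for (int i = 0; i < 10; i++)
		regs[i] = ctx[i];
}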
The THUMB variants above (the str sp / str lr instructions with post-increment) likewise store registers to memory at successively increasing addresses, writing the updated address back to IP.

prev's r4 and r5 have already been saved (by the stmia above), so they are free for the next process to use: r4 and r5 are loaded with next->thread_info->tp_value[0] and next->thread_info->tp_value[1] respectively.

atomic_notifier_call_chain is then called, with thread_notify_head and THREAD_NOTIFY_SWITCH as its arguments.

"add r4, r2, #TI_CPU_SAVE" leaves the address of next->thread_info->cpu_context in register r4.

"ARM( ldmia r4, {r4 - sl, fp, sp, pc} )" loads the data in next->thread_info->cpu_context into registers r4 - sl, fp, sp and pc. Loading next->thread_info->cpu_context->sp into SP completes the kernel-stack switch, and loading next->thread_info->cpu_context->pc into PC in effect jumps to where the next process resumes execution; that is, the register context of the next process is switched in.

This completes the switch of the process kernel stack and registers.

For an introduction to the ARM registers, see 《ARM體系架構(gòu)—ARMv7-A處理器模式及寄存器》.
|
|